[llvm] 16cf9e6 - [AMDGPU] Fix handling of gfx10 LDS misaligned access bug

Stanislav Mekhanoshin via llvm-commits llvm-commits at lists.llvm.org
Thu Apr 7 15:08:38 PDT 2022


Author: Stanislav Mekhanoshin
Date: 2022-04-07T15:08:29-07:00
New Revision: 16cf9e6dad9545091a52f8a74848aa80cd751ebf

URL: https://github.com/llvm/llvm-project/commit/16cf9e6dad9545091a52f8a74848aa80cd751ebf
DIFF: https://github.com/llvm/llvm-project/commit/16cf9e6dad9545091a52f8a74848aa80cd751ebf.diff

LOG: [AMDGPU] Fix handling of gfx10 LDS misaligned access bug

It was only handled for FLAT initially because we did not have
unaligned DS instructions lowering. Now it is implemented but
the bug is not handled.

Differential Revision: https://reviews.llvm.org/D123338

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/SIISelLowering.cpp
    llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-local-128.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-local.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-local.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/lds-misaligned-bug.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/load-local.128.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/load-unaligned.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/store-local.128.ll
    llvm/test/CodeGen/AMDGPU/ds-sub-offset.ll
    llvm/test/CodeGen/AMDGPU/store-local.128.ll
    llvm/test/CodeGen/AMDGPU/store-local.96.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 69c1643c63f5b..e37448552a41c 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -1516,10 +1516,14 @@ bool SITargetLowering::allowsMisalignedMemoryAccessesImpl(
 
   if (AddrSpace == AMDGPUAS::LOCAL_ADDRESS ||
       AddrSpace == AMDGPUAS::REGION_ADDRESS) {
+    Align RequiredAlignment(PowerOf2Ceil(Size/8)); // Natural alignment.
+    if (Subtarget->hasLDSMisalignedBug() && Size > 32 &&
+        Alignment < RequiredAlignment)
+      return false;
+
     // Check if alignment requirements for ds_read/write instructions are
     // disabled.
-    if (Subtarget->hasUnalignedDSAccessEnabled() &&
-        !Subtarget->hasLDSMisalignedBug()) {
+    if (Subtarget->hasUnalignedDSAccessEnabled()) {
       if (IsFast)
         *IsFast = Alignment != Align(2);
       return true;

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-local-128.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-local-128.mir
index 1083d43381939..c573f74fd6be7 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-local-128.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-local-128.mir
@@ -1,7 +1,7 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 # RUN: llc -amdgpu-global-isel-new-legality -march=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX7 %s
 # RUN: llc -amdgpu-global-isel-new-legality -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s
-# RUN: llc -amdgpu-global-isel-new-legality -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX10 %s
+# RUN: llc -amdgpu-global-isel-new-legality -march=amdgcn -mcpu=gfx1010 -mattr=+cumode -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX10 %s
 
 ---
 

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-local.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-local.mir
index dd312e7738ae0..7f672526e828f 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-local.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-local.mir
@@ -3,7 +3,7 @@
 # RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs  -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX7 %s
 # RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs  -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX7 %s
 # RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s
-# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s
+# RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=+cumode -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX10 %s
 
 ---
 
@@ -23,12 +23,6 @@ body: |
     ; GFX7-NEXT: $m0 = S_MOV_B32 -1
     ; GFX7-NEXT: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s32), addrspace 3)
     ; GFX7-NEXT: $vgpr0 = COPY [[DS_READ_B32_]]
-    ; GFX9-LABEL: name: load_local_s32_from_4
-    ; GFX9: liveins: $vgpr0
-    ; GFX9-NEXT: {{  $}}
-    ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-    ; GFX9-NEXT: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (s32), addrspace 3)
-    ; GFX9-NEXT: $vgpr0 = COPY [[DS_READ_B32_gfx9_]]
     ; GFX6-LABEL: name: load_local_s32_from_4
     ; GFX6: liveins: $vgpr0
     ; GFX6-NEXT: {{  $}}
@@ -36,6 +30,18 @@ body: |
     ; GFX6-NEXT: $m0 = S_MOV_B32 -1
     ; GFX6-NEXT: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s32), addrspace 3)
     ; GFX6-NEXT: $vgpr0 = COPY [[DS_READ_B32_]]
+    ; GFX9-LABEL: name: load_local_s32_from_4
+    ; GFX9: liveins: $vgpr0
+    ; GFX9-NEXT: {{  $}}
+    ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX9-NEXT: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (s32), addrspace 3)
+    ; GFX9-NEXT: $vgpr0 = COPY [[DS_READ_B32_gfx9_]]
+    ; GFX10-LABEL: name: load_local_s32_from_4
+    ; GFX10: liveins: $vgpr0
+    ; GFX10-NEXT: {{  $}}
+    ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX10-NEXT: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (s32), addrspace 3)
+    ; GFX10-NEXT: $vgpr0 = COPY [[DS_READ_B32_gfx9_]]
     %0:vgpr(p3) = COPY $vgpr0
     %1:vgpr(s32) = G_LOAD %0 :: (load (s32), align 4, addrspace 3)
     $vgpr0 = COPY %1
@@ -60,12 +66,6 @@ body: |
     ; GFX7-NEXT: $m0 = S_MOV_B32 -1
     ; GFX7-NEXT: [[DS_READ_U16_:%[0-9]+]]:vgpr_32 = DS_READ_U16 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s16), addrspace 3)
     ; GFX7-NEXT: $vgpr0 = COPY [[DS_READ_U16_]]
-    ; GFX9-LABEL: name: load_local_s32_from_2
-    ; GFX9: liveins: $vgpr0
-    ; GFX9-NEXT: {{  $}}
-    ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-    ; GFX9-NEXT: [[DS_READ_U16_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U16_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (s16), addrspace 3)
-    ; GFX9-NEXT: $vgpr0 = COPY [[DS_READ_U16_gfx9_]]
     ; GFX6-LABEL: name: load_local_s32_from_2
     ; GFX6: liveins: $vgpr0
     ; GFX6-NEXT: {{  $}}
@@ -73,6 +73,18 @@ body: |
     ; GFX6-NEXT: $m0 = S_MOV_B32 -1
     ; GFX6-NEXT: [[DS_READ_U16_:%[0-9]+]]:vgpr_32 = DS_READ_U16 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s16), addrspace 3)
     ; GFX6-NEXT: $vgpr0 = COPY [[DS_READ_U16_]]
+    ; GFX9-LABEL: name: load_local_s32_from_2
+    ; GFX9: liveins: $vgpr0
+    ; GFX9-NEXT: {{  $}}
+    ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX9-NEXT: [[DS_READ_U16_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U16_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (s16), addrspace 3)
+    ; GFX9-NEXT: $vgpr0 = COPY [[DS_READ_U16_gfx9_]]
+    ; GFX10-LABEL: name: load_local_s32_from_2
+    ; GFX10: liveins: $vgpr0
+    ; GFX10-NEXT: {{  $}}
+    ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX10-NEXT: [[DS_READ_U16_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U16_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (s16), addrspace 3)
+    ; GFX10-NEXT: $vgpr0 = COPY [[DS_READ_U16_gfx9_]]
     %0:vgpr(p3) = COPY $vgpr0
     %1:vgpr(s32) = G_LOAD %0 :: (load (s16), align 2, addrspace 3)
     $vgpr0 = COPY %1
@@ -100,12 +112,6 @@ body: |
     ; GFX7-NEXT: $m0 = S_MOV_B32 -1
     ; GFX7-NEXT: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3)
     ; GFX7-NEXT: $vgpr0 = COPY [[DS_READ_U8_]]
-    ; GFX9-LABEL: name: load_local_s32_from_1
-    ; GFX9: liveins: $vgpr0
-    ; GFX9-NEXT: {{  $}}
-    ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-    ; GFX9-NEXT: [[DS_READ_U8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (s8), addrspace 3)
-    ; GFX9-NEXT: $vgpr0 = COPY [[DS_READ_U8_gfx9_]]
     ; GFX6-LABEL: name: load_local_s32_from_1
     ; GFX6: liveins: $vgpr0
     ; GFX6-NEXT: {{  $}}
@@ -113,6 +119,18 @@ body: |
     ; GFX6-NEXT: $m0 = S_MOV_B32 -1
     ; GFX6-NEXT: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3)
     ; GFX6-NEXT: $vgpr0 = COPY [[DS_READ_U8_]]
+    ; GFX9-LABEL: name: load_local_s32_from_1
+    ; GFX9: liveins: $vgpr0
+    ; GFX9-NEXT: {{  $}}
+    ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX9-NEXT: [[DS_READ_U8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (s8), addrspace 3)
+    ; GFX9-NEXT: $vgpr0 = COPY [[DS_READ_U8_gfx9_]]
+    ; GFX10-LABEL: name: load_local_s32_from_1
+    ; GFX10: liveins: $vgpr0
+    ; GFX10-NEXT: {{  $}}
+    ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX10-NEXT: [[DS_READ_U8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (s8), addrspace 3)
+    ; GFX10-NEXT: $vgpr0 = COPY [[DS_READ_U8_gfx9_]]
     %0:vgpr(p3) = COPY $vgpr0
     %1:vgpr(s32) = G_LOAD %0 :: (load (s8), align 1, addrspace 3)
     $vgpr0 = COPY %1
@@ -137,12 +155,6 @@ body: |
     ; GFX7-NEXT: $m0 = S_MOV_B32 -1
     ; GFX7-NEXT: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (<2 x s32>), addrspace 3)
     ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ_B64_]]
-    ; GFX9-LABEL: name: load_local_v2s32
-    ; GFX9: liveins: $vgpr0
-    ; GFX9-NEXT: {{  $}}
-    ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-    ; GFX9-NEXT: [[DS_READ_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_READ_B64_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (<2 x s32>), addrspace 3)
-    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ_B64_gfx9_]]
     ; GFX6-LABEL: name: load_local_v2s32
     ; GFX6: liveins: $vgpr0
     ; GFX6-NEXT: {{  $}}
@@ -150,6 +162,18 @@ body: |
     ; GFX6-NEXT: $m0 = S_MOV_B32 -1
     ; GFX6-NEXT: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (<2 x s32>), addrspace 3)
     ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ_B64_]]
+    ; GFX9-LABEL: name: load_local_v2s32
+    ; GFX9: liveins: $vgpr0
+    ; GFX9-NEXT: {{  $}}
+    ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX9-NEXT: [[DS_READ_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_READ_B64_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (<2 x s32>), addrspace 3)
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ_B64_gfx9_]]
+    ; GFX10-LABEL: name: load_local_v2s32
+    ; GFX10: liveins: $vgpr0
+    ; GFX10-NEXT: {{  $}}
+    ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX10-NEXT: [[DS_READ_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_READ_B64_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (<2 x s32>), addrspace 3)
+    ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ_B64_gfx9_]]
     %0:vgpr(p3) = COPY $vgpr0
     %1:vgpr(<2 x s32>) = G_LOAD %0 :: (load (<2 x s32>), align 8, addrspace 3)
     $vgpr0_vgpr1 = COPY %1
@@ -174,12 +198,6 @@ body: |
     ; GFX7-NEXT: $m0 = S_MOV_B32 -1
     ; GFX7-NEXT: [[DS_READ2_B32_:%[0-9]+]]:vreg_64 = DS_READ2_B32 [[COPY]], 0, 1, 0, implicit $m0, implicit $exec :: (load (<2 x s32>), align 4, addrspace 3)
     ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ2_B32_]]
-    ; GFX9-LABEL: name: load_local_v2s32_align4
-    ; GFX9: liveins: $vgpr0
-    ; GFX9-NEXT: {{  $}}
-    ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-    ; GFX9-NEXT: [[DS_READ2_B32_gfx9_:%[0-9]+]]:vreg_64 = DS_READ2_B32_gfx9 [[COPY]], 0, 1, 0, implicit $exec :: (load (<2 x s32>), align 4, addrspace 3)
-    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ2_B32_gfx9_]]
     ; GFX6-LABEL: name: load_local_v2s32_align4
     ; GFX6: liveins: $vgpr0
     ; GFX6-NEXT: {{  $}}
@@ -187,6 +205,18 @@ body: |
     ; GFX6-NEXT: $m0 = S_MOV_B32 -1
     ; GFX6-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3)
     ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ; GFX9-LABEL: name: load_local_v2s32_align4
+    ; GFX9: liveins: $vgpr0
+    ; GFX9-NEXT: {{  $}}
+    ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX9-NEXT: [[DS_READ2_B32_gfx9_:%[0-9]+]]:vreg_64 = DS_READ2_B32_gfx9 [[COPY]], 0, 1, 0, implicit $exec :: (load (<2 x s32>), align 4, addrspace 3)
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ2_B32_gfx9_]]
+    ; GFX10-LABEL: name: load_local_v2s32_align4
+    ; GFX10: liveins: $vgpr0
+    ; GFX10-NEXT: {{  $}}
+    ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX10-NEXT: [[DS_READ2_B32_gfx9_:%[0-9]+]]:vreg_64 = DS_READ2_B32_gfx9 [[COPY]], 0, 1, 0, implicit $exec :: (load (<2 x s32>), align 4, addrspace 3)
+    ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ2_B32_gfx9_]]
     %0:vgpr(p3) = COPY $vgpr0
     %1:vgpr(<2 x s32>) = G_LOAD %0 :: (load (<2 x s32>), align 4, addrspace 3)
     $vgpr0_vgpr1 = COPY %1
@@ -211,12 +241,6 @@ body: |
     ; GFX7-NEXT: $m0 = S_MOV_B32 -1
     ; GFX7-NEXT: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s64), addrspace 3)
     ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ_B64_]]
-    ; GFX9-LABEL: name: load_local_s64
-    ; GFX9: liveins: $vgpr0
-    ; GFX9-NEXT: {{  $}}
-    ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-    ; GFX9-NEXT: [[DS_READ_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_READ_B64_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (s64), addrspace 3)
-    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ_B64_gfx9_]]
     ; GFX6-LABEL: name: load_local_s64
     ; GFX6: liveins: $vgpr0
     ; GFX6-NEXT: {{  $}}
@@ -224,6 +248,18 @@ body: |
     ; GFX6-NEXT: $m0 = S_MOV_B32 -1
     ; GFX6-NEXT: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s64), addrspace 3)
     ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ_B64_]]
+    ; GFX9-LABEL: name: load_local_s64
+    ; GFX9: liveins: $vgpr0
+    ; GFX9-NEXT: {{  $}}
+    ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX9-NEXT: [[DS_READ_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_READ_B64_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (s64), addrspace 3)
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ_B64_gfx9_]]
+    ; GFX10-LABEL: name: load_local_s64
+    ; GFX10: liveins: $vgpr0
+    ; GFX10-NEXT: {{  $}}
+    ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX10-NEXT: [[DS_READ_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_READ_B64_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (s64), addrspace 3)
+    ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ_B64_gfx9_]]
     %0:vgpr(p3) = COPY $vgpr0
     %1:vgpr(s64) = G_LOAD %0 :: (load (s64), align 8, addrspace 3)
     $vgpr0_vgpr1 = COPY %1
@@ -248,12 +284,6 @@ body: |
     ; GFX7-NEXT: $m0 = S_MOV_B32 -1
     ; GFX7-NEXT: [[DS_READ2_B32_:%[0-9]+]]:vreg_64 = DS_READ2_B32 [[COPY]], 0, 1, 0, implicit $m0, implicit $exec :: (load (s64), align 4, addrspace 3)
     ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ2_B32_]]
-    ; GFX9-LABEL: name: load_local_s64_align4
-    ; GFX9: liveins: $vgpr0
-    ; GFX9-NEXT: {{  $}}
-    ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-    ; GFX9-NEXT: [[DS_READ2_B32_gfx9_:%[0-9]+]]:vreg_64 = DS_READ2_B32_gfx9 [[COPY]], 0, 1, 0, implicit $exec :: (load (s64), align 4, addrspace 3)
-    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ2_B32_gfx9_]]
     ; GFX6-LABEL: name: load_local_s64_align4
     ; GFX6: liveins: $vgpr0
     ; GFX6-NEXT: {{  $}}
@@ -261,6 +291,18 @@ body: |
     ; GFX6-NEXT: $m0 = S_MOV_B32 -1
     ; GFX6-NEXT: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p3) :: (load (s64), align 4, addrspace 3)
     ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; GFX9-LABEL: name: load_local_s64_align4
+    ; GFX9: liveins: $vgpr0
+    ; GFX9-NEXT: {{  $}}
+    ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX9-NEXT: [[DS_READ2_B32_gfx9_:%[0-9]+]]:vreg_64 = DS_READ2_B32_gfx9 [[COPY]], 0, 1, 0, implicit $exec :: (load (s64), align 4, addrspace 3)
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ2_B32_gfx9_]]
+    ; GFX10-LABEL: name: load_local_s64_align4
+    ; GFX10: liveins: $vgpr0
+    ; GFX10-NEXT: {{  $}}
+    ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX10-NEXT: [[DS_READ2_B32_gfx9_:%[0-9]+]]:vreg_64 = DS_READ2_B32_gfx9 [[COPY]], 0, 1, 0, implicit $exec :: (load (s64), align 4, addrspace 3)
+    ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ2_B32_gfx9_]]
     %0:vgpr(p3) = COPY $vgpr0
     %1:vgpr(s64) = G_LOAD %0 :: (load (s64), align 4, addrspace 3)
     $vgpr0_vgpr1 = COPY %1
@@ -285,12 +327,6 @@ body: |
     ; GFX7-NEXT: $m0 = S_MOV_B32 -1
     ; GFX7-NEXT: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (p3), addrspace 3)
     ; GFX7-NEXT: $vgpr0 = COPY [[DS_READ_B32_]]
-    ; GFX9-LABEL: name: load_local_p3_from_4
-    ; GFX9: liveins: $vgpr0
-    ; GFX9-NEXT: {{  $}}
-    ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-    ; GFX9-NEXT: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (p3), addrspace 3)
-    ; GFX9-NEXT: $vgpr0 = COPY [[DS_READ_B32_gfx9_]]
     ; GFX6-LABEL: name: load_local_p3_from_4
     ; GFX6: liveins: $vgpr0
     ; GFX6-NEXT: {{  $}}
@@ -298,6 +334,18 @@ body: |
     ; GFX6-NEXT: $m0 = S_MOV_B32 -1
     ; GFX6-NEXT: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (p3), addrspace 3)
     ; GFX6-NEXT: $vgpr0 = COPY [[DS_READ_B32_]]
+    ; GFX9-LABEL: name: load_local_p3_from_4
+    ; GFX9: liveins: $vgpr0
+    ; GFX9-NEXT: {{  $}}
+    ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX9-NEXT: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (p3), addrspace 3)
+    ; GFX9-NEXT: $vgpr0 = COPY [[DS_READ_B32_gfx9_]]
+    ; GFX10-LABEL: name: load_local_p3_from_4
+    ; GFX10: liveins: $vgpr0
+    ; GFX10-NEXT: {{  $}}
+    ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX10-NEXT: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (p3), addrspace 3)
+    ; GFX10-NEXT: $vgpr0 = COPY [[DS_READ_B32_gfx9_]]
     %0:vgpr(p3) = COPY $vgpr0
     %1:vgpr(p3) = G_LOAD %0 :: (load (p3), align 4, addrspace 3)
     $vgpr0 = COPY %1
@@ -322,12 +370,6 @@ body: |
     ; GFX7-NEXT: $m0 = S_MOV_B32 -1
     ; GFX7-NEXT: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (p5), addrspace 3)
     ; GFX7-NEXT: $vgpr0 = COPY [[DS_READ_B32_]]
-    ; GFX9-LABEL: name: load_local_p5_from_4
-    ; GFX9: liveins: $vgpr0
-    ; GFX9-NEXT: {{  $}}
-    ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-    ; GFX9-NEXT: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (p5), addrspace 3)
-    ; GFX9-NEXT: $vgpr0 = COPY [[DS_READ_B32_gfx9_]]
     ; GFX6-LABEL: name: load_local_p5_from_4
     ; GFX6: liveins: $vgpr0
     ; GFX6-NEXT: {{  $}}
@@ -335,6 +377,18 @@ body: |
     ; GFX6-NEXT: $m0 = S_MOV_B32 -1
     ; GFX6-NEXT: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (p5), addrspace 3)
     ; GFX6-NEXT: $vgpr0 = COPY [[DS_READ_B32_]]
+    ; GFX9-LABEL: name: load_local_p5_from_4
+    ; GFX9: liveins: $vgpr0
+    ; GFX9-NEXT: {{  $}}
+    ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX9-NEXT: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (p5), addrspace 3)
+    ; GFX9-NEXT: $vgpr0 = COPY [[DS_READ_B32_gfx9_]]
+    ; GFX10-LABEL: name: load_local_p5_from_4
+    ; GFX10: liveins: $vgpr0
+    ; GFX10-NEXT: {{  $}}
+    ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX10-NEXT: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (p5), addrspace 3)
+    ; GFX10-NEXT: $vgpr0 = COPY [[DS_READ_B32_gfx9_]]
     %0:vgpr(p3) = COPY $vgpr0
     %1:vgpr(p5) = G_LOAD %0 :: (load (p5), align 4, addrspace 3)
     $vgpr0 = COPY %1
@@ -359,12 +413,6 @@ body: |
     ; GFX7-NEXT: $m0 = S_MOV_B32 -1
     ; GFX7-NEXT: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (p1), addrspace 3)
     ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ_B64_]]
-    ; GFX9-LABEL: name: load_local_p1_align8
-    ; GFX9: liveins: $vgpr0
-    ; GFX9-NEXT: {{  $}}
-    ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-    ; GFX9-NEXT: [[DS_READ_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_READ_B64_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (p1), addrspace 3)
-    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ_B64_gfx9_]]
     ; GFX6-LABEL: name: load_local_p1_align8
     ; GFX6: liveins: $vgpr0
     ; GFX6-NEXT: {{  $}}
@@ -372,6 +420,18 @@ body: |
     ; GFX6-NEXT: $m0 = S_MOV_B32 -1
     ; GFX6-NEXT: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (p1), addrspace 3)
     ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ_B64_]]
+    ; GFX9-LABEL: name: load_local_p1_align8
+    ; GFX9: liveins: $vgpr0
+    ; GFX9-NEXT: {{  $}}
+    ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX9-NEXT: [[DS_READ_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_READ_B64_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (p1), addrspace 3)
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ_B64_gfx9_]]
+    ; GFX10-LABEL: name: load_local_p1_align8
+    ; GFX10: liveins: $vgpr0
+    ; GFX10-NEXT: {{  $}}
+    ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX10-NEXT: [[DS_READ_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_READ_B64_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (p1), addrspace 3)
+    ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ_B64_gfx9_]]
     %0:vgpr(p3) = COPY $vgpr0
     %1:vgpr(p1) = G_LOAD %0 :: (load (p1), align 8, addrspace 3)
     $vgpr0_vgpr1 = COPY %1
@@ -396,12 +456,6 @@ body: |
     ; GFX7-NEXT: $m0 = S_MOV_B32 -1
     ; GFX7-NEXT: [[DS_READ2_B32_:%[0-9]+]]:vreg_64 = DS_READ2_B32 [[COPY]], 0, 1, 0, implicit $m0, implicit $exec :: (load (p1), align 4, addrspace 3)
     ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ2_B32_]]
-    ; GFX9-LABEL: name: load_local_p1_align4
-    ; GFX9: liveins: $vgpr0
-    ; GFX9-NEXT: {{  $}}
-    ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-    ; GFX9-NEXT: [[DS_READ2_B32_gfx9_:%[0-9]+]]:vreg_64 = DS_READ2_B32_gfx9 [[COPY]], 0, 1, 0, implicit $exec :: (load (p1), align 4, addrspace 3)
-    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ2_B32_gfx9_]]
     ; GFX6-LABEL: name: load_local_p1_align4
     ; GFX6: liveins: $vgpr0
     ; GFX6-NEXT: {{  $}}
@@ -409,6 +463,18 @@ body: |
     ; GFX6-NEXT: $m0 = S_MOV_B32 -1
     ; GFX6-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p3) :: (load (p1), align 4, addrspace 3)
     ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
+    ; GFX9-LABEL: name: load_local_p1_align4
+    ; GFX9: liveins: $vgpr0
+    ; GFX9-NEXT: {{  $}}
+    ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX9-NEXT: [[DS_READ2_B32_gfx9_:%[0-9]+]]:vreg_64 = DS_READ2_B32_gfx9 [[COPY]], 0, 1, 0, implicit $exec :: (load (p1), align 4, addrspace 3)
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ2_B32_gfx9_]]
+    ; GFX10-LABEL: name: load_local_p1_align4
+    ; GFX10: liveins: $vgpr0
+    ; GFX10-NEXT: {{  $}}
+    ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX10-NEXT: [[DS_READ2_B32_gfx9_:%[0-9]+]]:vreg_64 = DS_READ2_B32_gfx9 [[COPY]], 0, 1, 0, implicit $exec :: (load (p1), align 4, addrspace 3)
+    ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ2_B32_gfx9_]]
     %0:vgpr(p3) = COPY $vgpr0
     %1:vgpr(p1) = G_LOAD %0 :: (load (p1), align 4, addrspace 3)
     $vgpr0_vgpr1 = COPY %1
@@ -433,12 +499,6 @@ body: |
     ; GFX7-NEXT: $m0 = S_MOV_B32 -1
     ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p3) :: (load (p999), addrspace 3)
     ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p999)
-    ; GFX9-LABEL: name: load_local_p999_from_8
-    ; GFX9: liveins: $vgpr0
-    ; GFX9-NEXT: {{  $}}
-    ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
-    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p3) :: (load (p999), addrspace 3)
-    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p999)
     ; GFX6-LABEL: name: load_local_p999_from_8
     ; GFX6: liveins: $vgpr0
     ; GFX6-NEXT: {{  $}}
@@ -446,6 +506,18 @@ body: |
     ; GFX6-NEXT: $m0 = S_MOV_B32 -1
     ; GFX6-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p3) :: (load (p999), addrspace 3)
     ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p999)
+    ; GFX9-LABEL: name: load_local_p999_from_8
+    ; GFX9: liveins: $vgpr0
+    ; GFX9-NEXT: {{  $}}
+    ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p3) :: (load (p999), addrspace 3)
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p999)
+    ; GFX10-LABEL: name: load_local_p999_from_8
+    ; GFX10: liveins: $vgpr0
+    ; GFX10-NEXT: {{  $}}
+    ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+    ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p3) :: (load (p999), addrspace 3)
+    ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p999)
     %0:vgpr(p3) = COPY $vgpr0
     %1:vgpr(p999) = G_LOAD %0 :: (load (p999), align 8, addrspace 3)
     $vgpr0_vgpr1 = COPY %1
@@ -470,12 +542,6 @@ body: |
     ; GFX7-NEXT: $m0 = S_MOV_B32 -1
     ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load (<2 x p3>), addrspace 3)
     ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
-    ; GFX9-LABEL: name: load_local_v2p3
-    ; GFX9: liveins: $vgpr0
-    ; GFX9-NEXT: {{  $}}
-    ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
-    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load (<2 x p3>), addrspace 3)
-    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
     ; GFX6-LABEL: name: load_local_v2p3
     ; GFX6: liveins: $vgpr0
     ; GFX6-NEXT: {{  $}}
@@ -483,6 +549,18 @@ body: |
     ; GFX6-NEXT: $m0 = S_MOV_B32 -1
     ; GFX6-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load (<2 x p3>), addrspace 3)
     ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
+    ; GFX9-LABEL: name: load_local_v2p3
+    ; GFX9: liveins: $vgpr0
+    ; GFX9-NEXT: {{  $}}
+    ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load (<2 x p3>), addrspace 3)
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
+    ; GFX10-LABEL: name: load_local_v2p3
+    ; GFX10: liveins: $vgpr0
+    ; GFX10-NEXT: {{  $}}
+    ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+    ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load (<2 x p3>), addrspace 3)
+    ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
     %0:vgpr(p3) = COPY $vgpr0
     %1:vgpr(<2 x p3>) = G_LOAD %0 :: (load (<2 x p3>), align 8, addrspace 3)
     $vgpr0_vgpr1 = COPY %1
@@ -507,12 +585,6 @@ body: |
     ; GFX7-NEXT: $m0 = S_MOV_B32 -1
     ; GFX7-NEXT: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (<2 x s16>), addrspace 3)
     ; GFX7-NEXT: $vgpr0 = COPY [[DS_READ_B32_]]
-    ; GFX9-LABEL: name: load_local_v2s16
-    ; GFX9: liveins: $vgpr0
-    ; GFX9-NEXT: {{  $}}
-    ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-    ; GFX9-NEXT: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (<2 x s16>), addrspace 3)
-    ; GFX9-NEXT: $vgpr0 = COPY [[DS_READ_B32_gfx9_]]
     ; GFX6-LABEL: name: load_local_v2s16
     ; GFX6: liveins: $vgpr0
     ; GFX6-NEXT: {{  $}}
@@ -520,6 +592,18 @@ body: |
     ; GFX6-NEXT: $m0 = S_MOV_B32 -1
     ; GFX6-NEXT: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (<2 x s16>), addrspace 3)
     ; GFX6-NEXT: $vgpr0 = COPY [[DS_READ_B32_]]
+    ; GFX9-LABEL: name: load_local_v2s16
+    ; GFX9: liveins: $vgpr0
+    ; GFX9-NEXT: {{  $}}
+    ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX9-NEXT: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (<2 x s16>), addrspace 3)
+    ; GFX9-NEXT: $vgpr0 = COPY [[DS_READ_B32_gfx9_]]
+    ; GFX10-LABEL: name: load_local_v2s16
+    ; GFX10: liveins: $vgpr0
+    ; GFX10-NEXT: {{  $}}
+    ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX10-NEXT: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (<2 x s16>), addrspace 3)
+    ; GFX10-NEXT: $vgpr0 = COPY [[DS_READ_B32_gfx9_]]
     %0:vgpr(p3) = COPY $vgpr0
     %1:vgpr(<2 x s16>) = G_LOAD %0 :: (load (<2 x s16>), align 4, addrspace 3)
     $vgpr0 = COPY %1
@@ -544,12 +628,6 @@ body: |
     ; GFX7-NEXT: $m0 = S_MOV_B32 -1
     ; GFX7-NEXT: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (<4 x s16>), addrspace 3)
     ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ_B64_]]
-    ; GFX9-LABEL: name: load_local_v4s16
-    ; GFX9: liveins: $vgpr0
-    ; GFX9-NEXT: {{  $}}
-    ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-    ; GFX9-NEXT: [[DS_READ_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_READ_B64_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (<4 x s16>), addrspace 3)
-    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ_B64_gfx9_]]
     ; GFX6-LABEL: name: load_local_v4s16
     ; GFX6: liveins: $vgpr0
     ; GFX6-NEXT: {{  $}}
@@ -557,6 +635,18 @@ body: |
     ; GFX6-NEXT: $m0 = S_MOV_B32 -1
     ; GFX6-NEXT: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (<4 x s16>), addrspace 3)
     ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ_B64_]]
+    ; GFX9-LABEL: name: load_local_v4s16
+    ; GFX9: liveins: $vgpr0
+    ; GFX9-NEXT: {{  $}}
+    ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX9-NEXT: [[DS_READ_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_READ_B64_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (<4 x s16>), addrspace 3)
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ_B64_gfx9_]]
+    ; GFX10-LABEL: name: load_local_v4s16
+    ; GFX10: liveins: $vgpr0
+    ; GFX10-NEXT: {{  $}}
+    ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX10-NEXT: [[DS_READ_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_READ_B64_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (<4 x s16>), addrspace 3)
+    ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ_B64_gfx9_]]
     %0:vgpr(p3) = COPY $vgpr0
     %1:vgpr(<4 x s16>) = G_LOAD %0 :: (load (<4 x s16>), align 8, addrspace 3)
     $vgpr0_vgpr1 = COPY %1
@@ -605,12 +695,6 @@ body: |
     ; GFX7-NEXT: $m0 = S_MOV_B32 -1
     ; GFX7-NEXT: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[COPY]], 65535, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3)
     ; GFX7-NEXT: $vgpr0 = COPY [[DS_READ_U8_]]
-    ; GFX9-LABEL: name: load_local_s32_from_1_gep_65535
-    ; GFX9: liveins: $vgpr0
-    ; GFX9-NEXT: {{  $}}
-    ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-    ; GFX9-NEXT: [[DS_READ_U8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[COPY]], 65535, 0, implicit $exec :: (load (s8), addrspace 3)
-    ; GFX9-NEXT: $vgpr0 = COPY [[DS_READ_U8_gfx9_]]
     ; GFX6-LABEL: name: load_local_s32_from_1_gep_65535
     ; GFX6: liveins: $vgpr0
     ; GFX6-NEXT: {{  $}}
@@ -620,6 +704,18 @@ body: |
     ; GFX6-NEXT: $m0 = S_MOV_B32 -1
     ; GFX6-NEXT: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 %2, 0, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3)
     ; GFX6-NEXT: $vgpr0 = COPY [[DS_READ_U8_]]
+    ; GFX9-LABEL: name: load_local_s32_from_1_gep_65535
+    ; GFX9: liveins: $vgpr0
+    ; GFX9-NEXT: {{  $}}
+    ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX9-NEXT: [[DS_READ_U8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[COPY]], 65535, 0, implicit $exec :: (load (s8), addrspace 3)
+    ; GFX9-NEXT: $vgpr0 = COPY [[DS_READ_U8_gfx9_]]
+    ; GFX10-LABEL: name: load_local_s32_from_1_gep_65535
+    ; GFX10: liveins: $vgpr0
+    ; GFX10-NEXT: {{  $}}
+    ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX10-NEXT: [[DS_READ_U8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[COPY]], 65535, 0, implicit $exec :: (load (s8), addrspace 3)
+    ; GFX10-NEXT: $vgpr0 = COPY [[DS_READ_U8_gfx9_]]
     %0:vgpr(p3) = COPY $vgpr0
     %1:vgpr(s32) = G_CONSTANT i32 65535
     %2:vgpr(p3) = G_PTR_ADD %0, %1
@@ -648,14 +744,6 @@ body: |
     ; GFX7-NEXT: $m0 = S_MOV_B32 -1
     ; GFX7-NEXT: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[V_AND_B32_e64_]], 65535, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3)
     ; GFX7-NEXT: $vgpr0 = COPY [[DS_READ_U8_]]
-    ; GFX9-LABEL: name: load_local_s32_from_1_gep_65535_known_bits_base_address
-    ; GFX9: liveins: $vgpr0
-    ; GFX9-NEXT: {{  $}}
-    ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-    ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2147483647, implicit $exec
-    ; GFX9-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], [[V_MOV_B32_e32_]], implicit $exec
-    ; GFX9-NEXT: [[DS_READ_U8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[V_AND_B32_e64_]], 65535, 0, implicit $exec :: (load (s8), addrspace 3)
-    ; GFX9-NEXT: $vgpr0 = COPY [[DS_READ_U8_gfx9_]]
     ; GFX6-LABEL: name: load_local_s32_from_1_gep_65535_known_bits_base_address
     ; GFX6: liveins: $vgpr0
     ; GFX6-NEXT: {{  $}}
@@ -665,6 +753,22 @@ body: |
     ; GFX6-NEXT: $m0 = S_MOV_B32 -1
     ; GFX6-NEXT: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[V_AND_B32_e64_]], 65535, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3)
     ; GFX6-NEXT: $vgpr0 = COPY [[DS_READ_U8_]]
+    ; GFX9-LABEL: name: load_local_s32_from_1_gep_65535_known_bits_base_address
+    ; GFX9: liveins: $vgpr0
+    ; GFX9-NEXT: {{  $}}
+    ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2147483647, implicit $exec
+    ; GFX9-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], [[V_MOV_B32_e32_]], implicit $exec
+    ; GFX9-NEXT: [[DS_READ_U8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[V_AND_B32_e64_]], 65535, 0, implicit $exec :: (load (s8), addrspace 3)
+    ; GFX9-NEXT: $vgpr0 = COPY [[DS_READ_U8_gfx9_]]
+    ; GFX10-LABEL: name: load_local_s32_from_1_gep_65535_known_bits_base_address
+    ; GFX10: liveins: $vgpr0
+    ; GFX10-NEXT: {{  $}}
+    ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2147483647, implicit $exec
+    ; GFX10-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], [[V_MOV_B32_e32_]], implicit $exec
+    ; GFX10-NEXT: [[DS_READ_U8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[V_AND_B32_e64_]], 65535, 0, implicit $exec :: (load (s8), addrspace 3)
+    ; GFX10-NEXT: $vgpr0 = COPY [[DS_READ_U8_gfx9_]]
     %0:vgpr(s32) = COPY $vgpr0
     %1:vgpr(s32) = G_CONSTANT i32 2147483647
     %2:vgpr(s32) = G_AND %0, %1
@@ -696,14 +800,6 @@ body: |
     ; GFX7-NEXT: $m0 = S_MOV_B32 -1
     ; GFX7-NEXT: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 %2, 0, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3)
     ; GFX7-NEXT: $vgpr0 = COPY [[DS_READ_U8_]]
-    ; GFX9-LABEL: name: load_local_s32_from_1_gep_65536
-    ; GFX9: liveins: $vgpr0
-    ; GFX9-NEXT: {{  $}}
-    ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-    ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 65536, implicit $exec
-    ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
-    ; GFX9-NEXT: [[DS_READ_U8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[V_ADD_U32_e64_]], 0, 0, implicit $exec :: (load (s8), addrspace 3)
-    ; GFX9-NEXT: $vgpr0 = COPY [[DS_READ_U8_gfx9_]]
     ; GFX6-LABEL: name: load_local_s32_from_1_gep_65536
     ; GFX6: liveins: $vgpr0
     ; GFX6-NEXT: {{  $}}
@@ -713,6 +809,22 @@ body: |
     ; GFX6-NEXT: $m0 = S_MOV_B32 -1
     ; GFX6-NEXT: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 %2, 0, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3)
     ; GFX6-NEXT: $vgpr0 = COPY [[DS_READ_U8_]]
+    ; GFX9-LABEL: name: load_local_s32_from_1_gep_65536
+    ; GFX9: liveins: $vgpr0
+    ; GFX9-NEXT: {{  $}}
+    ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 65536, implicit $exec
+    ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
+    ; GFX9-NEXT: [[DS_READ_U8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[V_ADD_U32_e64_]], 0, 0, implicit $exec :: (load (s8), addrspace 3)
+    ; GFX9-NEXT: $vgpr0 = COPY [[DS_READ_U8_gfx9_]]
+    ; GFX10-LABEL: name: load_local_s32_from_1_gep_65536
+    ; GFX10: liveins: $vgpr0
+    ; GFX10-NEXT: {{  $}}
+    ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 65536, implicit $exec
+    ; GFX10-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
+    ; GFX10-NEXT: [[DS_READ_U8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[V_ADD_U32_e64_]], 0, 0, implicit $exec :: (load (s8), addrspace 3)
+    ; GFX10-NEXT: $vgpr0 = COPY [[DS_READ_U8_gfx9_]]
     %0:vgpr(p3) = COPY $vgpr0
     %1:vgpr(s32) = G_CONSTANT i32 65536
     %2:vgpr(p3) = G_PTR_ADD %0, %1
@@ -741,14 +853,6 @@ body: |
     ; GFX7-NEXT: $m0 = S_MOV_B32 -1
     ; GFX7-NEXT: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 %2, 0, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3)
     ; GFX7-NEXT: $vgpr0 = COPY [[DS_READ_U8_]]
-    ; GFX9-LABEL: name: load_local_s32_from_1_gep_m1
-    ; GFX9: liveins: $vgpr0
-    ; GFX9-NEXT: {{  $}}
-    ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-    ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
-    ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
-    ; GFX9-NEXT: [[DS_READ_U8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[V_ADD_U32_e64_]], 0, 0, implicit $exec :: (load (s8), addrspace 3)
-    ; GFX9-NEXT: $vgpr0 = COPY [[DS_READ_U8_gfx9_]]
     ; GFX6-LABEL: name: load_local_s32_from_1_gep_m1
     ; GFX6: liveins: $vgpr0
     ; GFX6-NEXT: {{  $}}
@@ -758,6 +862,22 @@ body: |
     ; GFX6-NEXT: $m0 = S_MOV_B32 -1
     ; GFX6-NEXT: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 %2, 0, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3)
     ; GFX6-NEXT: $vgpr0 = COPY [[DS_READ_U8_]]
+    ; GFX9-LABEL: name: load_local_s32_from_1_gep_m1
+    ; GFX9: liveins: $vgpr0
+    ; GFX9-NEXT: {{  $}}
+    ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
+    ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
+    ; GFX9-NEXT: [[DS_READ_U8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[V_ADD_U32_e64_]], 0, 0, implicit $exec :: (load (s8), addrspace 3)
+    ; GFX9-NEXT: $vgpr0 = COPY [[DS_READ_U8_gfx9_]]
+    ; GFX10-LABEL: name: load_local_s32_from_1_gep_m1
+    ; GFX10: liveins: $vgpr0
+    ; GFX10-NEXT: {{  $}}
+    ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
+    ; GFX10-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
+    ; GFX10-NEXT: [[DS_READ_U8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[V_ADD_U32_e64_]], 0, 0, implicit $exec :: (load (s8), addrspace 3)
+    ; GFX10-NEXT: $vgpr0 = COPY [[DS_READ_U8_gfx9_]]
     %0:vgpr(p3) = COPY $vgpr0
     %1:vgpr(s32) = G_CONSTANT i32 -1
     %2:vgpr(p3) = G_PTR_ADD %0, %1
@@ -784,12 +904,6 @@ body: |
     ; GFX7-NEXT: $m0 = S_MOV_B32 -1
     ; GFX7-NEXT: [[DS_READ2_B32_:%[0-9]+]]:vreg_64 = DS_READ2_B32 [[COPY]], 254, 255, 0, implicit $m0, implicit $exec :: (load (s64), align 4, addrspace 3)
     ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ2_B32_]]
-    ; GFX9-LABEL: name: load_local_s64_align4_from_1_gep_1016
-    ; GFX9: liveins: $vgpr0_vgpr1
-    ; GFX9-NEXT: {{  $}}
-    ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-    ; GFX9-NEXT: [[DS_READ2_B32_gfx9_:%[0-9]+]]:vreg_64 = DS_READ2_B32_gfx9 [[COPY]], 254, 255, 0, implicit $exec :: (load (s64), align 4, addrspace 3)
-    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ2_B32_gfx9_]]
     ; GFX6-LABEL: name: load_local_s64_align4_from_1_gep_1016
     ; GFX6: liveins: $vgpr0_vgpr1
     ; GFX6-NEXT: {{  $}}
@@ -799,6 +913,18 @@ body: |
     ; GFX6-NEXT: $m0 = S_MOV_B32 -1
     ; GFX6-NEXT: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64), align 4, addrspace 3)
     ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; GFX9-LABEL: name: load_local_s64_align4_from_1_gep_1016
+    ; GFX9: liveins: $vgpr0_vgpr1
+    ; GFX9-NEXT: {{  $}}
+    ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX9-NEXT: [[DS_READ2_B32_gfx9_:%[0-9]+]]:vreg_64 = DS_READ2_B32_gfx9 [[COPY]], 254, 255, 0, implicit $exec :: (load (s64), align 4, addrspace 3)
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ2_B32_gfx9_]]
+    ; GFX10-LABEL: name: load_local_s64_align4_from_1_gep_1016
+    ; GFX10: liveins: $vgpr0_vgpr1
+    ; GFX10-NEXT: {{  $}}
+    ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX10-NEXT: [[DS_READ2_B32_gfx9_:%[0-9]+]]:vreg_64 = DS_READ2_B32_gfx9 [[COPY]], 254, 255, 0, implicit $exec :: (load (s64), align 4, addrspace 3)
+    ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ2_B32_gfx9_]]
     %0:vgpr(p3) = COPY $vgpr0
     %1:vgpr(s32) = G_CONSTANT i32 1016
     %2:vgpr(p3) = G_PTR_ADD %0, %1
@@ -827,14 +953,6 @@ body: |
     ; GFX7-NEXT: $m0 = S_MOV_B32 -1
     ; GFX7-NEXT: [[DS_READ2_B32_:%[0-9]+]]:vreg_64 = DS_READ2_B32 %2, 0, 1, 0, implicit $m0, implicit $exec :: (load (s64), align 4, addrspace 3)
     ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ2_B32_]]
-    ; GFX9-LABEL: name: load_local_s64_align4_from_1_gep_1020
-    ; GFX9: liveins: $vgpr0_vgpr1
-    ; GFX9-NEXT: {{  $}}
-    ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-    ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1020, implicit $exec
-    ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
-    ; GFX9-NEXT: [[DS_READ2_B32_gfx9_:%[0-9]+]]:vreg_64 = DS_READ2_B32_gfx9 [[V_ADD_U32_e64_]], 0, 1, 0, implicit $exec :: (load (s64), align 4, addrspace 3)
-    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ2_B32_gfx9_]]
     ; GFX6-LABEL: name: load_local_s64_align4_from_1_gep_1020
     ; GFX6: liveins: $vgpr0_vgpr1
     ; GFX6-NEXT: {{  $}}
@@ -844,6 +962,22 @@ body: |
     ; GFX6-NEXT: $m0 = S_MOV_B32 -1
     ; GFX6-NEXT: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64), align 4, addrspace 3)
     ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; GFX9-LABEL: name: load_local_s64_align4_from_1_gep_1020
+    ; GFX9: liveins: $vgpr0_vgpr1
+    ; GFX9-NEXT: {{  $}}
+    ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1020, implicit $exec
+    ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
+    ; GFX9-NEXT: [[DS_READ2_B32_gfx9_:%[0-9]+]]:vreg_64 = DS_READ2_B32_gfx9 [[V_ADD_U32_e64_]], 0, 1, 0, implicit $exec :: (load (s64), align 4, addrspace 3)
+    ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ2_B32_gfx9_]]
+    ; GFX10-LABEL: name: load_local_s64_align4_from_1_gep_1020
+    ; GFX10: liveins: $vgpr0_vgpr1
+    ; GFX10-NEXT: {{  $}}
+    ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1020, implicit $exec
+    ; GFX10-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
+    ; GFX10-NEXT: [[DS_READ2_B32_gfx9_:%[0-9]+]]:vreg_64 = DS_READ2_B32_gfx9 [[V_ADD_U32_e64_]], 0, 1, 0, implicit $exec :: (load (s64), align 4, addrspace 3)
+    ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ2_B32_gfx9_]]
     %0:vgpr(p3) = COPY $vgpr0
     %1:vgpr(s32) = G_CONSTANT i32 1020
     %2:vgpr(p3) = G_PTR_ADD %0, %1

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-local.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-local.mir
index b1e95ee833d4d..a68831949e282 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-local.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-local.mir
@@ -3,7 +3,7 @@
 # RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs  -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX7 %s
 # RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs  -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX7 %s
 # RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s
-# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s
+# RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=+cumode -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX10 %s
 
 ---
 
@@ -26,12 +26,6 @@ body: |
     ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
     ; GFX7-NEXT: $m0 = S_MOV_B32 -1
     ; GFX7-NEXT: DS_WRITE_B32 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store (s32), addrspace 3)
-    ; GFX9-LABEL: name: store_local_s32_to_4
-    ; GFX9: liveins: $vgpr0, $vgpr1
-    ; GFX9-NEXT: {{  $}}
-    ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-    ; GFX9-NEXT: DS_WRITE_B32_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store (s32), addrspace 3)
     ; GFX6-LABEL: name: store_local_s32_to_4
     ; GFX6: liveins: $vgpr0, $vgpr1
     ; GFX6-NEXT: {{  $}}
@@ -39,6 +33,18 @@ body: |
     ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
     ; GFX6-NEXT: $m0 = S_MOV_B32 -1
     ; GFX6-NEXT: DS_WRITE_B32 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store (s32), addrspace 3)
+    ; GFX9-LABEL: name: store_local_s32_to_4
+    ; GFX9: liveins: $vgpr0, $vgpr1
+    ; GFX9-NEXT: {{  $}}
+    ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; GFX9-NEXT: DS_WRITE_B32_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store (s32), addrspace 3)
+    ; GFX10-LABEL: name: store_local_s32_to_4
+    ; GFX10: liveins: $vgpr0, $vgpr1
+    ; GFX10-NEXT: {{  $}}
+    ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; GFX10-NEXT: DS_WRITE_B32_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store (s32), addrspace 3)
     %0:vgpr(s32) = COPY $vgpr0
     %1:vgpr(p3) = COPY $vgpr1
     G_STORE %0, %1 :: (store (s32), align 4, addrspace 3)
@@ -66,12 +72,6 @@ body: |
     ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
     ; GFX7-NEXT: $m0 = S_MOV_B32 -1
     ; GFX7-NEXT: DS_WRITE_B16 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store (s16), addrspace 3)
-    ; GFX9-LABEL: name: store_local_s32_to_2
-    ; GFX9: liveins: $vgpr0, $vgpr1
-    ; GFX9-NEXT: {{  $}}
-    ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-    ; GFX9-NEXT: DS_WRITE_B16_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store (s16), addrspace 3)
     ; GFX6-LABEL: name: store_local_s32_to_2
     ; GFX6: liveins: $vgpr0, $vgpr1
     ; GFX6-NEXT: {{  $}}
@@ -79,6 +79,18 @@ body: |
     ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
     ; GFX6-NEXT: $m0 = S_MOV_B32 -1
     ; GFX6-NEXT: DS_WRITE_B16 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store (s16), addrspace 3)
+    ; GFX9-LABEL: name: store_local_s32_to_2
+    ; GFX9: liveins: $vgpr0, $vgpr1
+    ; GFX9-NEXT: {{  $}}
+    ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; GFX9-NEXT: DS_WRITE_B16_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store (s16), addrspace 3)
+    ; GFX10-LABEL: name: store_local_s32_to_2
+    ; GFX10: liveins: $vgpr0, $vgpr1
+    ; GFX10-NEXT: {{  $}}
+    ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; GFX10-NEXT: DS_WRITE_B16_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store (s16), addrspace 3)
     %0:vgpr(s32) = COPY $vgpr0
     %1:vgpr(p3) = COPY $vgpr1
     G_STORE %0, %1 :: (store (s16), align 2, addrspace 3)
@@ -106,12 +118,6 @@ body: |
     ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
     ; GFX7-NEXT: $m0 = S_MOV_B32 -1
     ; GFX7-NEXT: DS_WRITE_B8 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store (s8), addrspace 3)
-    ; GFX9-LABEL: name: store_local_s32_to_1
-    ; GFX9: liveins: $vgpr0, $vgpr1
-    ; GFX9-NEXT: {{  $}}
-    ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-    ; GFX9-NEXT: DS_WRITE_B8_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store (s8), addrspace 3)
     ; GFX6-LABEL: name: store_local_s32_to_1
     ; GFX6: liveins: $vgpr0, $vgpr1
     ; GFX6-NEXT: {{  $}}
@@ -119,6 +125,18 @@ body: |
     ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
     ; GFX6-NEXT: $m0 = S_MOV_B32 -1
     ; GFX6-NEXT: DS_WRITE_B8 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store (s8), addrspace 3)
+    ; GFX9-LABEL: name: store_local_s32_to_1
+    ; GFX9: liveins: $vgpr0, $vgpr1
+    ; GFX9-NEXT: {{  $}}
+    ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; GFX9-NEXT: DS_WRITE_B8_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store (s8), addrspace 3)
+    ; GFX10-LABEL: name: store_local_s32_to_1
+    ; GFX10: liveins: $vgpr0, $vgpr1
+    ; GFX10-NEXT: {{  $}}
+    ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; GFX10-NEXT: DS_WRITE_B8_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store (s8), addrspace 3)
     %0:vgpr(s32) = COPY $vgpr0
     %1:vgpr(p3) = COPY $vgpr1
     G_STORE %0, %1 :: (store (s8), align 1, addrspace 3)
@@ -146,12 +164,6 @@ body: |
     ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
     ; GFX7-NEXT: $m0 = S_MOV_B32 -1
     ; GFX7-NEXT: DS_WRITE_B32 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store (<2 x s16>), addrspace 3)
-    ; GFX9-LABEL: name: store_local_v2s16
-    ; GFX9: liveins: $vgpr0, $vgpr1
-    ; GFX9-NEXT: {{  $}}
-    ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-    ; GFX9-NEXT: DS_WRITE_B32_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store (<2 x s16>), addrspace 3)
     ; GFX6-LABEL: name: store_local_v2s16
     ; GFX6: liveins: $vgpr0, $vgpr1
     ; GFX6-NEXT: {{  $}}
@@ -159,6 +171,18 @@ body: |
     ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
     ; GFX6-NEXT: $m0 = S_MOV_B32 -1
     ; GFX6-NEXT: DS_WRITE_B32 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store (<2 x s16>), addrspace 3)
+    ; GFX9-LABEL: name: store_local_v2s16
+    ; GFX9: liveins: $vgpr0, $vgpr1
+    ; GFX9-NEXT: {{  $}}
+    ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; GFX9-NEXT: DS_WRITE_B32_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store (<2 x s16>), addrspace 3)
+    ; GFX10-LABEL: name: store_local_v2s16
+    ; GFX10: liveins: $vgpr0, $vgpr1
+    ; GFX10-NEXT: {{  $}}
+    ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; GFX10-NEXT: DS_WRITE_B32_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store (<2 x s16>), addrspace 3)
     %0:vgpr(<2 x s16>) = COPY $vgpr0
     %1:vgpr(p3) = COPY $vgpr1
     G_STORE %0, %1 :: (store (<2 x s16>), align 4, addrspace 3)
@@ -186,12 +210,6 @@ body: |
     ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
     ; GFX7-NEXT: $m0 = S_MOV_B32 -1
     ; GFX7-NEXT: DS_WRITE_B32 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store (p3), addrspace 3)
-    ; GFX9-LABEL: name: store_local_p3
-    ; GFX9: liveins: $vgpr0, $vgpr1
-    ; GFX9-NEXT: {{  $}}
-    ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-    ; GFX9-NEXT: DS_WRITE_B32_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store (p3), addrspace 3)
     ; GFX6-LABEL: name: store_local_p3
     ; GFX6: liveins: $vgpr0, $vgpr1
     ; GFX6-NEXT: {{  $}}
@@ -199,6 +217,18 @@ body: |
     ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
     ; GFX6-NEXT: $m0 = S_MOV_B32 -1
     ; GFX6-NEXT: DS_WRITE_B32 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store (p3), addrspace 3)
+    ; GFX9-LABEL: name: store_local_p3
+    ; GFX9: liveins: $vgpr0, $vgpr1
+    ; GFX9-NEXT: {{  $}}
+    ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; GFX9-NEXT: DS_WRITE_B32_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store (p3), addrspace 3)
+    ; GFX10-LABEL: name: store_local_p3
+    ; GFX10: liveins: $vgpr0, $vgpr1
+    ; GFX10-NEXT: {{  $}}
+    ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; GFX10-NEXT: DS_WRITE_B32_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store (p3), addrspace 3)
     %0:vgpr(p3) = COPY $vgpr0
     %1:vgpr(p3) = COPY $vgpr1
     G_STORE %0, %1 :: (store (p3), align 4, addrspace 3)
@@ -220,15 +250,19 @@ body: |
     ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
     ; GFX7-NEXT: $m0 = S_MOV_B32 -1
     ; GFX7-NEXT: DS_WRITE_B8 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], 0, 0, implicit $m0, implicit $exec :: (store (s8), addrspace 3)
-    ; GFX9-LABEL: name: store_local_s32_to_1_constant_4095
-    ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec
-    ; GFX9-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
-    ; GFX9-NEXT: DS_WRITE_B8_gfx9 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], 0, 0, implicit $exec :: (store (s8), addrspace 3)
     ; GFX6-LABEL: name: store_local_s32_to_1_constant_4095
     ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec
     ; GFX6-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
     ; GFX6-NEXT: $m0 = S_MOV_B32 -1
     ; GFX6-NEXT: DS_WRITE_B8 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], 0, 0, implicit $m0, implicit $exec :: (store (s8), addrspace 3)
+    ; GFX9-LABEL: name: store_local_s32_to_1_constant_4095
+    ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec
+    ; GFX9-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; GFX9-NEXT: DS_WRITE_B8_gfx9 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], 0, 0, implicit $exec :: (store (s8), addrspace 3)
+    ; GFX10-LABEL: name: store_local_s32_to_1_constant_4095
+    ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec
+    ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; GFX10-NEXT: DS_WRITE_B8_gfx9 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], 0, 0, implicit $exec :: (store (s8), addrspace 3)
     %0:vgpr(p3) = G_CONSTANT i32 4095
     %1:vgpr(s32) = G_CONSTANT i32 0
     G_STORE %1, %0 :: (store (s8), align 1, addrspace 3)
@@ -255,15 +289,19 @@ body: |
     ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
     ; GFX7-NEXT: $m0 = S_MOV_B32 -1
     ; GFX7-NEXT: DS_WRITE_B8 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], 0, 0, implicit $m0, implicit $exec :: (store (s8), addrspace 3)
-    ; GFX9-LABEL: name: store_local_s32_to_1_constant_4096
-    ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
-    ; GFX9-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
-    ; GFX9-NEXT: DS_WRITE_B8_gfx9 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], 0, 0, implicit $exec :: (store (s8), addrspace 3)
     ; GFX6-LABEL: name: store_local_s32_to_1_constant_4096
     ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
     ; GFX6-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
     ; GFX6-NEXT: $m0 = S_MOV_B32 -1
     ; GFX6-NEXT: DS_WRITE_B8 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], 0, 0, implicit $m0, implicit $exec :: (store (s8), addrspace 3)
+    ; GFX9-LABEL: name: store_local_s32_to_1_constant_4096
+    ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
+    ; GFX9-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; GFX9-NEXT: DS_WRITE_B8_gfx9 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], 0, 0, implicit $exec :: (store (s8), addrspace 3)
+    ; GFX10-LABEL: name: store_local_s32_to_1_constant_4096
+    ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
+    ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; GFX10-NEXT: DS_WRITE_B8_gfx9 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], 0, 0, implicit $exec :: (store (s8), addrspace 3)
     %0:vgpr(p3) = G_CONSTANT i32 4096
     %1:vgpr(s32) = G_CONSTANT i32 0
     G_STORE %1, %0 :: (store (s8), align 1, addrspace 3)
@@ -293,6 +331,13 @@ body: |
     ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
     ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
     ; GFX7-NEXT: DS_WRITE2_B32 [[COPY1]], [[COPY3]], [[COPY2]], 0, 1, 0, implicit $m0, implicit $exec :: (store (s64), align 4, addrspace 3)
+    ; GFX6-LABEL: name: store_local_s64_align4
+    ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2
+    ; GFX6-NEXT: {{  $}}
+    ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
+    ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr2
+    ; GFX6-NEXT: $m0 = S_MOV_B32 -1
+    ; GFX6-NEXT: G_STORE [[COPY]](s64), [[COPY1]](p3) :: (store (s64), align 4, addrspace 3)
     ; GFX9-LABEL: name: store_local_s64_align4
     ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
     ; GFX9-NEXT: {{  $}}
@@ -301,13 +346,14 @@ body: |
     ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
     ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
     ; GFX9-NEXT: DS_WRITE2_B32_gfx9 [[COPY1]], [[COPY3]], [[COPY2]], 0, 1, 0, implicit $exec :: (store (s64), align 4, addrspace 3)
-    ; GFX6-LABEL: name: store_local_s64_align4
-    ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2
-    ; GFX6-NEXT: {{  $}}
-    ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
-    ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr2
-    ; GFX6-NEXT: $m0 = S_MOV_B32 -1
-    ; GFX6-NEXT: G_STORE [[COPY]](s64), [[COPY1]](p3) :: (store (s64), align 4, addrspace 3)
+    ; GFX10-LABEL: name: store_local_s64_align4
+    ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
+    ; GFX10-NEXT: {{  $}}
+    ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX10-NEXT: DS_WRITE2_B32_gfx9 [[COPY1]], [[COPY3]], [[COPY2]], 0, 1, 0, implicit $exec :: (store (s64), align 4, addrspace 3)
     %0:vgpr(s64) = COPY $vgpr0_vgpr1
     %1:vgpr(p3) = COPY $vgpr2
     G_STORE %0, %1 :: (store (s64), align 4, addrspace 3)
@@ -337,6 +383,13 @@ body: |
     ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
     ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
     ; GFX7-NEXT: DS_WRITE2_B32 [[COPY1]], [[COPY3]], [[COPY2]], 0, 1, 0, implicit $m0, implicit $exec :: (store (p1), align 4, addrspace 3)
+    ; GFX6-LABEL: name: store_local_p1_align4
+    ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2
+    ; GFX6-NEXT: {{  $}}
+    ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr2
+    ; GFX6-NEXT: $m0 = S_MOV_B32 -1
+    ; GFX6-NEXT: G_STORE [[COPY]](p1), [[COPY1]](p3) :: (store (p1), align 4, addrspace 3)
     ; GFX9-LABEL: name: store_local_p1_align4
     ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
     ; GFX9-NEXT: {{  $}}
@@ -345,13 +398,14 @@ body: |
     ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
     ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
     ; GFX9-NEXT: DS_WRITE2_B32_gfx9 [[COPY1]], [[COPY3]], [[COPY2]], 0, 1, 0, implicit $exec :: (store (p1), align 4, addrspace 3)
-    ; GFX6-LABEL: name: store_local_p1_align4
-    ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2
-    ; GFX6-NEXT: {{  $}}
-    ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
-    ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr2
-    ; GFX6-NEXT: $m0 = S_MOV_B32 -1
-    ; GFX6-NEXT: G_STORE [[COPY]](p1), [[COPY1]](p3) :: (store (p1), align 4, addrspace 3)
+    ; GFX10-LABEL: name: store_local_p1_align4
+    ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
+    ; GFX10-NEXT: {{  $}}
+    ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX10-NEXT: DS_WRITE2_B32_gfx9 [[COPY1]], [[COPY3]], [[COPY2]], 0, 1, 0, implicit $exec :: (store (p1), align 4, addrspace 3)
     %0:vgpr(p1) = COPY $vgpr0_vgpr1
     %1:vgpr(p3) = COPY $vgpr2
     G_STORE %0, %1 :: (store (p1), align 4, addrspace 3)
@@ -381,6 +435,13 @@ body: |
     ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
     ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
     ; GFX7-NEXT: DS_WRITE2_B32 [[COPY1]], [[COPY3]], [[COPY2]], 0, 1, 0, implicit $m0, implicit $exec :: (store (<2 x s32>), align 4, addrspace 3)
+    ; GFX6-LABEL: name: store_local_v2s32_align4
+    ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2
+    ; GFX6-NEXT: {{  $}}
+    ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1
+    ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr2
+    ; GFX6-NEXT: $m0 = S_MOV_B32 -1
+    ; GFX6-NEXT: G_STORE [[COPY]](<2 x s32>), [[COPY1]](p3) :: (store (<2 x s32>), align 4, addrspace 3)
     ; GFX9-LABEL: name: store_local_v2s32_align4
     ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
     ; GFX9-NEXT: {{  $}}
@@ -389,13 +450,14 @@ body: |
     ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
     ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
     ; GFX9-NEXT: DS_WRITE2_B32_gfx9 [[COPY1]], [[COPY3]], [[COPY2]], 0, 1, 0, implicit $exec :: (store (<2 x s32>), align 4, addrspace 3)
-    ; GFX6-LABEL: name: store_local_v2s32_align4
-    ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2
-    ; GFX6-NEXT: {{  $}}
-    ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1
-    ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr2
-    ; GFX6-NEXT: $m0 = S_MOV_B32 -1
-    ; GFX6-NEXT: G_STORE [[COPY]](<2 x s32>), [[COPY1]](p3) :: (store (<2 x s32>), align 4, addrspace 3)
+    ; GFX10-LABEL: name: store_local_v2s32_align4
+    ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
+    ; GFX10-NEXT: {{  $}}
+    ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX10-NEXT: DS_WRITE2_B32_gfx9 [[COPY1]], [[COPY3]], [[COPY2]], 0, 1, 0, implicit $exec :: (store (<2 x s32>), align 4, addrspace 3)
     %0:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1
     %1:vgpr(p3) = COPY $vgpr2
     G_STORE %0, %1 :: (store (<2 x s32>), align 4, addrspace 3)
@@ -425,6 +487,13 @@ body: |
     ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
     ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
     ; GFX7-NEXT: DS_WRITE2_B32 [[COPY1]], [[COPY3]], [[COPY2]], 0, 1, 0, implicit $m0, implicit $exec :: (store (<4 x s16>), align 4, addrspace 3)
+    ; GFX6-LABEL: name: store_local_v4s16_align4
+    ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2
+    ; GFX6-NEXT: {{  $}}
+    ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1
+    ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr2
+    ; GFX6-NEXT: $m0 = S_MOV_B32 -1
+    ; GFX6-NEXT: G_STORE [[COPY]](<4 x s16>), [[COPY1]](p3) :: (store (<4 x s16>), align 4, addrspace 3)
     ; GFX9-LABEL: name: store_local_v4s16_align4
     ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
     ; GFX9-NEXT: {{  $}}
@@ -433,13 +502,14 @@ body: |
     ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
     ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
     ; GFX9-NEXT: DS_WRITE2_B32_gfx9 [[COPY1]], [[COPY3]], [[COPY2]], 0, 1, 0, implicit $exec :: (store (<4 x s16>), align 4, addrspace 3)
-    ; GFX6-LABEL: name: store_local_v4s16_align4
-    ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2
-    ; GFX6-NEXT: {{  $}}
-    ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1
-    ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr2
-    ; GFX6-NEXT: $m0 = S_MOV_B32 -1
-    ; GFX6-NEXT: G_STORE [[COPY]](<4 x s16>), [[COPY1]](p3) :: (store (<4 x s16>), align 4, addrspace 3)
+    ; GFX10-LABEL: name: store_local_v4s16_align4
+    ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
+    ; GFX10-NEXT: {{  $}}
+    ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX10-NEXT: DS_WRITE2_B32_gfx9 [[COPY1]], [[COPY3]], [[COPY2]], 0, 1, 0, implicit $exec :: (store (<4 x s16>), align 4, addrspace 3)
     %0:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1
     %1:vgpr(p3) = COPY $vgpr2
     G_STORE %0, %1 :: (store (<4 x s16>), align 4, addrspace 3)
@@ -467,12 +537,6 @@ body: |
     ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
     ; GFX7-NEXT: $m0 = S_MOV_B32 -1
     ; GFX7-NEXT: DS_WRITE_B64 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store (s64), addrspace 3)
-    ; GFX9-LABEL: name: store_local_s64_align8
-    ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
-    ; GFX9-NEXT: {{  $}}
-    ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
-    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
-    ; GFX9-NEXT: DS_WRITE_B64_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store (s64), addrspace 3)
     ; GFX6-LABEL: name: store_local_s64_align8
     ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2
     ; GFX6-NEXT: {{  $}}
@@ -480,6 +544,18 @@ body: |
     ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
     ; GFX6-NEXT: $m0 = S_MOV_B32 -1
     ; GFX6-NEXT: DS_WRITE_B64 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store (s64), addrspace 3)
+    ; GFX9-LABEL: name: store_local_s64_align8
+    ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
+    ; GFX9-NEXT: {{  $}}
+    ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GFX9-NEXT: DS_WRITE_B64_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store (s64), addrspace 3)
+    ; GFX10-LABEL: name: store_local_s64_align8
+    ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
+    ; GFX10-NEXT: {{  $}}
+    ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GFX10-NEXT: DS_WRITE_B64_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store (s64), addrspace 3)
     %0:vgpr(s64) = COPY $vgpr0_vgpr1
     %1:vgpr(p3) = COPY $vgpr2
     G_STORE %0, %1 :: (store (s64), align 8, addrspace 3)
@@ -507,12 +583,6 @@ body: |
     ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
     ; GFX7-NEXT: $m0 = S_MOV_B32 -1
     ; GFX7-NEXT: DS_WRITE_B64 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store (p1), addrspace 3)
-    ; GFX9-LABEL: name: store_local_p1_align8
-    ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
-    ; GFX9-NEXT: {{  $}}
-    ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
-    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
-    ; GFX9-NEXT: DS_WRITE_B64_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store (p1), addrspace 3)
     ; GFX6-LABEL: name: store_local_p1_align8
     ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2
     ; GFX6-NEXT: {{  $}}
@@ -520,6 +590,18 @@ body: |
     ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
     ; GFX6-NEXT: $m0 = S_MOV_B32 -1
     ; GFX6-NEXT: DS_WRITE_B64 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store (p1), addrspace 3)
+    ; GFX9-LABEL: name: store_local_p1_align8
+    ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
+    ; GFX9-NEXT: {{  $}}
+    ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GFX9-NEXT: DS_WRITE_B64_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store (p1), addrspace 3)
+    ; GFX10-LABEL: name: store_local_p1_align8
+    ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
+    ; GFX10-NEXT: {{  $}}
+    ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GFX10-NEXT: DS_WRITE_B64_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store (p1), addrspace 3)
     %0:vgpr(p1) = COPY $vgpr0_vgpr1
     %1:vgpr(p3) = COPY $vgpr2
     G_STORE %0, %1 :: (store (p1), align 8, addrspace 3)
@@ -547,12 +629,6 @@ body: |
     ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
     ; GFX7-NEXT: $m0 = S_MOV_B32 -1
     ; GFX7-NEXT: DS_WRITE_B64 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store (<2 x s32>), addrspace 3)
-    ; GFX9-LABEL: name: store_local_v2s32_align8
-    ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
-    ; GFX9-NEXT: {{  $}}
-    ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
-    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
-    ; GFX9-NEXT: DS_WRITE_B64_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store (<2 x s32>), addrspace 3)
     ; GFX6-LABEL: name: store_local_v2s32_align8
     ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2
     ; GFX6-NEXT: {{  $}}
@@ -560,6 +636,18 @@ body: |
     ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
     ; GFX6-NEXT: $m0 = S_MOV_B32 -1
     ; GFX6-NEXT: DS_WRITE_B64 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store (<2 x s32>), addrspace 3)
+    ; GFX9-LABEL: name: store_local_v2s32_align8
+    ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
+    ; GFX9-NEXT: {{  $}}
+    ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GFX9-NEXT: DS_WRITE_B64_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store (<2 x s32>), addrspace 3)
+    ; GFX10-LABEL: name: store_local_v2s32_align8
+    ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
+    ; GFX10-NEXT: {{  $}}
+    ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GFX10-NEXT: DS_WRITE_B64_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store (<2 x s32>), addrspace 3)
     %0:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1
     %1:vgpr(p3) = COPY $vgpr2
     G_STORE %0, %1 :: (store (<2 x s32>), align 8, addrspace 3)
@@ -587,12 +675,6 @@ body: |
     ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
     ; GFX7-NEXT: $m0 = S_MOV_B32 -1
     ; GFX7-NEXT: DS_WRITE_B64 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store (<4 x s16>), addrspace 3)
-    ; GFX9-LABEL: name: store_local_v4s16_align8
-    ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
-    ; GFX9-NEXT: {{  $}}
-    ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
-    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
-    ; GFX9-NEXT: DS_WRITE_B64_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store (<4 x s16>), addrspace 3)
     ; GFX6-LABEL: name: store_local_v4s16_align8
     ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2
     ; GFX6-NEXT: {{  $}}
@@ -600,6 +682,18 @@ body: |
     ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
     ; GFX6-NEXT: $m0 = S_MOV_B32 -1
     ; GFX6-NEXT: DS_WRITE_B64 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store (<4 x s16>), addrspace 3)
+    ; GFX9-LABEL: name: store_local_v4s16_align8
+    ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
+    ; GFX9-NEXT: {{  $}}
+    ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GFX9-NEXT: DS_WRITE_B64_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store (<4 x s16>), addrspace 3)
+    ; GFX10-LABEL: name: store_local_v4s16_align8
+    ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
+    ; GFX10-NEXT: {{  $}}
+    ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GFX10-NEXT: DS_WRITE_B64_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store (<4 x s16>), addrspace 3)
     %0:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1
     %1:vgpr(p3) = COPY $vgpr2
     G_STORE %0, %1 :: (store (<4 x s16>), align 8, addrspace 3)
@@ -629,14 +723,6 @@ body: |
     ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
     ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
     ; GFX7-NEXT: DS_WRITE2_B32 [[COPY1]], [[COPY3]], [[COPY2]], 254, 255, 0, implicit $m0, implicit $exec :: (store (s64), align 4, addrspace 3)
-    ; GFX9-LABEL: name: store_local_s64_align4_from_1_gep_1016
-    ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
-    ; GFX9-NEXT: {{  $}}
-    ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
-    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
-    ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
-    ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
-    ; GFX9-NEXT: DS_WRITE2_B32_gfx9 [[COPY1]], [[COPY3]], [[COPY2]], 254, 255, 0, implicit $exec :: (store (s64), align 4, addrspace 3)
     ; GFX6-LABEL: name: store_local_s64_align4_from_1_gep_1016
     ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2
     ; GFX6-NEXT: {{  $}}
@@ -646,6 +732,22 @@ body: |
     ; GFX6-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p3) = G_PTR_ADD [[COPY1]], [[C]](s32)
     ; GFX6-NEXT: $m0 = S_MOV_B32 -1
     ; GFX6-NEXT: G_STORE [[COPY]](s64), [[PTR_ADD]](p3) :: (store (s64), align 4, addrspace 3)
+    ; GFX9-LABEL: name: store_local_s64_align4_from_1_gep_1016
+    ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
+    ; GFX9-NEXT: {{  $}}
+    ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX9-NEXT: DS_WRITE2_B32_gfx9 [[COPY1]], [[COPY3]], [[COPY2]], 254, 255, 0, implicit $exec :: (store (s64), align 4, addrspace 3)
+    ; GFX10-LABEL: name: store_local_s64_align4_from_1_gep_1016
+    ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
+    ; GFX10-NEXT: {{  $}}
+    ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX10-NEXT: DS_WRITE2_B32_gfx9 [[COPY1]], [[COPY3]], [[COPY2]], 254, 255, 0, implicit $exec :: (store (s64), align 4, addrspace 3)
     %0:vgpr(s64) = COPY $vgpr0_vgpr1
     %1:vgpr(p3) = COPY $vgpr2
     %2:vgpr(s32) = G_CONSTANT i32 1016
@@ -679,6 +781,15 @@ body: |
     ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
     ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
     ; GFX7-NEXT: DS_WRITE2_B32 %3, [[COPY3]], [[COPY2]], 0, 1, 0, implicit $m0, implicit $exec :: (store (s64), align 4, addrspace 3)
+    ; GFX6-LABEL: name: store_local_s64_align4_from_1_gep_1020
+    ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2
+    ; GFX6-NEXT: {{  $}}
+    ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
+    ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr2
+    ; GFX6-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1020
+    ; GFX6-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p3) = G_PTR_ADD [[COPY1]], [[C]](s32)
+    ; GFX6-NEXT: $m0 = S_MOV_B32 -1
+    ; GFX6-NEXT: G_STORE [[COPY]](s64), [[PTR_ADD]](p3) :: (store (s64), align 4, addrspace 3)
     ; GFX9-LABEL: name: store_local_s64_align4_from_1_gep_1020
     ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
     ; GFX9-NEXT: {{  $}}
@@ -689,15 +800,16 @@ body: |
     ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
     ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
     ; GFX9-NEXT: DS_WRITE2_B32_gfx9 [[V_ADD_U32_e64_]], [[COPY3]], [[COPY2]], 0, 1, 0, implicit $exec :: (store (s64), align 4, addrspace 3)
-    ; GFX6-LABEL: name: store_local_s64_align4_from_1_gep_1020
-    ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2
-    ; GFX6-NEXT: {{  $}}
-    ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
-    ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr2
-    ; GFX6-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1020
-    ; GFX6-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p3) = G_PTR_ADD [[COPY1]], [[C]](s32)
-    ; GFX6-NEXT: $m0 = S_MOV_B32 -1
-    ; GFX6-NEXT: G_STORE [[COPY]](s64), [[PTR_ADD]](p3) :: (store (s64), align 4, addrspace 3)
+    ; GFX10-LABEL: name: store_local_s64_align4_from_1_gep_1020
+    ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
+    ; GFX10-NEXT: {{  $}}
+    ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1020, implicit $exec
+    ; GFX10-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY1]], [[V_MOV_B32_e32_]], 0, implicit $exec
+    ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX10-NEXT: DS_WRITE2_B32_gfx9 [[V_ADD_U32_e64_]], [[COPY3]], [[COPY2]], 0, 1, 0, implicit $exec :: (store (s64), align 4, addrspace 3)
     %0:vgpr(s64) = COPY $vgpr0_vgpr1
     %1:vgpr(p3) = COPY $vgpr2
     %2:vgpr(s32) = G_CONSTANT i32 1020

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-misaligned-bug.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-misaligned-bug.ll
index efb8ff4ec3824..d7d1a4cc819db 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-misaligned-bug.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-misaligned-bug.ll
@@ -1,7 +1,7 @@
-; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,ALIGNED %s
-; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1011 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,ALIGNED %s
-; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1012 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,ALIGNED %s
-; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -mattr=+cumode < %s | FileCheck -check-prefixes=GCN,ALIGNED %s
+; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,ALIGNED,ALIGNED-WGP %s
+; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1011 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,ALIGNED,ALIGNED-WGP %s
+; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1012 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,ALIGNED,ALIGNED-WGP %s
+; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -mattr=+cumode < %s | FileCheck -check-prefixes=GCN,ALIGNED,ALIGNED-CU %s
 ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -mattr=+cumode,+unaligned-access-mode < %s | FileCheck -check-prefixes=GCN,UNALIGNED %s
 
 ; GCN-LABEL: test_local_misaligned_v2:
@@ -106,8 +106,12 @@ bb:
 }
 
 ; GCN-LABEL: test_local_v4_aligned8:
-; ALIGNED-DAG: ds_read2_b64
-; ALIGNED-DAG: ds_write2_b64
+; ALIGNED-WGP-DAG: ds_read2_b32
+; ALIGNED-WGP-DAG: ds_read2_b32
+; ALIGNED-WGP-DAG: ds_write2_b32
+; ALIGNED-WGP-DAG: ds_write2_b32
+; ALIGNED-CU-DAG: ds_read2_b64
+; ALIGNED-CU-DAG: ds_write2_b64
 ; UNALIGNED-DAG: ds_read2_b64
 ; UNALIGNED-DAG: ds_write2_b64
 define amdgpu_kernel void @test_local_v4_aligned8(i32 addrspace(3)* %arg) {

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir
index 6115d2b1769a7..6c91907505498 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir
@@ -376,14 +376,8 @@ body: |
     ; GFX10-NEXT: $vgpr0 = COPY [[OR]](s32)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_s16_align1
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX10-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
-    ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[OR]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 1, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     %0:_(p3) = COPY $vgpr0
     %1:_(s16) = G_LOAD %0 :: (load (s16), align 1, addrspace 3)
     %2:_(s32) = G_ANYEXT %1
@@ -505,14 +499,8 @@ body: |
     ; GFX10-NEXT: $vgpr0 = COPY [[OR]](s32)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_s32_align2
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX10-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
-    ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[OR]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 2, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     %0:_(p3) = COPY $vgpr0
     %1:_(s32) = G_LOAD %0 :: (load (s32), align 2, addrspace 3)
     $vgpr0 = COPY %1
@@ -650,24 +638,8 @@ body: |
     ; GFX10-NEXT: $vgpr0 = COPY [[OR2]](s32)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_s32_align1
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX10-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
-    ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]]
-    ; GFX10-UNALIGNED-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX10-UNALIGNED-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]]
-    ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[OR2]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 1, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32)
     %0:_(p3) = COPY $vgpr0
     %1:_(s32) = G_LOAD %0 :: (load (s32), align 1, addrspace 3)
     $vgpr0 = COPY %1
@@ -967,20 +939,14 @@ body: |
     ; GFX10-NEXT: $vgpr0 = COPY [[OR1]](s32)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_s24_align1
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), align 1, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
     ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX10-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX10-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32)
     ; GFX10-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
-    ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX10-UNALIGNED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C3]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[OR]]
-    ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[OR1]](s32)
+    ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[OR]](s32)
     %0:_(p3) = COPY $vgpr0
     %1:_(s24) = G_LOAD %0 :: (load (s24), align 1, addrspace 3)
     %2:_(s32) = G_ANYEXT %1
@@ -1261,26 +1227,16 @@ body: |
     ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[OR2]](s64)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_s64_align2
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 2, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
     ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX10-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
-    ; GFX10-UNALIGNED-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]]
-    ; GFX10-UNALIGNED-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
-    ; GFX10-UNALIGNED-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C3]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[SHL2]], [[ZEXT]]
-    ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[OR2]](s64)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, align 2, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
+    ; GFX10-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[SHL]], [[ZEXT]]
+    ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[OR]](s64)
     %0:_(p3) = COPY $vgpr0
     %1:_(s64) = G_LOAD %0 :: (load (s64), align 2, addrspace 3)
     $vgpr0_vgpr1 = COPY %1
@@ -1538,44 +1494,16 @@ body: |
     ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[OR6]](s64)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_s64_align1
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 1, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
     ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX10-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
-    ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]]
-    ; GFX10-UNALIGNED-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX10-UNALIGNED-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]]
-    ; GFX10-UNALIGNED-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR2]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]]
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]]
-    ; GFX10-UNALIGNED-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]]
-    ; GFX10-UNALIGNED-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR5]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
-    ; GFX10-UNALIGNED-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]]
-    ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[OR6]](s64)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, align 1, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
+    ; GFX10-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[SHL]], [[ZEXT]]
+    ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[OR]](s64)
     %0:_(p3) = COPY $vgpr0
     %1:_(s64) = G_LOAD %0 :: (load (s64), align 1, addrspace 3)
     $vgpr0_vgpr1 = COPY %1
@@ -1900,53 +1828,14 @@ body: |
     ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_s96_align16
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 1, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
     ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, align 1, addrspace 3)
     ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX10-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
-    ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]]
-    ; GFX10-UNALIGNED-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX10-UNALIGNED-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]]
-    ; GFX10-UNALIGNED-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]]
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]]
-    ; GFX10-UNALIGNED-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]]
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]]
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]]
-    ; GFX10-UNALIGNED-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]]
-    ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, align 1, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32)
     ; GFX10-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
     ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     %0:_(p3) = COPY $vgpr0
@@ -2315,28 +2204,14 @@ body: |
     ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_s96_align2
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 2, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
     ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX10-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
-    ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]]
-    ; GFX10-UNALIGNED-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s16) from unknown-address + 8, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s16) from unknown-address + 10, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]]
-    ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, align 2, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, align 2, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32)
     ; GFX10-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
     ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     %0:_(p3) = COPY $vgpr0
@@ -2663,53 +2538,14 @@ body: |
     ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_s96_align1
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 1, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
     ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, align 1, addrspace 3)
     ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX10-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
-    ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]]
-    ; GFX10-UNALIGNED-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX10-UNALIGNED-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]]
-    ; GFX10-UNALIGNED-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]]
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]]
-    ; GFX10-UNALIGNED-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]]
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]]
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]]
-    ; GFX10-UNALIGNED-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]]
-    ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, align 1, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32)
     ; GFX10-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
     ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     %0:_(p3) = COPY $vgpr0
@@ -3128,68 +2964,17 @@ body: |
     ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_s128_align16
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 1, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
     ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, align 1, addrspace 3)
     ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX10-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
-    ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]]
-    ; GFX10-UNALIGNED-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX10-UNALIGNED-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]]
-    ; GFX10-UNALIGNED-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]]
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]]
-    ; GFX10-UNALIGNED-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]]
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]]
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]]
-    ; GFX10-UNALIGNED-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]]
-    ; GFX10-UNALIGNED-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]]
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD13]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]]
-    ; GFX10-UNALIGNED-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]]
-    ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, align 1, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 12, align 1, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32)
     ; GFX10-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
     ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
     %0:_(p3) = COPY $vgpr0
@@ -3243,13 +3028,33 @@ body: |
     ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
     ; GFX10-LABEL: name: test_load_local_s128_align8
     ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 8, addrspace 3)
-    ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>)
+    ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 8, addrspace 3)
+    ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, addrspace 3)
+    ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, align 8, addrspace 3)
+    ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 12, addrspace 3)
+    ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32)
+    ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
     ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_s128_align8
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 8, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 8, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, align 8, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 12, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
     ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
     %0:_(p3) = COPY $vgpr0
     %1:_(s128) = G_LOAD %0 :: (load (s128), align 8, addrspace 3)
@@ -3578,35 +3383,17 @@ body: |
     ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_s128_align2
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 2, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
     ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX10-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
-    ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]]
-    ; GFX10-UNALIGNED-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s16) from unknown-address + 8, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s16) from unknown-address + 10, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]]
-    ; GFX10-UNALIGNED-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s16) from unknown-address + 12, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s16) from unknown-address + 14, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]]
-    ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32), [[OR3]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, align 2, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, align 2, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 12, align 2, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32)
     ; GFX10-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
     ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
     %0:_(p3) = COPY $vgpr0
@@ -4025,68 +3812,17 @@ body: |
     ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_s128_align1
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 1, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
     ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, align 1, addrspace 3)
     ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX10-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
-    ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]]
-    ; GFX10-UNALIGNED-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX10-UNALIGNED-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]]
-    ; GFX10-UNALIGNED-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]]
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]]
-    ; GFX10-UNALIGNED-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]]
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]]
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]]
-    ; GFX10-UNALIGNED-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]]
-    ; GFX10-UNALIGNED-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]]
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD13]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]]
-    ; GFX10-UNALIGNED-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]]
-    ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, align 1, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 12, align 1, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32)
     ; GFX10-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
     ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
     %0:_(p3) = COPY $vgpr0
@@ -4178,12 +3914,30 @@ body: |
     ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
     ; GFX10-LABEL: name: test_load_local_p1_align4
     ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load (p1), align 4, addrspace 3)
-    ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
+    ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
+    ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD]](s32)
+    ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, addrspace 3)
+    ; GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD1]](s32)
+    ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
+    ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C1]](s32)
+    ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[SHL]], [[ZEXT]]
+    ; GFX10-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR]](s64)
+    ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p1)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_p1_align4
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load (p1), align 4, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
+    ; GFX10-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[SHL]], [[ZEXT]]
+    ; GFX10-UNALIGNED-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR]](s64)
+    ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p1)
     %0:_(p3) = COPY $vgpr0
     %1:_(p1) = G_LOAD %0 :: (load (p1), align 4, addrspace 3)
     $vgpr0_vgpr1 = COPY %1
@@ -4339,26 +4093,16 @@ body: |
     ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p1)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_p1_align2
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 2, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
     ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX10-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
-    ; GFX10-UNALIGNED-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]]
-    ; GFX10-UNALIGNED-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
-    ; GFX10-UNALIGNED-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C3]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[SHL2]], [[ZEXT]]
-    ; GFX10-UNALIGNED-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR2]](s64)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, align 2, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
+    ; GFX10-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[SHL]], [[ZEXT]]
+    ; GFX10-UNALIGNED-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR]](s64)
     ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p1)
     %0:_(p3) = COPY $vgpr0
     %1:_(p1) = G_LOAD %0 :: (load (p1), align 2, addrspace 3)
@@ -4623,44 +4367,16 @@ body: |
     ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p1)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_p1_align1
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 1, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
     ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX10-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
-    ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]]
-    ; GFX10-UNALIGNED-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX10-UNALIGNED-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]]
-    ; GFX10-UNALIGNED-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR2]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]]
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]]
-    ; GFX10-UNALIGNED-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]]
-    ; GFX10-UNALIGNED-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR5]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
-    ; GFX10-UNALIGNED-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]]
-    ; GFX10-UNALIGNED-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR6]](s64)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, align 1, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
+    ; GFX10-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[SHL]], [[ZEXT]]
+    ; GFX10-UNALIGNED-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR]](s64)
     ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p1)
     %0:_(p3) = COPY $vgpr0
     %1:_(p1) = G_LOAD %0 :: (load (p1), align 1, addrspace 3)
@@ -4788,15 +4504,8 @@ body: |
     ; GFX10-NEXT: $vgpr0 = COPY [[INTTOPTR]](p3)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_p3_align2
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX10-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
-    ; GFX10-UNALIGNED-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR]](s32)
-    ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[INTTOPTR]](p3)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p3) :: (load (p3), align 2, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](p3)
     %0:_(p3) = COPY $vgpr0
     %1:_(p3) = G_LOAD %0 :: (load (p3), align 2, addrspace 3)
     $vgpr0 = COPY %1
@@ -4940,25 +4649,8 @@ body: |
     ; GFX10-NEXT: $vgpr0 = COPY [[INTTOPTR]](p3)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_p3_align1
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX10-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
-    ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]]
-    ; GFX10-UNALIGNED-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX10-UNALIGNED-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]]
-    ; GFX10-UNALIGNED-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32)
-    ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[INTTOPTR]](p3)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p3) :: (load (p3), align 1, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](p3)
     %0:_(p3) = COPY $vgpr0
     %1:_(p3) = G_LOAD %0 :: (load (p3), align 1, addrspace 3)
     $vgpr0 = COPY %1
@@ -5085,15 +4777,8 @@ body: |
     ; GFX10-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_p5_align2
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX10-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
-    ; GFX10-UNALIGNED-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32)
-    ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p3) :: (load (p5), align 2, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](p5)
     %0:_(p3) = COPY $vgpr0
     %1:_(p5) = G_LOAD %0 :: (load (p5), align 2, addrspace 3)
     $vgpr0 = COPY %1
@@ -5237,25 +4922,8 @@ body: |
     ; GFX10-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_p5_align1
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX10-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
-    ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]]
-    ; GFX10-UNALIGNED-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX10-UNALIGNED-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]]
-    ; GFX10-UNALIGNED-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32)
-    ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p3) :: (load (p5), align 1, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](p5)
     %0:_(p3) = COPY $vgpr0
     %1:_(p5) = G_LOAD %0 :: (load (p5), align 1, addrspace 3)
     $vgpr0 = COPY %1
@@ -5484,15 +5152,10 @@ body: |
     ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_v2s8_align1
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX10-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
-    ; GFX10-UNALIGNED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[LSHR]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 1, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX10-UNALIGNED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LSHR]](s32)
     ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     %0:_(p3) = COPY $vgpr0
     %1:_(<2 x s8>) = G_LOAD %0 :: (load (<2 x s8>), align 1, addrspace 3)
@@ -6003,41 +5666,36 @@ body: |
     ; GFX10-NEXT: $vgpr0 = COPY [[OR4]](s32)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_v3s8_align1
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), align 1, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
     ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX10-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX10-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32)
     ; GFX10-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
-    ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX10-UNALIGNED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C3]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[OR]]
-    ; GFX10-UNALIGNED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C3]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX10-UNALIGNED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C2]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C1]](s32)
     ; GFX10-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; GFX10-UNALIGNED-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
-    ; GFX10-UNALIGNED-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]]
+    ; GFX10-UNALIGNED-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX10-UNALIGNED-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]]
     ; GFX10-UNALIGNED-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C4]]
-    ; GFX10-UNALIGNED-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-    ; GFX10-UNALIGNED-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C5]](s16)
-    ; GFX10-UNALIGNED-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL2]]
+    ; GFX10-UNALIGNED-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX10-UNALIGNED-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C4]](s16)
+    ; GFX10-UNALIGNED-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL1]]
     ; GFX10-UNALIGNED-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C4]]
+    ; GFX10-UNALIGNED-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C3]]
     ; GFX10-UNALIGNED-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C4]]
-    ; GFX10-UNALIGNED-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C5]](s16)
-    ; GFX10-UNALIGNED-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL3]]
-    ; GFX10-UNALIGNED-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
-    ; GFX10-UNALIGNED-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
-    ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[OR4]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C4]](s16)
+    ; GFX10-UNALIGNED-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL2]]
+    ; GFX10-UNALIGNED-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; GFX10-UNALIGNED-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; GFX10-UNALIGNED-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]]
+    ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[OR3]](s32)
     %0:_(p3) = COPY $vgpr0
     %1:_(<3 x s8>) = G_LOAD %0 :: (load (<3 x s8>), align 1, addrspace 3)
     %2:_(s24) = G_BITCAST %1
@@ -7307,122 +6965,72 @@ body: |
     ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_v16s8_align16
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 1, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
     ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, align 1, addrspace 3)
     ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX10-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
-    ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]]
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, align 1, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 12, align 1, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
     ; GFX10-UNALIGNED-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX10-UNALIGNED-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]]
-    ; GFX10-UNALIGNED-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]]
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]]
-    ; GFX10-UNALIGNED-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]]
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]]
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]]
-    ; GFX10-UNALIGNED-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]]
-    ; GFX10-UNALIGNED-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]]
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD13]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]]
-    ; GFX10-UNALIGNED-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]]
-    ; GFX10-UNALIGNED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR2]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR2]], [[C3]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; GFX10-UNALIGNED-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[OR2]], [[C6]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[OR5]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[OR5]], [[C3]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[OR5]], [[C6]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[OR8]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[OR8]], [[C3]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[OR8]], [[C6]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[OR11]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[OR11]], [[C3]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[OR11]], [[C6]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; GFX10-UNALIGNED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C7]]
-    ; GFX10-UNALIGNED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C7]]
-    ; GFX10-UNALIGNED-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL12]]
-    ; GFX10-UNALIGNED-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C7]]
-    ; GFX10-UNALIGNED-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C3]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR13:%[0-9]+]]:_(s32) = G_OR [[OR12]], [[SHL13]]
-    ; GFX10-UNALIGNED-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C7]]
-    ; GFX10-UNALIGNED-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR14:%[0-9]+]]:_(s32) = G_OR [[OR13]], [[SHL14]]
-    ; GFX10-UNALIGNED-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[OR5]], [[C7]]
-    ; GFX10-UNALIGNED-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C7]]
-    ; GFX10-UNALIGNED-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR15:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL15]]
-    ; GFX10-UNALIGNED-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C7]]
-    ; GFX10-UNALIGNED-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C3]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[OR15]], [[SHL16]]
-    ; GFX10-UNALIGNED-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C7]]
-    ; GFX10-UNALIGNED-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[OR16]], [[SHL17]]
-    ; GFX10-UNALIGNED-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[OR8]], [[C7]]
-    ; GFX10-UNALIGNED-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C7]]
-    ; GFX10-UNALIGNED-NEXT: [[SHL18:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR18:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL18]]
-    ; GFX10-UNALIGNED-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LSHR7]], [[C7]]
-    ; GFX10-UNALIGNED-NEXT: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C3]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR19:%[0-9]+]]:_(s32) = G_OR [[OR18]], [[SHL19]]
-    ; GFX10-UNALIGNED-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR8]], [[C7]]
-    ; GFX10-UNALIGNED-NEXT: [[SHL20:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR20:%[0-9]+]]:_(s32) = G_OR [[OR19]], [[SHL20]]
-    ; GFX10-UNALIGNED-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[OR11]], [[C7]]
-    ; GFX10-UNALIGNED-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C7]]
-    ; GFX10-UNALIGNED-NEXT: [[SHL21:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR21:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL21]]
-    ; GFX10-UNALIGNED-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LSHR10]], [[C7]]
-    ; GFX10-UNALIGNED-NEXT: [[SHL22:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C3]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR22:%[0-9]+]]:_(s32) = G_OR [[OR21]], [[SHL22]]
-    ; GFX10-UNALIGNED-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LSHR11]], [[C7]]
-    ; GFX10-UNALIGNED-NEXT: [[SHL23:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR23:%[0-9]+]]:_(s32) = G_OR [[OR22]], [[SHL23]]
-    ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR14]](s32), [[OR17]](s32), [[OR20]](s32), [[OR23]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C3]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX10-UNALIGNED-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C4]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C3]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C4]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[LOAD2]], [[C1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[LOAD2]], [[C3]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[LOAD2]], [[C4]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[LOAD3]], [[C1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[LOAD3]], [[C3]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[LOAD3]], [[C4]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; GFX10-UNALIGNED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C5]]
+    ; GFX10-UNALIGNED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C5]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX10-UNALIGNED-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C5]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C3]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; GFX10-UNALIGNED-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C5]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; GFX10-UNALIGNED-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C5]]
+    ; GFX10-UNALIGNED-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C5]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; GFX10-UNALIGNED-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C5]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C3]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; GFX10-UNALIGNED-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C5]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C4]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; GFX10-UNALIGNED-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C5]]
+    ; GFX10-UNALIGNED-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C5]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; GFX10-UNALIGNED-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LSHR7]], [[C5]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C3]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; GFX10-UNALIGNED-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR8]], [[C5]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C4]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; GFX10-UNALIGNED-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C5]]
+    ; GFX10-UNALIGNED-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C5]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
+    ; GFX10-UNALIGNED-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LSHR10]], [[C5]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C3]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
+    ; GFX10-UNALIGNED-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LSHR11]], [[C5]]
+    ; GFX10-UNALIGNED-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C4]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
     ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
     %0:_(p3) = COPY $vgpr0
     %1:_(<16 x s8>) = G_LOAD %0 :: (load (<16 x s8>), align 1, addrspace 3)
@@ -7557,12 +7165,8 @@ body: |
     ; GFX10-NEXT: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_v2s16_align2
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD]](s32), [[LOAD1]](s32)
-    ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 2, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>)
     %0:_(p3) = COPY $vgpr0
     %1:_(<2 x s16>) = G_LOAD %0 :: (load (<2 x s16>), align 2, addrspace 3)
     $vgpr0 = COPY %1
@@ -7712,22 +7316,8 @@ body: |
     ; GFX10-NEXT: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_v2s16_align1
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX10-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
-    ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]]
-    ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[OR]](s32), [[OR1]](s32)
-    ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 1, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>)
     %0:_(p3) = COPY $vgpr0
     %1:_(<2 x s16>) = G_LOAD %0 :: (load (<2 x s16>), align 1, addrspace 3)
     $vgpr0 = COPY %1
@@ -8451,35 +8041,21 @@ body: |
     ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_v3s16_align1
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 1, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
     ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX10-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
-    ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]]
-    ; GFX10-UNALIGNED-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]]
+    ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, align 1, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, align 1, addrspace 3)
     ; GFX10-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
     ; GFX10-UNALIGNED-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
     ; GFX10-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; GFX10-UNALIGNED-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX10-UNALIGNED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX10-UNALIGNED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
     ; GFX10-UNALIGNED-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[OR]](s32), [[OR1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[OR2]](s32), [[BITCAST]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD]](s32), [[LOAD1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD2]](s32), [[BITCAST]](s32)
     ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[BITCAST1]](s32)
     ; GFX10-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
     ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
@@ -8586,12 +8162,36 @@ body: |
     ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
     ; GFX10-LABEL: name: test_load_local_v4s16_align4
     ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), align 4, addrspace 3)
-    ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
+    ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3)
+    ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
+    ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, align 4, addrspace 3)
+    ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
+    ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3)
+    ; GFX10-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD]](s32), [[LOAD1]](s32)
+    ; GFX10-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD2]](s32), [[LOAD3]](s32)
+    ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+    ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_v4s16_align4
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), align 4, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, align 4, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD]](s32), [[LOAD1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD2]](s32), [[LOAD3]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+    ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
     %0:_(p3) = COPY $vgpr0
     %1:_(<4 x s16>) = G_LOAD %0 :: (load (<4 x s16>), align 4, addrspace 3)
     $vgpr0_vgpr1 = COPY %1
@@ -9019,36 +8619,18 @@ body: |
     ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_v4s16_align1
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 1, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
     ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX10-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
-    ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]]
-    ; GFX10-UNALIGNED-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]]
-    ; GFX10-UNALIGNED-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]]
-    ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[OR]](s32), [[OR1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[OR2]](s32), [[OR3]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, align 1, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, align 1, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, align 1, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD]](s32), [[LOAD1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD2]](s32), [[LOAD3]](s32)
     ; GFX10-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
     ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
     %0:_(p3) = COPY $vgpr0
@@ -9262,21 +8844,11 @@ body: |
     ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_v2s32_align2
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 2, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
     ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX10-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
-    ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]]
-    ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, align 2, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32)
     ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     %0:_(p3) = COPY $vgpr0
     %1:_(<2 x s32>) = G_LOAD %0 :: (load (<2 x s32>), align 2, addrspace 3)
@@ -9511,39 +9083,11 @@ body: |
     ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_v2s32_align1
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 1, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
     ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX10-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
-    ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]]
-    ; GFX10-UNALIGNED-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX10-UNALIGNED-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]]
-    ; GFX10-UNALIGNED-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]]
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]]
-    ; GFX10-UNALIGNED-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]]
-    ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, align 1, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32)
     ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     %0:_(p3) = COPY $vgpr0
     %1:_(<2 x s32>) = G_LOAD %0 :: (load (<2 x s32>), align 1, addrspace 3)
@@ -9862,53 +9406,14 @@ body: |
     ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_v3s32_align16
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 1, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
     ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, align 1, addrspace 3)
     ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX10-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
-    ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]]
-    ; GFX10-UNALIGNED-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX10-UNALIGNED-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]]
-    ; GFX10-UNALIGNED-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]]
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]]
-    ; GFX10-UNALIGNED-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]]
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]]
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]]
-    ; GFX10-UNALIGNED-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]]
-    ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, align 1, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32)
     ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
     %0:_(p3) = COPY $vgpr0
     %1:_(<3 x s32>) = G_LOAD %0 :: (load (<3 x s32>), align 1, addrspace 3)
@@ -10094,12 +9599,32 @@ body: |
     ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
     ; GFX10-LABEL: name: test_load_local_v4s32_align8
     ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 8, addrspace 3)
-    ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
+    ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 8, addrspace 3)
+    ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, addrspace 3)
+    ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, align 8, addrspace 3)
+    ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 12, addrspace 3)
+    ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32)
+    ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_v4s32_align8
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 8, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 8, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, align 8, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 12, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32)
+    ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
     %0:_(p3) = COPY $vgpr0
     %1:_(<4 x s32>) = G_LOAD %0 :: (load (<4 x s32>), align 8, addrspace 3)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -10412,35 +9937,17 @@ body: |
     ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_v4s32_align2
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 2, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
     ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX10-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
-    ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]]
-    ; GFX10-UNALIGNED-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s16) from unknown-address + 8, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s16) from unknown-address + 10, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]]
-    ; GFX10-UNALIGNED-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s16) from unknown-address + 12, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s16) from unknown-address + 14, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]]
-    ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32), [[OR3]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, align 2, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, align 2, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 12, align 2, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32)
     ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
     %0:_(p3) = COPY $vgpr0
     %1:_(<4 x s32>) = G_LOAD %0 :: (load (<4 x s32>), align 2, addrspace 3)
@@ -10851,68 +10358,17 @@ body: |
     ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_v4s32_align1
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 1, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
     ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, align 1, addrspace 3)
     ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX10-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
-    ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]]
-    ; GFX10-UNALIGNED-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX10-UNALIGNED-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]]
-    ; GFX10-UNALIGNED-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]]
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]]
-    ; GFX10-UNALIGNED-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]]
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]]
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]]
-    ; GFX10-UNALIGNED-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]]
-    ; GFX10-UNALIGNED-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]]
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD13]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]]
-    ; GFX10-UNALIGNED-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]]
-    ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, align 1, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 12, align 1, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32)
     ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
     %0:_(p3) = COPY $vgpr0
     %1:_(<4 x s32>) = G_LOAD %0 :: (load (<4 x s32>), align 1, addrspace 3)
@@ -11680,77 +11136,26 @@ body: |
     ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_v2s64_align16
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 1, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
     ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX10-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
-    ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, align 1, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
+    ; GFX10-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[SHL]], [[ZEXT]]
+    ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
     ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, align 1, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD2]](s32)
     ; GFX10-UNALIGNED-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]]
-    ; GFX10-UNALIGNED-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX10-UNALIGNED-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]]
-    ; GFX10-UNALIGNED-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR2]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]]
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]]
-    ; GFX10-UNALIGNED-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]]
-    ; GFX10-UNALIGNED-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR5]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
-    ; GFX10-UNALIGNED-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]]
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD6]]
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[ZEXTLOAD8]]
-    ; GFX10-UNALIGNED-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[OR8]], [[C3]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[OR7]]
-    ; GFX10-UNALIGNED-NEXT: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[OR9]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD9]]
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD13]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[ZEXTLOAD11]]
-    ; GFX10-UNALIGNED-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[OR11]], [[C3]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[OR10]]
-    ; GFX10-UNALIGNED-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR12]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C5]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]]
-    ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 12, align 1, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD3]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[OR1:%[0-9]+]]:_(s64) = G_OR [[SHL1]], [[ZEXT1]]
+    ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR]](s64), [[OR1]](s64)
     ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
     %0:_(p3) = COPY $vgpr0
     %1:_(<2 x s64>) = G_LOAD %0 :: (load (<2 x s64>), align 1, addrspace 3)
@@ -13347,98 +12752,23 @@ body: |
     ; GFX10-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_v2s96_align1
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 1, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
     ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, align 1, addrspace 3)
     ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX10-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
-    ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]]
-    ; GFX10-UNALIGNED-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX10-UNALIGNED-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]]
-    ; GFX10-UNALIGNED-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]]
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]]
-    ; GFX10-UNALIGNED-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]]
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]]
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]]
-    ; GFX10-UNALIGNED-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]]
-    ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, align 1, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32)
     ; GFX10-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; GFX10-UNALIGNED-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]]
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD13]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]]
-    ; GFX10-UNALIGNED-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]]
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD12:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD15]](p3) :: (load (s8) from unknown-address + 16, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD15]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD13:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD16]](p3) :: (load (s8) from unknown-address + 17, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD13]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[ZEXTLOAD12]]
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD14:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD17]](p3) :: (load (s8) from unknown-address + 18, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD17]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p3) :: (load (s8) from unknown-address + 19, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[LOAD4]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR13:%[0-9]+]]:_(s32) = G_OR [[SHL13]], [[ZEXTLOAD14]]
-    ; GFX10-UNALIGNED-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[OR13]], [[C3]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR14:%[0-9]+]]:_(s32) = G_OR [[SHL14]], [[OR12]]
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD15:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD19]](p3) :: (load (s8) from unknown-address + 20, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD19]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD16:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD20]](p3) :: (load (s8) from unknown-address + 21, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD16]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR15:%[0-9]+]]:_(s32) = G_OR [[SHL15]], [[ZEXTLOAD15]]
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD19]], [[C2]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD17:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD21]](p3) :: (load (s8) from unknown-address + 22, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD21]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p3) :: (load (s8) from unknown-address + 23, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[LOAD5]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[SHL16]], [[ZEXTLOAD17]]
-    ; GFX10-UNALIGNED-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[OR16]], [[C3]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[SHL17]], [[OR15]]
-    ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32), [[OR17]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 12, align 1, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD2]], [[C]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s32) from unknown-address + 16, align 1, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD2]], [[C1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s32) from unknown-address + 20, align 1, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32)
     ; GFX10-UNALIGNED-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
     ; GFX10-UNALIGNED-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
     ; GFX10-UNALIGNED-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
@@ -13772,49 +13102,23 @@ body: |
     ; GFX10-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_v2s96_align2
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 2, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
     ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX10-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
-    ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]]
-    ; GFX10-UNALIGNED-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s16) from unknown-address + 8, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s16) from unknown-address + 10, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]]
-    ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, align 2, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, align 2, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32)
     ; GFX10-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
-    ; GFX10-UNALIGNED-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s16) from unknown-address + 12, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s16) from unknown-address + 14, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]]
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C2]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (s16) from unknown-address + 16, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s16) from unknown-address + 18, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD4]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD4]]
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C3]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (s16) from unknown-address + 20, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s16) from unknown-address + 22, addrspace 3)
-    ; GFX10-UNALIGNED-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[LOAD5]], [[C1]](s32)
-    ; GFX10-UNALIGNED-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[ZEXTLOAD5]]
-    ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32), [[OR5]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 12, align 2, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD2]], [[C]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s32) from unknown-address + 16, align 2, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD2]], [[C1]](s32)
+    ; GFX10-UNALIGNED-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s32) from unknown-address + 20, align 2, addrspace 3)
+    ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32)
     ; GFX10-UNALIGNED-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
     ; GFX10-UNALIGNED-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
     ; GFX10-UNALIGNED-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/load-local.128.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/load-local.128.ll
index fbaa646cd23ef..5dc6a1c38759b 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/load-local.128.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/load-local.128.ll
@@ -314,7 +314,9 @@ define <4 x i32> @load_lds_v4i32_align8(<4 x i32> addrspace(3)* %ptr) {
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT:    ds_read2_b64 v[0:3], v0 offset1:1
+; GFX10-NEXT:    v_mov_b32_e32 v2, v0
+; GFX10-NEXT:    ds_read2_b32 v[0:1], v0 offset1:1
+; GFX10-NEXT:    ds_read2_b32 v[2:3], v2 offset0:2 offset1:3
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
   %load = load <4 x i32>, <4 x i32> addrspace(3)* %ptr, align 8

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/load-unaligned.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/load-unaligned.ll
index 8a60d327e653e..94a618af642ec 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/load-unaligned.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/load-unaligned.ll
@@ -75,50 +75,10 @@ define <4 x i32> @load_lds_v4i32_align1(<4 x i32> addrspace(3)* %ptr) {
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT:    ds_read_u8 v1, v0
-; GFX10-NEXT:    ds_read_u8 v2, v0 offset:1
-; GFX10-NEXT:    ds_read_u8 v3, v0 offset:2
-; GFX10-NEXT:    ds_read_u8 v4, v0 offset:3
-; GFX10-NEXT:    ds_read_u8 v5, v0 offset:4
-; GFX10-NEXT:    ds_read_u8 v6, v0 offset:5
-; GFX10-NEXT:    ds_read_u8 v7, v0 offset:6
-; GFX10-NEXT:    ds_read_u8 v8, v0 offset:7
-; GFX10-NEXT:    ds_read_u8 v9, v0 offset:8
-; GFX10-NEXT:    ds_read_u8 v10, v0 offset:9
-; GFX10-NEXT:    ds_read_u8 v11, v0 offset:10
-; GFX10-NEXT:    ds_read_u8 v12, v0 offset:11
-; GFX10-NEXT:    ds_read_u8 v13, v0 offset:12
-; GFX10-NEXT:    ds_read_u8 v14, v0 offset:13
-; GFX10-NEXT:    ds_read_u8 v15, v0 offset:15
-; GFX10-NEXT:    ds_read_u8 v0, v0 offset:14
-; GFX10-NEXT:    s_waitcnt lgkmcnt(14)
-; GFX10-NEXT:    v_lshl_or_b32 v1, v2, 8, v1
-; GFX10-NEXT:    s_waitcnt lgkmcnt(13)
-; GFX10-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
-; GFX10-NEXT:    s_waitcnt lgkmcnt(12)
-; GFX10-NEXT:    v_lshlrev_b32_e32 v2, 24, v4
-; GFX10-NEXT:    s_waitcnt lgkmcnt(10)
-; GFX10-NEXT:    v_lshl_or_b32 v4, v6, 8, v5
-; GFX10-NEXT:    s_waitcnt lgkmcnt(9)
-; GFX10-NEXT:    v_lshlrev_b32_e32 v6, 16, v7
-; GFX10-NEXT:    s_waitcnt lgkmcnt(8)
-; GFX10-NEXT:    v_lshlrev_b32_e32 v5, 24, v8
-; GFX10-NEXT:    s_waitcnt lgkmcnt(6)
-; GFX10-NEXT:    v_lshl_or_b32 v7, v10, 8, v9
-; GFX10-NEXT:    s_waitcnt lgkmcnt(5)
-; GFX10-NEXT:    v_lshlrev_b32_e32 v9, 16, v11
-; GFX10-NEXT:    s_waitcnt lgkmcnt(4)
-; GFX10-NEXT:    v_lshlrev_b32_e32 v8, 24, v12
-; GFX10-NEXT:    s_waitcnt lgkmcnt(2)
-; GFX10-NEXT:    v_lshl_or_b32 v10, v14, 8, v13
-; GFX10-NEXT:    s_waitcnt lgkmcnt(1)
-; GFX10-NEXT:    v_lshlrev_b32_e32 v11, 24, v15
+; GFX10-NEXT:    v_mov_b32_e32 v2, v0
+; GFX10-NEXT:    ds_read2_b32 v[0:1], v0 offset1:1
+; GFX10-NEXT:    ds_read2_b32 v[2:3], v2 offset0:2 offset1:3
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10-NEXT:    v_lshlrev_b32_e32 v12, 16, v0
-; GFX10-NEXT:    v_or3_b32 v0, v2, v3, v1
-; GFX10-NEXT:    v_or3_b32 v1, v5, v6, v4
-; GFX10-NEXT:    v_or3_b32 v2, v8, v9, v7
-; GFX10-NEXT:    v_or3_b32 v3, v11, v12, v10
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
   %load = load <4 x i32>, <4 x i32> addrspace(3)* %ptr, align 1
   ret <4 x i32> %load
@@ -180,39 +140,10 @@ define <3 x i32> @load_lds_v3i32_align1(<3 x i32> addrspace(3)* %ptr) {
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT:    ds_read_u8 v1, v0
-; GFX10-NEXT:    ds_read_u8 v2, v0 offset:1
-; GFX10-NEXT:    ds_read_u8 v3, v0 offset:2
-; GFX10-NEXT:    ds_read_u8 v4, v0 offset:3
-; GFX10-NEXT:    ds_read_u8 v5, v0 offset:4
-; GFX10-NEXT:    ds_read_u8 v6, v0 offset:5
-; GFX10-NEXT:    ds_read_u8 v7, v0 offset:6
-; GFX10-NEXT:    ds_read_u8 v8, v0 offset:7
-; GFX10-NEXT:    ds_read_u8 v9, v0 offset:8
-; GFX10-NEXT:    ds_read_u8 v10, v0 offset:9
-; GFX10-NEXT:    ds_read_u8 v11, v0 offset:11
-; GFX10-NEXT:    ds_read_u8 v0, v0 offset:10
-; GFX10-NEXT:    s_waitcnt lgkmcnt(10)
-; GFX10-NEXT:    v_lshl_or_b32 v1, v2, 8, v1
-; GFX10-NEXT:    s_waitcnt lgkmcnt(9)
-; GFX10-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
-; GFX10-NEXT:    s_waitcnt lgkmcnt(8)
-; GFX10-NEXT:    v_lshlrev_b32_e32 v2, 24, v4
-; GFX10-NEXT:    s_waitcnt lgkmcnt(6)
-; GFX10-NEXT:    v_lshl_or_b32 v4, v6, 8, v5
-; GFX10-NEXT:    s_waitcnt lgkmcnt(5)
-; GFX10-NEXT:    v_lshlrev_b32_e32 v6, 16, v7
-; GFX10-NEXT:    s_waitcnt lgkmcnt(4)
-; GFX10-NEXT:    v_lshlrev_b32_e32 v5, 24, v8
-; GFX10-NEXT:    s_waitcnt lgkmcnt(2)
-; GFX10-NEXT:    v_lshl_or_b32 v7, v10, 8, v9
-; GFX10-NEXT:    s_waitcnt lgkmcnt(1)
-; GFX10-NEXT:    v_lshlrev_b32_e32 v8, 24, v11
+; GFX10-NEXT:    v_mov_b32_e32 v2, v0
+; GFX10-NEXT:    ds_read2_b32 v[0:1], v0 offset1:1
+; GFX10-NEXT:    ds_read_b32 v2, v2 offset:8
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10-NEXT:    v_lshlrev_b32_e32 v9, 16, v0
-; GFX10-NEXT:    v_or3_b32 v0, v2, v3, v1
-; GFX10-NEXT:    v_or3_b32 v1, v5, v6, v4
-; GFX10-NEXT:    v_or3_b32 v2, v8, v9, v7
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
   %load = load <3 x i32>, <3 x i32> addrspace(3)* %ptr, align 1
   ret <3 x i32> %load
@@ -265,34 +196,8 @@ define void @store_lds_v4i32_align1(<4 x i32> addrspace(3)* %out, <4 x i32> %x)
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT:    v_lshrrev_b32_e32 v5, 16, v1
-; GFX10-NEXT:    v_lshrrev_b16 v6, 8, v1
-; GFX10-NEXT:    ds_write_b8 v0, v1
-; GFX10-NEXT:    ds_write_b8_d16_hi v0, v1 offset:2
-; GFX10-NEXT:    v_lshrrev_b32_e32 v1, 16, v2
-; GFX10-NEXT:    v_lshrrev_b16 v7, 8, v2
-; GFX10-NEXT:    v_lshrrev_b16 v5, 8, v5
-; GFX10-NEXT:    ds_write_b8 v0, v2 offset:4
-; GFX10-NEXT:    ds_write_b8 v0, v6 offset:1
-; GFX10-NEXT:    ds_write_b8 v0, v5 offset:3
-; GFX10-NEXT:    ds_write_b8 v0, v7 offset:5
-; GFX10-NEXT:    ds_write_b8_d16_hi v0, v2 offset:6
-; GFX10-NEXT:    v_lshrrev_b16 v1, 8, v1
-; GFX10-NEXT:    v_lshrrev_b32_e32 v2, 16, v3
-; GFX10-NEXT:    v_lshrrev_b16 v5, 8, v3
-; GFX10-NEXT:    ds_write_b8 v0, v1 offset:7
-; GFX10-NEXT:    ds_write_b8 v0, v3 offset:8
-; GFX10-NEXT:    v_lshrrev_b32_e32 v1, 16, v4
-; GFX10-NEXT:    v_lshrrev_b16 v2, 8, v2
-; GFX10-NEXT:    ds_write_b8 v0, v5 offset:9
-; GFX10-NEXT:    v_lshrrev_b16 v5, 8, v4
-; GFX10-NEXT:    ds_write_b8_d16_hi v0, v3 offset:10
-; GFX10-NEXT:    v_lshrrev_b16 v1, 8, v1
-; GFX10-NEXT:    ds_write_b8 v0, v2 offset:11
-; GFX10-NEXT:    ds_write_b8 v0, v4 offset:12
-; GFX10-NEXT:    ds_write_b8 v0, v5 offset:13
-; GFX10-NEXT:    ds_write_b8_d16_hi v0, v4 offset:14
-; GFX10-NEXT:    ds_write_b8 v0, v1 offset:15
+; GFX10-NEXT:    ds_write2_b32 v0, v1, v2 offset1:1
+; GFX10-NEXT:    ds_write2_b32 v0, v3, v4 offset0:2 offset1:3
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
   store <4 x i32> %x, <4 x i32> addrspace(3)* %out, align 1
@@ -339,27 +244,8 @@ define void @store_lds_v3i32_align1(<3 x i32> addrspace(3)* %out, <3 x i32> %x)
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT:    v_lshrrev_b32_e32 v4, 16, v1
-; GFX10-NEXT:    v_lshrrev_b16 v5, 8, v1
-; GFX10-NEXT:    ds_write_b8 v0, v1
-; GFX10-NEXT:    ds_write_b8_d16_hi v0, v1 offset:2
-; GFX10-NEXT:    v_lshrrev_b32_e32 v1, 16, v2
-; GFX10-NEXT:    v_lshrrev_b16 v6, 8, v2
-; GFX10-NEXT:    v_lshrrev_b16 v4, 8, v4
-; GFX10-NEXT:    v_lshrrev_b32_e32 v7, 16, v3
-; GFX10-NEXT:    ds_write_b8 v0, v2 offset:4
-; GFX10-NEXT:    v_lshrrev_b16 v1, 8, v1
-; GFX10-NEXT:    ds_write_b8 v0, v5 offset:1
-; GFX10-NEXT:    ds_write_b8 v0, v4 offset:3
-; GFX10-NEXT:    ds_write_b8 v0, v6 offset:5
-; GFX10-NEXT:    v_lshrrev_b16 v4, 8, v3
-; GFX10-NEXT:    ds_write_b8_d16_hi v0, v2 offset:6
-; GFX10-NEXT:    v_lshrrev_b16 v2, 8, v7
-; GFX10-NEXT:    ds_write_b8 v0, v1 offset:7
-; GFX10-NEXT:    ds_write_b8 v0, v3 offset:8
-; GFX10-NEXT:    ds_write_b8 v0, v4 offset:9
-; GFX10-NEXT:    ds_write_b8_d16_hi v0, v3 offset:10
-; GFX10-NEXT:    ds_write_b8 v0, v2 offset:11
+; GFX10-NEXT:    ds_write2_b32 v0, v1, v2 offset1:1
+; GFX10-NEXT:    ds_write_b32 v0, v3 offset:8
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
   store <3 x i32> %x, <3 x i32> addrspace(3)* %out, align 1

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/store-local.128.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/store-local.128.ll
index b662b219abdb4..913cb7306483e 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/store-local.128.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/store-local.128.ll
@@ -397,11 +397,12 @@ define amdgpu_kernel void @store_lds_v4i32_align8(<4 x i32> addrspace(3)* %out,
 ; GFX10-NEXT:    s_load_dword s2, s[0:1], 0x0
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10-NEXT:    v_mov_b32_e32 v0, s4
-; GFX10-NEXT:    v_mov_b32_e32 v1, s5
-; GFX10-NEXT:    v_mov_b32_e32 v2, s6
-; GFX10-NEXT:    v_mov_b32_e32 v3, s7
-; GFX10-NEXT:    v_mov_b32_e32 v4, s2
-; GFX10-NEXT:    ds_write2_b64 v4, v[0:1], v[2:3] offset1:1
+; GFX10-NEXT:    v_mov_b32_e32 v1, s2
+; GFX10-NEXT:    v_mov_b32_e32 v2, s5
+; GFX10-NEXT:    v_mov_b32_e32 v3, s6
+; GFX10-NEXT:    v_mov_b32_e32 v4, s7
+; GFX10-NEXT:    ds_write2_b32 v1, v0, v2 offset1:1
+; GFX10-NEXT:    ds_write2_b32 v1, v3, v4 offset0:2 offset1:3
 ; GFX10-NEXT:    s_endpgm
   store <4 x i32> %x, <4 x i32> addrspace(3)* %out, align 8
   ret void

diff  --git a/llvm/test/CodeGen/AMDGPU/ds-sub-offset.ll b/llvm/test/CodeGen/AMDGPU/ds-sub-offset.ll
index 41d33f96fc218..8acdc649f2efd 100644
--- a/llvm/test/CodeGen/AMDGPU/ds-sub-offset.ll
+++ b/llvm/test/CodeGen/AMDGPU/ds-sub-offset.ll
@@ -274,10 +274,11 @@ define amdgpu_kernel void @add_x_shl_neg_to_sub_misaligned_i64_max_offset() #1 {
 ; GFX10-LABEL: add_x_shl_neg_to_sub_misaligned_i64_max_offset:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX10-NEXT:    v_mov_b32_e32 v1, 0x7b
-; GFX10-NEXT:    v_mov_b32_e32 v2, 0
-; GFX10-NEXT:    v_sub_nc_u32_e32 v0, 0x3fb, v0
-; GFX10-NEXT:    ds_write2_b32 v0, v1, v2 offset1:1
+; GFX10-NEXT:    v_mov_b32_e32 v1, 0
+; GFX10-NEXT:    v_mov_b32_e32 v2, 0x7b
+; GFX10-NEXT:    v_sub_nc_u32_e32 v0, 0, v0
+; GFX10-NEXT:    ds_write_b32 v0, v1 offset:1023
+; GFX10-NEXT:    ds_write_b32 v0, v2 offset:1019
 ; GFX10-NEXT:    s_endpgm
   %x.i = call i32 @llvm.amdgcn.workitem.id.x() #0
   %neg = sub i32 0, %x.i
@@ -333,12 +334,13 @@ define amdgpu_kernel void @add_x_shl_neg_to_sub_misaligned_i64_max_offset_clamp_
 ; GFX10-NEXT:    s_load_dword s0, s[0:1], 0x0
 ; GFX10-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; GFX10-NEXT:    s_mov_b32 vcc_lo, 0
-; GFX10-NEXT:    v_mov_b32_e32 v3, 0x7b
-; GFX10-NEXT:    v_mov_b32_e32 v4, 0
-; GFX10-NEXT:    v_sub_nc_u32_e32 v2, 0x3fb, v0
+; GFX10-NEXT:    v_mov_b32_e32 v3, 0
+; GFX10-NEXT:    v_mov_b32_e32 v4, 0x7b
+; GFX10-NEXT:    v_sub_nc_u32_e32 v2, 0, v0
 ; GFX10-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX10-NEXT:    v_mov_b32_e32 v1, 0
-; GFX10-NEXT:    ds_write2_b32 v2, v3, v4 offset1:1
+; GFX10-NEXT:    ds_write_b32 v2, v3 offset:1023
+; GFX10-NEXT:    ds_write_b32 v2, v4 offset:1019
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10-NEXT:    v_div_fmas_f32 v5, s0, s0, s0
 ; GFX10-NEXT:    global_store_dword v[0:1], v5, off
@@ -378,10 +380,11 @@ define amdgpu_kernel void @add_x_shl_neg_to_sub_misaligned_i64_max_offset_p1() #
 ; GFX10-LABEL: add_x_shl_neg_to_sub_misaligned_i64_max_offset_p1:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX10-NEXT:    v_mov_b32_e32 v1, 0x7b
-; GFX10-NEXT:    v_mov_b32_e32 v2, 0
-; GFX10-NEXT:    v_sub_nc_u32_e32 v0, 0x3fc, v0
-; GFX10-NEXT:    ds_write2_b32 v0, v1, v2 offset1:1
+; GFX10-NEXT:    v_mov_b32_e32 v1, 0
+; GFX10-NEXT:    v_mov_b32_e32 v2, 0x7b
+; GFX10-NEXT:    v_sub_nc_u32_e32 v0, 0, v0
+; GFX10-NEXT:    v_add_nc_u32_e32 v0, 0x200, v0
+; GFX10-NEXT:    ds_write2_b32 v0, v2, v1 offset0:127 offset1:128
 ; GFX10-NEXT:    s_endpgm
   %x.i = call i32 @llvm.amdgcn.workitem.id.x() #0
   %neg = sub i32 0, %x.i

diff  --git a/llvm/test/CodeGen/AMDGPU/store-local.128.ll b/llvm/test/CodeGen/AMDGPU/store-local.128.ll
index 5514e775d5336..50905ea44eb65 100644
--- a/llvm/test/CodeGen/AMDGPU/store-local.128.ll
+++ b/llvm/test/CodeGen/AMDGPU/store-local.128.ll
@@ -417,12 +417,12 @@ define amdgpu_kernel void @store_lds_v4i32_align4(<4 x i32> addrspace(3)* %out,
 ; GFX10-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x10
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10-NEXT:    v_mov_b32_e32 v0, s2
-; GFX10-NEXT:    v_mov_b32_e32 v1, s4
-; GFX10-NEXT:    v_mov_b32_e32 v2, s5
-; GFX10-NEXT:    v_mov_b32_e32 v3, s6
-; GFX10-NEXT:    v_mov_b32_e32 v4, s7
-; GFX10-NEXT:    ds_write2_b32 v0, v1, v2 offset1:1
-; GFX10-NEXT:    ds_write2_b32 v0, v3, v4 offset0:2 offset1:3
+; GFX10-NEXT:    v_mov_b32_e32 v1, s6
+; GFX10-NEXT:    v_mov_b32_e32 v2, s7
+; GFX10-NEXT:    v_mov_b32_e32 v3, s4
+; GFX10-NEXT:    v_mov_b32_e32 v4, s5
+; GFX10-NEXT:    ds_write2_b32 v0, v1, v2 offset0:2 offset1:3
+; GFX10-NEXT:    ds_write2_b32 v0, v3, v4 offset1:1
 ; GFX10-NEXT:    s_endpgm
   store <4 x i32> %x, <4 x i32> addrspace(3)* %out, align 4
   ret void
@@ -473,13 +473,13 @@ define amdgpu_kernel void @store_lds_v4i32_align8(<4 x i32> addrspace(3)* %out,
 ; GFX10-LABEL: store_lds_v4i32_align8:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_clause 0x1
-; GFX10-NEXT:    s_load_dword s2, s[0:1], 0x0
 ; GFX10-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x10
+; GFX10-NEXT:    s_load_dword s2, s[0:1], 0x0
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10-NEXT:    v_mov_b32_e32 v4, s2
 ; GFX10-NEXT:    v_mov_b32_e32 v0, s4
-; GFX10-NEXT:    v_mov_b32_e32 v2, s6
 ; GFX10-NEXT:    v_mov_b32_e32 v1, s5
+; GFX10-NEXT:    v_mov_b32_e32 v4, s2
+; GFX10-NEXT:    v_mov_b32_e32 v2, s6
 ; GFX10-NEXT:    v_mov_b32_e32 v3, s7
 ; GFX10-NEXT:    ds_write2_b64 v4, v[0:1], v[2:3] offset1:1
 ; GFX10-NEXT:    s_endpgm

diff  --git a/llvm/test/CodeGen/AMDGPU/store-local.96.ll b/llvm/test/CodeGen/AMDGPU/store-local.96.ll
index 7f57a1150ed5e..3c2ccf7ebd1cd 100644
--- a/llvm/test/CodeGen/AMDGPU/store-local.96.ll
+++ b/llvm/test/CodeGen/AMDGPU/store-local.96.ll
@@ -355,11 +355,11 @@ define amdgpu_kernel void @store_lds_v3i32_align4(<3 x i32> addrspace(3)* %out,
 ; GFX10-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x10
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10-NEXT:    v_mov_b32_e32 v0, s2
-; GFX10-NEXT:    v_mov_b32_e32 v1, s4
-; GFX10-NEXT:    v_mov_b32_e32 v2, s5
-; GFX10-NEXT:    v_mov_b32_e32 v3, s6
-; GFX10-NEXT:    ds_write2_b32 v0, v1, v2 offset1:1
-; GFX10-NEXT:    ds_write_b32 v0, v3 offset:8
+; GFX10-NEXT:    v_mov_b32_e32 v1, s6
+; GFX10-NEXT:    v_mov_b32_e32 v2, s4
+; GFX10-NEXT:    v_mov_b32_e32 v3, s5
+; GFX10-NEXT:    ds_write_b32 v0, v1 offset:8
+; GFX10-NEXT:    ds_write2_b32 v0, v2, v3 offset1:1
 ; GFX10-NEXT:    s_endpgm
   store <3 x i32> %x, <3 x i32> addrspace(3)* %out, align 4
   ret void


        


More information about the llvm-commits mailing list