[llvm] ae36c02 - [AMDGPU] Set DS alignment requirements to be more strict

Mirko Brkusanin via llvm-commits llvm-commits at lists.llvm.org
Fri Sep 18 06:28:22 PDT 2020


Author: Mirko Brkusanin
Date: 2020-09-18T15:26:24+02:00
New Revision: ae36c02ad0cb0a618c8715404dcfab4cf49c6612

URL: https://github.com/llvm/llvm-project/commit/ae36c02ad0cb0a618c8715404dcfab4cf49c6612
DIFF: https://github.com/llvm/llvm-project/commit/ae36c02ad0cb0a618c8715404dcfab4cf49c6612.diff

LOG: [AMDGPU] Set DS alignment requirements to be more strict

Alignment requirements for ds_read/write_b96/b128 for gfx9 and onward are
now the same as for other GCN subtargets. This way we can avoid any
unintentional use of these instructions on systems that do not support dword
alignment and instead require natural alignment.
This also makes 'SH_MEM_CONFIG.alignment_mode == STRICT' the default.

Differential Revision: https://reviews.llvm.org/D87821

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/SIISelLowering.cpp
    llvm/test/CodeGen/AMDGPU/GlobalISel/lds-misaligned-bug.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/load-local.128.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/load-local.96.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/store-local.128.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/store-local.96.ll
    llvm/test/CodeGen/AMDGPU/ds_read2.ll
    llvm/test/CodeGen/AMDGPU/ds_write2.ll
    llvm/test/CodeGen/AMDGPU/lds-misaligned-bug.ll
    llvm/test/CodeGen/AMDGPU/load-local.128.ll
    llvm/test/CodeGen/AMDGPU/load-local.96.ll
    llvm/test/CodeGen/AMDGPU/store-local.128.ll
    llvm/test/CodeGen/AMDGPU/store-local.96.ll
    llvm/test/CodeGen/AMDGPU/store-local.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index aa90f537396e..dde095b56fd9 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -1437,9 +1437,10 @@ bool SITargetLowering::allowsMisalignedMemoryAccessesImpl(
       AddrSpace == AMDGPUAS::REGION_ADDRESS) {
     // Check if alignment requirements for ds_read/write instructions are
     // disabled.
-    if (Subtarget->hasUnalignedDSAccessEnabled()) {
+    if (Subtarget->hasUnalignedDSAccessEnabled() &&
+        !Subtarget->hasLDSMisalignedBug()) {
       if (IsFast)
-        *IsFast = true;
+        *IsFast = Alignment != Align(2);
       return true;
     }
 
@@ -1455,10 +1456,7 @@ bool SITargetLowering::allowsMisalignedMemoryAccessesImpl(
     }
     if (Size == 96) {
       // ds_read/write_b96 require 16-byte alignment on gfx8 and older.
-      bool Aligned = Alignment >= Align((Subtarget->hasUnalignedDSAccess() &&
-                                         !Subtarget->hasLDSMisalignedBug())
-                                            ? 4
-                                            : 16);
+      bool Aligned = Alignment >= Align(16);
       if (IsFast)
         *IsFast = Aligned;
 
@@ -1468,10 +1466,7 @@ bool SITargetLowering::allowsMisalignedMemoryAccessesImpl(
       // ds_read/write_b128 require 16-byte alignment on gfx8 and older, but we
       // can do a 8 byte aligned, 16 byte access in a single operation using
       // ds_read2/write2_b64.
-      bool Aligned = Alignment >= Align((Subtarget->hasUnalignedDSAccess() &&
-                                         !Subtarget->hasLDSMisalignedBug())
-                                            ? 4
-                                            : 8);
+      bool Aligned = Alignment >= Align(8);
       if (IsFast)
         *IsFast = Aligned;
 

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-misaligned-bug.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-misaligned-bug.ll
index 7d5a49cfd38d..b0f664726b26 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-misaligned-bug.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-misaligned-bug.ll
@@ -2,6 +2,7 @@
 ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1011 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SPLIT %s
 ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1012 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SPLIT %s
 ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -mattr=+cumode < %s | FileCheck -check-prefixes=GCN,VECT %s
+; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -mattr=+cumode,+unaligned-access-mode < %s | FileCheck -check-prefixes=UNALIGNED %s
 
 ; GCN-LABEL: test_local_misaligned_v2:
 ; GCN-DAG: ds_read2_b32
@@ -21,12 +22,12 @@ bb:
 }
 
 ; GCN-LABEL: test_local_misaligned_v4:
-; VECT-DAG: ds_read_b128
-; VECT-DAG: ds_write_b128
-; SPLIT-DAG: ds_read2_b32
-; SPLIT-DAG: ds_read2_b32
-; SPLIT-DAG: ds_write2_b32
-; SPLIT-DAG: ds_write2_b32
+; GCN-DAG: ds_read2_b32
+; GCN-DAG: ds_read2_b32
+; GCN-DAG: ds_write2_b32
+; GCN-DAG: ds_write2_b32
+; UNALIGNED-DAG: ds_read_b128
+; UNALIGNED-DAG: ds_write_b128
 define amdgpu_kernel void @test_local_misaligned_v4(i32 addrspace(3)* %arg) {
 bb:
   %lid = tail call i32 @llvm.amdgcn.workitem.id.x()
@@ -46,12 +47,12 @@ bb:
 }
 
 ; GCN-LABEL: test_local_misaligned_v3:
-; VECT-DAG: ds_read_b96
-; VECT-DAG: ds_write_b96
-; SPLIT-DAG: ds_read2_b32
-; SPLIT-DAG: ds_read_b32
-; SPLIT-DAG: ds_write2_b32
-; SPLIT-DAG: ds_write_b32
+; GCN-DAG: ds_read2_b32
+; GCN-DAG: ds_read_b32
+; GCN-DAG: ds_write2_b32
+; GCN-DAG: ds_write_b32
+; UNALIGNED-DAG: ds_read_b96
+; UNALIGNED-DAG: ds_write_b96
 define amdgpu_kernel void @test_local_misaligned_v3(i32 addrspace(3)* %arg) {
 bb:
   %lid = tail call i32 @llvm.amdgcn.workitem.id.x()

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir
index 88ab22bd5df6..3f0f84a0e9b6 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir
@@ -1,9 +1,10 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -O0 -run-pass=legalizer -global-isel-abort=0  %s -o - | FileCheck -check-prefix=SI %s
 # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=bonaire -mattr=-enable-ds128 -O0 -run-pass=legalizer -global-isel-abort=0  %s -o - | FileCheck -check-prefix=CI %s
-# RUN: llc -mtriple=amdgcn-mesa-mesa3d  -mcpu=bonaire -mattr=+enable-ds128 -O0 -run-pass=legalizer -global-isel-abort=0  %s -o - | FileCheck -check-prefix=CI-DS128 %s
-# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer  -global-isel-abort=0 %s -o - | FileCheck -check-prefix=VI %s
-# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -O0 -run-pass=legalizer  -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9 %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=bonaire -mattr=+enable-ds128 -O0 -run-pass=legalizer -global-isel-abort=0  %s -o - | FileCheck -check-prefix=CI-DS128 %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=VI %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -O0 -run-pass=legalizer -mattr=-unaligned-access-mode -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9 %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -O0 -run-pass=legalizer -mattr=+unaligned-access-mode -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9-UNALIGNED %s
 
 ---
 name: test_load_local_s1_align1
@@ -46,6 +47,13 @@ body: |
     ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
     ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
     ; GFX9: $vgpr0 = COPY [[AND]](s32)
+    ; GFX9-UNALIGNED-LABEL: name: test_load_local_s1_align1
+    ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3)
+    ; GFX9-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX9-UNALIGNED: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; GFX9-UNALIGNED: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
+    ; GFX9-UNALIGNED: $vgpr0 = COPY [[AND]](s32)
     %0:_(p3) = COPY $vgpr0
     %1:_(s1) = G_LOAD %0 :: (load 1, align 1, addrspace 3)
     %2:_(s32) = G_ZEXT %1
@@ -93,6 +101,13 @@ body: |
     ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
     ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
     ; GFX9: $vgpr0 = COPY [[AND]](s32)
+    ; GFX9-UNALIGNED-LABEL: name: test_load_local_s2_align1
+    ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3)
+    ; GFX9-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; GFX9-UNALIGNED: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; GFX9-UNALIGNED: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
+    ; GFX9-UNALIGNED: $vgpr0 = COPY [[AND]](s32)
     %0:_(p3) = COPY $vgpr0
     %1:_(s2) = G_LOAD %0 :: (load 1, align 1, addrspace 3)
     %2:_(s32) = G_ZEXT %1
@@ -130,6 +145,11 @@ body: |
     ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, align 4, addrspace 3)
     ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
     ; GFX9: $vgpr0 = COPY [[COPY1]](s32)
+    ; GFX9-UNALIGNED-LABEL: name: test_load_local_s8_align4
+    ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, align 4, addrspace 3)
+    ; GFX9-UNALIGNED: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; GFX9-UNALIGNED: $vgpr0 = COPY [[COPY1]](s32)
     %0:_(p3) = COPY $vgpr0
     %1:_(s8) = G_LOAD %0 :: (load 1, align 4, addrspace 3)
     %2:_(s32) = G_ANYEXT %1
@@ -167,6 +187,11 @@ body: |
     ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3)
     ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
     ; GFX9: $vgpr0 = COPY [[COPY1]](s32)
+    ; GFX9-UNALIGNED-LABEL: name: test_load_local_s8_align1
+    ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3)
+    ; GFX9-UNALIGNED: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; GFX9-UNALIGNED: $vgpr0 = COPY [[COPY1]](s32)
     %0:_(p3) = COPY $vgpr0
     %1:_(s8) = G_LOAD %0 :: (load 1, align 1, addrspace 3)
     %2:_(s32) = G_ANYEXT %1
@@ -204,6 +229,11 @@ body: |
     ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, align 4, addrspace 3)
     ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
     ; GFX9: $vgpr0 = COPY [[COPY1]](s32)
+    ; GFX9-UNALIGNED-LABEL: name: test_load_local_s16_align4
+    ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, align 4, addrspace 3)
+    ; GFX9-UNALIGNED: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; GFX9-UNALIGNED: $vgpr0 = COPY [[COPY1]](s32)
     %0:_(p3) = COPY $vgpr0
     %1:_(s16) = G_LOAD %0 :: (load 2, align 4, addrspace 3)
     %2:_(s32) = G_ANYEXT %1
@@ -241,6 +271,11 @@ body: |
     ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3)
     ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
     ; GFX9: $vgpr0 = COPY [[COPY1]](s32)
+    ; GFX9-UNALIGNED-LABEL: name: test_load_local_s16_align2
+    ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3)
+    ; GFX9-UNALIGNED: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; GFX9-UNALIGNED: $vgpr0 = COPY [[COPY1]](s32)
     %0:_(p3) = COPY $vgpr0
     %1:_(s16) = G_LOAD %0 :: (load 2, align 2, addrspace 3)
     %2:_(s32) = G_ANYEXT %1
@@ -339,6 +374,11 @@ body: |
     ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
     ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
     ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; GFX9-UNALIGNED-LABEL: name: test_load_local_s16_align1
+    ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, align 1, addrspace 3)
+    ; GFX9-UNALIGNED: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; GFX9-UNALIGNED: $vgpr0 = COPY [[COPY1]](s32)
     %0:_(p3) = COPY $vgpr0
     %1:_(s16) = G_LOAD %0 :: (load 2, align 1, addrspace 3)
     %2:_(s32) = G_ANYEXT %1
@@ -371,6 +411,10 @@ body: |
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3)
     ; GFX9: $vgpr0 = COPY [[LOAD]](s32)
+    ; GFX9-UNALIGNED-LABEL: name: test_load_local_s32_align4
+    ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3)
+    ; GFX9-UNALIGNED: $vgpr0 = COPY [[LOAD]](s32)
     %0:_(p3) = COPY $vgpr0
     %1:_(s32) = G_LOAD %0 :: (load 4, align 4, addrspace 3)
     $vgpr0 = COPY %1
@@ -457,6 +501,10 @@ body: |
     ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
     ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
     ; GFX9: $vgpr0 = COPY [[OR]](s32)
+    ; GFX9-UNALIGNED-LABEL: name: test_load_local_s32_align2
+    ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, align 2, addrspace 3)
+    ; GFX9-UNALIGNED: $vgpr0 = COPY [[LOAD]](s32)
     %0:_(p3) = COPY $vgpr0
     %1:_(s32) = G_LOAD %0 :: (load 4, align 2, addrspace 3)
     $vgpr0 = COPY %1
@@ -623,6 +671,10 @@ body: |
     ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
     ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
     ; GFX9: $vgpr0 = COPY [[OR2]](s32)
+    ; GFX9-UNALIGNED-LABEL: name: test_load_local_s32_align1
+    ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, align 1, addrspace 3)
+    ; GFX9-UNALIGNED: $vgpr0 = COPY [[LOAD]](s32)
     %0:_(p3) = COPY $vgpr0
     %1:_(s32) = G_LOAD %0 :: (load 4, align 1, addrspace 3)
     $vgpr0 = COPY %1
@@ -659,6 +711,11 @@ body: |
     ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, align 8, addrspace 3)
     ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
     ; GFX9: $vgpr0 = COPY [[COPY1]](s32)
+    ; GFX9-UNALIGNED-LABEL: name: test_load_local_s24_align8
+    ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, align 8, addrspace 3)
+    ; GFX9-UNALIGNED: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; GFX9-UNALIGNED: $vgpr0 = COPY [[COPY1]](s32)
     %0:_(p3) = COPY $vgpr0
     %1:_(s24) = G_LOAD %0 :: (load 3, align 8, addrspace 3)
     %2:_(s32) = G_ANYEXT %1
@@ -696,6 +753,11 @@ body: |
     ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3)
     ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
     ; GFX9: $vgpr0 = COPY [[COPY1]](s32)
+    ; GFX9-UNALIGNED-LABEL: name: test_load_local_s24_align4
+    ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3)
+    ; GFX9-UNALIGNED: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; GFX9-UNALIGNED: $vgpr0 = COPY [[COPY1]](s32)
     %0:_(p3) = COPY $vgpr0
     %1:_(s24) = G_LOAD %0 :: (load 3, align 4, addrspace 3)
     %2:_(s32) = G_ANYEXT %1
@@ -763,6 +825,17 @@ body: |
     ; GFX9: [[INSERT1:%[0-9]+]]:_(s24) = G_INSERT [[INSERT]], [[LOAD1]](s8), 16
     ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INSERT1]](s24)
     ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; GFX9-UNALIGNED-LABEL: name: test_load_local_s24_align2
+    ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+    ; GFX9-UNALIGNED: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load 2, addrspace 3)
+    ; GFX9-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX9-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 + 2, align 2, addrspace 3)
+    ; GFX9-UNALIGNED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-UNALIGNED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32)
+    ; GFX9-UNALIGNED: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
+    ; GFX9-UNALIGNED: [[COPY1:%[0-9]+]]:_(s32) = COPY [[OR]](s32)
+    ; GFX9-UNALIGNED: $vgpr0 = COPY [[COPY1]](s32)
     %0:_(p3) = COPY $vgpr0
     %1:_(s24) = G_LOAD %0 :: (load 3, align 2, addrspace 3)
     %2:_(s32) = G_ANYEXT %1
@@ -830,6 +903,20 @@ body: |
     ; GFX9: [[INSERT1:%[0-9]+]]:_(s24) = G_INSERT [[INSERT]], [[LOAD1]](s8), 16
     ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INSERT1]](s24)
     ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; GFX9-UNALIGNED-LABEL: name: test_load_local_s24_align1
+    ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, align 1, addrspace 3)
+    ; GFX9-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; GFX9-UNALIGNED: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; GFX9-UNALIGNED: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
+    ; GFX9-UNALIGNED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX9-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX9-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 + 2, addrspace 3)
+    ; GFX9-UNALIGNED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-UNALIGNED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C2]](s32)
+    ; GFX9-UNALIGNED: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[AND]]
+    ; GFX9-UNALIGNED: [[COPY2:%[0-9]+]]:_(s32) = COPY [[OR]](s32)
+    ; GFX9-UNALIGNED: $vgpr0 = COPY [[COPY2]](s32)
     %0:_(p3) = COPY $vgpr0
     %1:_(s24) = G_LOAD %0 :: (load 3, align 1, addrspace 3)
     %2:_(s32) = G_ANYEXT %1
@@ -867,6 +954,11 @@ body: |
     ; GFX9: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3)
     ; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64)
     ; GFX9: $vgpr0_vgpr1 = COPY [[COPY1]](s64)
+    ; GFX9-UNALIGNED-LABEL: name: test_load_local_s48_align8
+    ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3)
+    ; GFX9-UNALIGNED: [[COPY1:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64)
+    ; GFX9-UNALIGNED: $vgpr0_vgpr1 = COPY [[COPY1]](s64)
     %0:_(p3) = COPY $vgpr0
     %1:_(s48) = G_LOAD %0 :: (load 6, align 8, addrspace 3)
     %2:_(s64) = G_ANYEXT %1
@@ -899,6 +991,10 @@ body: |
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; GFX9: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3)
     ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; GFX9-UNALIGNED-LABEL: name: test_load_local_s64_align8
+    ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3)
+    ; GFX9-UNALIGNED: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
     %0:_(p3) = COPY $vgpr0
     %1:_(s64) = G_LOAD %0 :: (load 8, align 8, addrspace 3)
     $vgpr0_vgpr1 = COPY %1
@@ -930,6 +1026,10 @@ body: |
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; GFX9: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3)
     ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; GFX9-UNALIGNED-LABEL: name: test_load_local_s64_align4
+    ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3)
+    ; GFX9-UNALIGNED: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
     %0:_(p3) = COPY $vgpr0
     %1:_(s64) = G_LOAD %0 :: (load 8, align 4, addrspace 3)
     $vgpr0_vgpr1 = COPY %1
@@ -1081,6 +1181,10 @@ body: |
     ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
     ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
     ; GFX9: $vgpr0_vgpr1 = COPY [[MV]](s64)
+    ; GFX9-UNALIGNED-LABEL: name: test_load_local_s64_align2
+    ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 8, align 2, addrspace 3)
+    ; GFX9-UNALIGNED: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
     %0:_(p3) = COPY $vgpr0
     %1:_(s64) = G_LOAD %0 :: (load 8, align 2, addrspace 3)
     $vgpr0_vgpr1 = COPY %1
@@ -1421,6 +1525,10 @@ body: |
     ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
     ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
     ; GFX9: $vgpr0_vgpr1 = COPY [[MV]](s64)
+    ; GFX9-UNALIGNED-LABEL: name: test_load_local_s64_align1
+    ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 8, align 1, addrspace 3)
+    ; GFX9-UNALIGNED: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
     %0:_(p3) = COPY $vgpr0
     %1:_(s64) = G_LOAD %0 :: (load 8, align 1, addrspace 3)
     $vgpr0_vgpr1 = COPY %1
@@ -1837,6 +1945,11 @@ body: |
     ; GFX9: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[OR8]](s32), 64
     ; GFX9: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT1]](<3 x s32>)
     ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+    ; GFX9-UNALIGNED-LABEL: name: test_load_local_s96_align16
+    ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load 12, align 1, addrspace 3)
+    ; GFX9-UNALIGNED: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
+    ; GFX9-UNALIGNED: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     %0:_(p3) = COPY $vgpr0
     %1:_(s96) = G_LOAD %0 :: (load 12, align 1, addrspace 3)
     $vgpr0_vgpr1_vgpr2 = COPY %1
@@ -1894,9 +2007,20 @@ body: |
     ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; GFX9-LABEL: name: test_load_local_s96_align8
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load 12, align 8, addrspace 3)
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
+    ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3)
+    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 4 + 8, align 8, addrspace 3)
+    ; GFX9: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
+    ; GFX9: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[LOAD]](<2 x s32>), 0
+    ; GFX9: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[LOAD1]](s32), 64
+    ; GFX9: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT1]](<3 x s32>)
     ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+    ; GFX9-UNALIGNED-LABEL: name: test_load_local_s96_align8
+    ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load 12, align 8, addrspace 3)
+    ; GFX9-UNALIGNED: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
+    ; GFX9-UNALIGNED: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     %0:_(p3) = COPY $vgpr0
     %1:_(s96) = G_LOAD %0 :: (load 12, align 8, addrspace 3)
     $vgpr0_vgpr1_vgpr2 = COPY %1
@@ -1954,9 +2078,20 @@ body: |
     ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; GFX9-LABEL: name: test_load_local_s96_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load 12, align 4, addrspace 3)
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
+    ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3)
+    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 4 + 8, addrspace 3)
+    ; GFX9: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
+    ; GFX9: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[LOAD]](<2 x s32>), 0
+    ; GFX9: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[LOAD1]](s32), 64
+    ; GFX9: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT1]](<3 x s32>)
     ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+    ; GFX9-UNALIGNED-LABEL: name: test_load_local_s96_align4
+    ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load 12, align 4, addrspace 3)
+    ; GFX9-UNALIGNED: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
+    ; GFX9-UNALIGNED: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     %0:_(p3) = COPY $vgpr0
     %1:_(s96) = G_LOAD %0 :: (load 12, align 4, addrspace 3)
     $vgpr0_vgpr1_vgpr2 = COPY %1
@@ -2178,6 +2313,11 @@ body: |
     ; GFX9: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[OR2]](s32), 64
     ; GFX9: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT1]](<3 x s32>)
     ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+    ; GFX9-UNALIGNED-LABEL: name: test_load_local_s96_align2
+    ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load 12, align 2, addrspace 3)
+    ; GFX9-UNALIGNED: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
+    ; GFX9-UNALIGNED: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     %0:_(p3) = COPY $vgpr0
     %1:_(s96) = G_LOAD %0 :: (load 12, align 2, addrspace 3)
     $vgpr0_vgpr1_vgpr2 = COPY %1
@@ -2594,6 +2734,11 @@ body: |
     ; GFX9: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[OR8]](s32), 64
     ; GFX9: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT1]](<3 x s32>)
     ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+    ; GFX9-UNALIGNED-LABEL: name: test_load_local_s96_align1
+    ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load 12, align 1, addrspace 3)
+    ; GFX9-UNALIGNED: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
+    ; GFX9-UNALIGNED: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     %0:_(p3) = COPY $vgpr0
     %1:_(s96) = G_LOAD %0 :: (load 12, align 1, addrspace 3)
     $vgpr0_vgpr1_vgpr2 = COPY %1
@@ -3112,6 +3257,11 @@ body: |
     ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
     ; GFX9: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
     ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
+    ; GFX9-UNALIGNED-LABEL: name: test_load_local_s128_align16
+    ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load 16, align 1, addrspace 3)
+    ; GFX9-UNALIGNED: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>)
+    ; GFX9-UNALIGNED: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
     %0:_(p3) = COPY $vgpr0
     %1:_(s128) = G_LOAD %0 :: (load 16, align 1, addrspace 3)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -3156,6 +3306,11 @@ body: |
     ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load 16, align 8, addrspace 3)
     ; GFX9: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>)
     ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
+    ; GFX9-UNALIGNED-LABEL: name: test_load_local_s128_align8
+    ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load 16, align 8, addrspace 3)
+    ; GFX9-UNALIGNED: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>)
+    ; GFX9-UNALIGNED: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
     %0:_(p3) = COPY $vgpr0
     %1:_(s128) = G_LOAD %0 :: (load 16, align 8, addrspace 3)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -3217,9 +3372,24 @@ body: |
     ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
     ; GFX9-LABEL: name: test_load_local_s128_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3)
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>)
+    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3)
+    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 4 + 4, addrspace 3)
+    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 4 + 8, addrspace 3)
+    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 4 + 12, addrspace 3)
+    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32)
+    ; GFX9: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
     ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
+    ; GFX9-UNALIGNED-LABEL: name: test_load_local_s128_align4
+    ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3)
+    ; GFX9-UNALIGNED: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>)
+    ; GFX9-UNALIGNED: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
     %0:_(p3) = COPY $vgpr0
     %1:_(s128) = G_LOAD %0 :: (load 16, align 4, addrspace 3)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -3486,6 +3656,11 @@ body: |
     ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s32>), [[BUILD_VECTOR1]](<2 x s32>)
     ; GFX9: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[CONCAT_VECTORS]](<4 x s32>)
     ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
+    ; GFX9-UNALIGNED-LABEL: name: test_load_local_s128_align2
+    ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load 16, align 2, addrspace 3)
+    ; GFX9-UNALIGNED: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>)
+    ; GFX9-UNALIGNED: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
     %0:_(p3) = COPY $vgpr0
     %1:_(s128) = G_LOAD %0 :: (load 16, align 2, addrspace 3)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -4004,6 +4179,11 @@ body: |
     ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
     ; GFX9: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
     ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
+    ; GFX9-UNALIGNED-LABEL: name: test_load_local_s128_align1
+    ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load 16, align 1, addrspace 3)
+    ; GFX9-UNALIGNED: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>)
+    ; GFX9-UNALIGNED: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
     %0:_(p3) = COPY $vgpr0
     %1:_(s128) = G_LOAD %0 :: (load 16, align 1, addrspace 3)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -4035,6 +4215,10 @@ body: |
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; GFX9: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3)
     ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
+    ; GFX9-UNALIGNED-LABEL: name: test_load_local_p1_align8
+    ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3)
+    ; GFX9-UNALIGNED: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
     %0:_(p3) = COPY $vgpr0
     %1:_(p1) = G_LOAD %0 :: (load 8, align 8, addrspace 3)
     $vgpr0_vgpr1 = COPY %1
@@ -4066,6 +4250,10 @@ body: |
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; GFX9: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3)
     ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
+    ; GFX9-UNALIGNED-LABEL: name: test_load_local_p1_align4
+    ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3)
+    ; GFX9-UNALIGNED: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
     %0:_(p3) = COPY $vgpr0
     %1:_(p1) = G_LOAD %0 :: (load 8, align 4, addrspace 3)
     $vgpr0_vgpr1 = COPY %1
@@ -4217,6 +4405,10 @@ body: |
     ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
     ; GFX9: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
     ; GFX9: $vgpr0_vgpr1 = COPY [[MV]](p1)
+    ; GFX9-UNALIGNED-LABEL: name: test_load_local_p1_align2
+    ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load 8, align 2, addrspace 3)
+    ; GFX9-UNALIGNED: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
     %0:_(p3) = COPY $vgpr0
     %1:_(p1) = G_LOAD %0 :: (load 8, align 2, addrspace 3)
     $vgpr0_vgpr1 = COPY %1
@@ -4557,6 +4749,10 @@ body: |
     ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
     ; GFX9: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
     ; GFX9: $vgpr0_vgpr1 = COPY [[MV]](p1)
+    ; GFX9-UNALIGNED-LABEL: name: test_load_local_p1_align1
+    ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load 8, align 1, addrspace 3)
+    ; GFX9-UNALIGNED: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
     %0:_(p3) = COPY $vgpr0
     %1:_(p1) = G_LOAD %0 :: (load 8, align 1, addrspace 3)
     $vgpr0_vgpr1 = COPY %1
@@ -4588,6 +4784,10 @@ body: |
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; GFX9: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3)
     ; GFX9: $vgpr0 = COPY [[LOAD]](p3)
+    ; GFX9-UNALIGNED-LABEL: name: test_load_local_p3_align4
+    ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3)
+    ; GFX9-UNALIGNED: $vgpr0 = COPY [[LOAD]](p3)
     %0:_(p3) = COPY $vgpr0
     %1:_(p3) = G_LOAD %0 :: (load 4, align 4, addrspace 3)
     $vgpr0 = COPY %1
@@ -4679,6 +4879,10 @@ body: |
     ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
     ; GFX9: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR]](s32)
     ; GFX9: $vgpr0 = COPY [[INTTOPTR]](p3)
+    ; GFX9-UNALIGNED-LABEL: name: test_load_local_p3_align2
+    ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p3) :: (load 4, align 2, addrspace 3)
+    ; GFX9-UNALIGNED: $vgpr0 = COPY [[LOAD]](p3)
     %0:_(p3) = COPY $vgpr0
     %1:_(p3) = G_LOAD %0 :: (load 4, align 2, addrspace 3)
     $vgpr0 = COPY %1
@@ -4850,6 +5054,10 @@ body: |
     ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
     ; GFX9: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32)
     ; GFX9: $vgpr0 = COPY [[INTTOPTR]](p3)
+    ; GFX9-UNALIGNED-LABEL: name: test_load_local_p3_align1
+    ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p3) :: (load 4, align 1, addrspace 3)
+    ; GFX9-UNALIGNED: $vgpr0 = COPY [[LOAD]](p3)
     %0:_(p3) = COPY $vgpr0
     %1:_(p3) = G_LOAD %0 :: (load 4, align 1, addrspace 3)
     $vgpr0 = COPY %1
@@ -4881,6 +5089,10 @@ body: |
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; GFX9: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3)
     ; GFX9: $vgpr0 = COPY [[LOAD]](p5)
+    ; GFX9-UNALIGNED-LABEL: name: test_load_local_p5_align4
+    ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3)
+    ; GFX9-UNALIGNED: $vgpr0 = COPY [[LOAD]](p5)
     %0:_(p3) = COPY $vgpr0
     %1:_(p5) = G_LOAD %0 :: (load 4, align 4, addrspace 3)
     $vgpr0 = COPY %1
@@ -4972,6 +5184,10 @@ body: |
     ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
     ; GFX9: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32)
     ; GFX9: $vgpr0 = COPY [[INTTOPTR]](p5)
+    ; GFX9-UNALIGNED-LABEL: name: test_load_local_p5_align2
+    ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p3) :: (load 4, align 2, addrspace 3)
+    ; GFX9-UNALIGNED: $vgpr0 = COPY [[LOAD]](p5)
     %0:_(p3) = COPY $vgpr0
     %1:_(p5) = G_LOAD %0 :: (load 4, align 2, addrspace 3)
     $vgpr0 = COPY %1
@@ -5143,6 +5359,10 @@ body: |
     ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
     ; GFX9: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32)
     ; GFX9: $vgpr0 = COPY [[INTTOPTR]](p5)
+    ; GFX9-UNALIGNED-LABEL: name: test_load_local_p5_align1
+    ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p3) :: (load 4, align 1, addrspace 3)
+    ; GFX9-UNALIGNED: $vgpr0 = COPY [[LOAD]](p5)
     %0:_(p3) = COPY $vgpr0
     %1:_(p5) = G_LOAD %0 :: (load 4, align 1, addrspace 3)
     $vgpr0 = COPY %1
@@ -5255,6 +5475,25 @@ body: |
     ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
     ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
     ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; GFX9-UNALIGNED-LABEL: name: test_load_local_v2s8_align2
+    ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3)
+    ; GFX9-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-UNALIGNED: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
+    ; GFX9-UNALIGNED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-UNALIGNED: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; GFX9-UNALIGNED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX9-UNALIGNED: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
+    ; GFX9-UNALIGNED: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX9-UNALIGNED: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX9-UNALIGNED: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]]
+    ; GFX9-UNALIGNED: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+    ; GFX9-UNALIGNED: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]]
+    ; GFX9-UNALIGNED: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9-UNALIGNED: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C4]](s16)
+    ; GFX9-UNALIGNED: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX9-UNALIGNED: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; GFX9-UNALIGNED: $vgpr0 = COPY [[ANYEXT]](s32)
     %0:_(p3) = COPY $vgpr0
     %1:_(<2 x s8>) = G_LOAD %0 :: (load 2, align 2, addrspace 3)
     %2:_(s16) = G_BITCAST %1
@@ -5326,6 +5565,19 @@ body: |
     ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
     ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32)
     ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; GFX9-UNALIGNED-LABEL: name: test_load_local_v2s8_align1
+    ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, align 1, addrspace 3)
+    ; GFX9-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-UNALIGNED: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
+    ; GFX9-UNALIGNED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-UNALIGNED: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; GFX9-UNALIGNED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX9-UNALIGNED: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
+    ; GFX9-UNALIGNED: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; GFX9-UNALIGNED: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; GFX9-UNALIGNED: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32)
+    ; GFX9-UNALIGNED: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     %0:_(p3) = COPY $vgpr0
     %1:_(<2 x s8>) = G_LOAD %0 :: (load 2, align 1, addrspace 3)
     %2:_(<2 x s32>) = G_ANYEXT %1
@@ -5368,6 +5620,12 @@ body: |
     ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF
     ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[LOAD]](<3 x s8>), 0
     ; GFX9: $vgpr0 = COPY [[INSERT]](<4 x s8>)
+    ; GFX9-UNALIGNED-LABEL: name: test_load_local_v3s8_align4
+    ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p3) :: (load 3, align 4, addrspace 1)
+    ; GFX9-UNALIGNED: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF
+    ; GFX9-UNALIGNED: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[LOAD]](<3 x s8>), 0
+    ; GFX9-UNALIGNED: $vgpr0 = COPY [[INSERT]](<4 x s8>)
     %0:_(p3) = COPY $vgpr0
     %1:_(<3 x s8>) = G_LOAD %0 :: (load 3, addrspace 1, align 4)
     %2:_(<4 x s8>) = G_IMPLICIT_DEF
@@ -5411,6 +5669,12 @@ body: |
     ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF
     ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[LOAD]](<3 x s8>), 0
     ; GFX9: $vgpr0 = COPY [[INSERT]](<4 x s8>)
+    ; GFX9-UNALIGNED-LABEL: name: test_load_local_v3s8_align1
+    ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p3) :: (load 3, align 1, addrspace 3)
+    ; GFX9-UNALIGNED: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF
+    ; GFX9-UNALIGNED: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[LOAD]](<3 x s8>), 0
+    ; GFX9-UNALIGNED: $vgpr0 = COPY [[INSERT]](<4 x s8>)
     %0:_(p3) = COPY $vgpr0
     %1:_(<3 x s8>) = G_LOAD %0 :: (load 3, align 1, addrspace 3)
     %2:_(<4 x s8>) = G_IMPLICIT_DEF
@@ -5506,6 +5770,24 @@ body: |
     ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
     ; GFX9: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>)
     ; GFX9: $vgpr0 = COPY [[TRUNC]](<4 x s8>)
+    ; GFX9-UNALIGNED-LABEL: name: test_load_local_v4s8_align4
+    ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3)
+    ; GFX9-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-UNALIGNED: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
+    ; GFX9-UNALIGNED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-UNALIGNED: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; GFX9-UNALIGNED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX9-UNALIGNED: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
+    ; GFX9-UNALIGNED: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; GFX9-UNALIGNED: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; GFX9-UNALIGNED: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
+    ; GFX9-UNALIGNED: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; GFX9-UNALIGNED: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
+    ; GFX9-UNALIGNED: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32)
+    ; GFX9-UNALIGNED: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+    ; GFX9-UNALIGNED: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>)
+    ; GFX9-UNALIGNED: $vgpr0 = COPY [[TRUNC]](<4 x s8>)
     %0:_(p3) = COPY $vgpr0
     %1:_(<4 x s8>) = G_LOAD %0 :: (load 4, align 4, addrspace 3)
     $vgpr0 = COPY %1
@@ -5656,6 +5938,37 @@ body: |
     ; GFX9: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS1]](<4 x s16>)
     ; GFX9: [[CONCAT_VECTORS2:%[0-9]+]]:_(<8 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s8>), [[TRUNC1]](<4 x s8>)
     ; GFX9: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS2]](<8 x s8>)
+    ; GFX9-UNALIGNED-LABEL: name: test_load_local_v8s8_align8
+    ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3)
+    ; GFX9-UNALIGNED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
+    ; GFX9-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-UNALIGNED: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
+    ; GFX9-UNALIGNED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-UNALIGNED: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
+    ; GFX9-UNALIGNED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX9-UNALIGNED: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
+    ; GFX9-UNALIGNED: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
+    ; GFX9-UNALIGNED: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; GFX9-UNALIGNED: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
+    ; GFX9-UNALIGNED: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; GFX9-UNALIGNED: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
+    ; GFX9-UNALIGNED: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32)
+    ; GFX9-UNALIGNED: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+    ; GFX9-UNALIGNED: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>)
+    ; GFX9-UNALIGNED: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; GFX9-UNALIGNED: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
+    ; GFX9-UNALIGNED: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; GFX9-UNALIGNED: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV1]](s32)
+    ; GFX9-UNALIGNED: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32)
+    ; GFX9-UNALIGNED: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32)
+    ; GFX9-UNALIGNED: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32)
+    ; GFX9-UNALIGNED: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32)
+    ; GFX9-UNALIGNED: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY7]](s32), [[COPY8]](s32)
+    ; GFX9-UNALIGNED: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>)
+    ; GFX9-UNALIGNED: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS1]](<4 x s16>)
+    ; GFX9-UNALIGNED: [[CONCAT_VECTORS2:%[0-9]+]]:_(<8 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s8>), [[TRUNC1]](<4 x s8>)
+    ; GFX9-UNALIGNED: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS2]](<8 x s8>)
     %0:_(p3) = COPY $vgpr0
     %1:_(<8 x s8>) = G_LOAD %0 :: (load 8, align 8, addrspace 3)
     $vgpr0_vgpr1 = COPY %1
@@ -5998,6 +6311,59 @@ body: |
     ; GFX9: [[TRUNC3:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS3]](<4 x s16>)
     ; GFX9: [[CONCAT_VECTORS4:%[0-9]+]]:_(<16 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s8>), [[TRUNC1]](<4 x s8>), [[TRUNC2]](<4 x s8>), [[TRUNC3]](<4 x s8>)
     ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS4]](<16 x s8>)
+    ; GFX9-UNALIGNED-LABEL: name: test_load_local_v16s8_align16
+    ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load 16, align 1, addrspace 3)
+    ; GFX9-UNALIGNED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
+    ; GFX9-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-UNALIGNED: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
+    ; GFX9-UNALIGNED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-UNALIGNED: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
+    ; GFX9-UNALIGNED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX9-UNALIGNED: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
+    ; GFX9-UNALIGNED: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
+    ; GFX9-UNALIGNED: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; GFX9-UNALIGNED: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
+    ; GFX9-UNALIGNED: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; GFX9-UNALIGNED: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
+    ; GFX9-UNALIGNED: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32)
+    ; GFX9-UNALIGNED: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+    ; GFX9-UNALIGNED: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>)
+    ; GFX9-UNALIGNED: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; GFX9-UNALIGNED: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
+    ; GFX9-UNALIGNED: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; GFX9-UNALIGNED: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV1]](s32)
+    ; GFX9-UNALIGNED: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32)
+    ; GFX9-UNALIGNED: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32)
+    ; GFX9-UNALIGNED: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32)
+    ; GFX9-UNALIGNED: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32)
+    ; GFX9-UNALIGNED: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY7]](s32), [[COPY8]](s32)
+    ; GFX9-UNALIGNED: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>)
+    ; GFX9-UNALIGNED: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS1]](<4 x s16>)
+    ; GFX9-UNALIGNED: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
+    ; GFX9-UNALIGNED: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
+    ; GFX9-UNALIGNED: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
+    ; GFX9-UNALIGNED: [[COPY9:%[0-9]+]]:_(s32) = COPY [[UV2]](s32)
+    ; GFX9-UNALIGNED: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32)
+    ; GFX9-UNALIGNED: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY9]](s32), [[COPY10]](s32)
+    ; GFX9-UNALIGNED: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR7]](s32)
+    ; GFX9-UNALIGNED: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR8]](s32)
+    ; GFX9-UNALIGNED: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY11]](s32), [[COPY12]](s32)
+    ; GFX9-UNALIGNED: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC4]](<2 x s16>), [[BUILD_VECTOR_TRUNC5]](<2 x s16>)
+    ; GFX9-UNALIGNED: [[TRUNC2:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS2]](<4 x s16>)
+    ; GFX9-UNALIGNED: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
+    ; GFX9-UNALIGNED: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
+    ; GFX9-UNALIGNED: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
+    ; GFX9-UNALIGNED: [[COPY13:%[0-9]+]]:_(s32) = COPY [[UV3]](s32)
+    ; GFX9-UNALIGNED: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR9]](s32)
+    ; GFX9-UNALIGNED: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY13]](s32), [[COPY14]](s32)
+    ; GFX9-UNALIGNED: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR10]](s32)
+    ; GFX9-UNALIGNED: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR11]](s32)
+    ; GFX9-UNALIGNED: [[BUILD_VECTOR_TRUNC7:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[COPY16]](s32)
+    ; GFX9-UNALIGNED: [[CONCAT_VECTORS3:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC6]](<2 x s16>), [[BUILD_VECTOR_TRUNC7]](<2 x s16>)
+    ; GFX9-UNALIGNED: [[TRUNC3:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS3]](<4 x s16>)
+    ; GFX9-UNALIGNED: [[CONCAT_VECTORS4:%[0-9]+]]:_(<16 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s8>), [[TRUNC1]](<4 x s8>), [[TRUNC2]](<4 x s8>), [[TRUNC3]](<4 x s8>)
+    ; GFX9-UNALIGNED: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS4]](<16 x s8>)
     %0:_(p3) = COPY $vgpr0
     %1:_(<16 x s8>) = G_LOAD %0 :: (load 16, align 1, addrspace 3)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -6029,6 +6395,10 @@ body: |
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3)
     ; GFX9: $vgpr0 = COPY [[LOAD]](<2 x s16>)
+    ; GFX9-UNALIGNED-LABEL: name: test_load_local_v2s16_align4
+    ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3)
+    ; GFX9-UNALIGNED: $vgpr0 = COPY [[LOAD]](<2 x s16>)
     %0:_(p3) = COPY $vgpr0
     %1:_(<2 x s16>) = G_LOAD %0 :: (load 4, align 4, addrspace 3)
     $vgpr0 = COPY %1
@@ -6114,6 +6484,10 @@ body: |
     ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
     ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
     ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
+    ; GFX9-UNALIGNED-LABEL: name: test_load_local_v2s16_align2
+    ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load 4, align 2, addrspace 3)
+    ; GFX9-UNALIGNED: $vgpr0 = COPY [[LOAD]](<2 x s16>)
     %0:_(p3) = COPY $vgpr0
     %1:_(<2 x s16>) = G_LOAD %0 :: (load 4, align 2, addrspace 3)
     $vgpr0 = COPY %1
@@ -6294,6 +6668,10 @@ body: |
     ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16)
     ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32)
     ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
+    ; GFX9-UNALIGNED-LABEL: name: test_load_local_v2s16_align1
+    ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load 4, align 1, addrspace 3)
+    ; GFX9-UNALIGNED: $vgpr0 = COPY [[LOAD]](<2 x s16>)
     %0:_(p3) = COPY $vgpr0
     %1:_(<2 x s16>) = G_LOAD %0 :: (load 4, align 1, addrspace 3)
     $vgpr0 = COPY %1
@@ -6345,6 +6723,14 @@ body: |
     ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
     ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0
     ; GFX9: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
+    ; GFX9-UNALIGNED-LABEL: name: test_load_local_v3s16_align8
+    ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3)
+    ; GFX9-UNALIGNED: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>)
+    ; GFX9-UNALIGNED: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
+    ; GFX9-UNALIGNED: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-UNALIGNED: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0
+    ; GFX9-UNALIGNED: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
     %0:_(p3) = COPY $vgpr0
     %1:_(<3 x s16>) = G_LOAD %0 :: (load 6, align 8, addrspace 3)
     %2:_(<4 x s16>) = G_IMPLICIT_DEF
@@ -6517,6 +6903,12 @@ body: |
     ; GFX9: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>)
     ; GFX9: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV8]](<3 x s16>), 0
     ; GFX9: $vgpr0_vgpr1 = COPY [[INSERT4]](<4 x s16>)
+    ; GFX9-UNALIGNED-LABEL: name: test_load_local_v3s16_align2
+    ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[COPY]](p3) :: (load 6, align 2, addrspace 3)
+    ; GFX9-UNALIGNED: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-UNALIGNED: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[LOAD]](<3 x s16>), 0
+    ; GFX9-UNALIGNED: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
     %0:_(p3) = COPY $vgpr0
     %1:_(<3 x s16>) = G_LOAD %0 :: (load 6, align 2, addrspace 3)
     %2:_(<4 x s16>) = G_IMPLICIT_DEF
@@ -6825,6 +7217,12 @@ body: |
     ; GFX9: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>)
     ; GFX9: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV8]](<3 x s16>), 0
     ; GFX9: $vgpr0_vgpr1 = COPY [[INSERT4]](<4 x s16>)
+    ; GFX9-UNALIGNED-LABEL: name: test_load_local_v3s16_align1
+    ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[COPY]](p3) :: (load 6, align 1, addrspace 3)
+    ; GFX9-UNALIGNED: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-UNALIGNED: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[LOAD]](<3 x s16>), 0
+    ; GFX9-UNALIGNED: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
     %0:_(p3) = COPY $vgpr0
     %1:_(<3 x s16>) = G_LOAD %0 :: (load 6, align 1, addrspace 3)
     %2:_(<4 x s16>) = G_IMPLICIT_DEF
@@ -6857,6 +7255,10 @@ body: |
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3)
     ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
+    ; GFX9-UNALIGNED-LABEL: name: test_load_local_v4s16_align8
+    ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3)
+    ; GFX9-UNALIGNED: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
     %0:_(p3) = COPY $vgpr0
     %1:_(<4 x s16>) = G_LOAD %0 :: (load 8, align 8, addrspace 3)
     $vgpr0_vgpr1 = COPY %1
@@ -6888,6 +7290,10 @@ body: |
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3)
     ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
+    ; GFX9-UNALIGNED-LABEL: name: test_load_local_v4s16_align4
+    ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3)
+    ; GFX9-UNALIGNED: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
     %0:_(p3) = COPY $vgpr0
     %1:_(<4 x s16>) = G_LOAD %0 :: (load 8, align 4, addrspace 3)
     $vgpr0_vgpr1 = COPY %1
@@ -7038,6 +7444,10 @@ body: |
     ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32)
     ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
     ; GFX9: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+    ; GFX9-UNALIGNED-LABEL: name: test_load_local_v4s16_align2
+    ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load 8, align 2, addrspace 3)
+    ; GFX9-UNALIGNED: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
     %0:_(p3) = COPY $vgpr0
     %1:_(<4 x s16>) = G_LOAD %0 :: (load 8, align 2, addrspace 3)
     $vgpr0_vgpr1 = COPY %1
@@ -7363,6 +7773,10 @@ body: |
     ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[ANYEXT3]](s32)
     ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
     ; GFX9: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+    ; GFX9-UNALIGNED-LABEL: name: test_load_local_v4s16_align1
+    ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load 8, align 1, addrspace 3)
+    ; GFX9-UNALIGNED: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
     %0:_(p3) = COPY $vgpr0
     %1:_(<4 x s16>) = G_LOAD %0 :: (load 8, align 1, addrspace 3)
     $vgpr0_vgpr1 = COPY %1
@@ -7394,6 +7808,10 @@ body: |
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3)
     ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ; GFX9-UNALIGNED-LABEL: name: test_load_local_v2s32_align8
+    ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3)
+    ; GFX9-UNALIGNED: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
     %0:_(p3) = COPY $vgpr0
     %1:_(<2 x s32>) = G_LOAD %0 :: (load 8, align 8, addrspace 3)
     $vgpr0_vgpr1 = COPY %1
@@ -7425,6 +7843,10 @@ body: |
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3)
     ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ; GFX9-UNALIGNED-LABEL: name: test_load_local_v2s32_align4
+    ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3)
+    ; GFX9-UNALIGNED: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
     %0:_(p3) = COPY $vgpr0
     %1:_(<2 x s32>) = G_LOAD %0 :: (load 8, align 4, addrspace 3)
     $vgpr0_vgpr1 = COPY %1
@@ -7571,6 +7993,10 @@ body: |
     ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
     ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32)
     ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; GFX9-UNALIGNED-LABEL: name: test_load_local_v2s32_align2
+    ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, align 2, addrspace 3)
+    ; GFX9-UNALIGNED: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
     %0:_(p3) = COPY $vgpr0
     %1:_(<2 x s32>) = G_LOAD %0 :: (load 8, align 2, addrspace 3)
     $vgpr0_vgpr1 = COPY %1
@@ -7857,6 +8283,10 @@ body: |
     ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
     ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
     ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; GFX9-UNALIGNED-LABEL: name: test_load_local_v2s32_align1
+    ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, align 1, addrspace 3)
+    ; GFX9-UNALIGNED: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
     %0:_(p3) = COPY $vgpr0
     %1:_(<2 x s32>) = G_LOAD %0 :: (load 8, align 1, addrspace 3)
     $vgpr0_vgpr1 = COPY %1
@@ -8268,6 +8698,10 @@ body: |
     ; GFX9: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<2 x s32>), 0
     ; GFX9: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[OR8]](s32), 64
     ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[INSERT1]](<3 x s32>)
+    ; GFX9-UNALIGNED-LABEL: name: test_load_local_v3s32_align16
+    ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load 12, align 1, addrspace 3)
+    ; GFX9-UNALIGNED: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
     %0:_(p3) = COPY $vgpr0
     %1:_(<3 x s32>) = G_LOAD %0 :: (load 12, align 1, addrspace 3)
     $vgpr0_vgpr1_vgpr2 = COPY %1
@@ -8321,8 +8755,18 @@ body: |
     ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[INSERT1]](<3 x s32>)
     ; GFX9-LABEL: name: test_load_local_v3s32_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load 12, align 4, addrspace 3)
-    ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
+    ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3)
+    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 4 + 8, addrspace 3)
+    ; GFX9: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
+    ; GFX9: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[LOAD]](<2 x s32>), 0
+    ; GFX9: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[LOAD1]](s32), 64
+    ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[INSERT1]](<3 x s32>)
+    ; GFX9-UNALIGNED-LABEL: name: test_load_local_v3s32_align4
+    ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load 12, align 4, addrspace 3)
+    ; GFX9-UNALIGNED: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
     %0:_(p3) = COPY $vgpr0
     %1:_(<3 x s32>) = G_LOAD %0 :: (load 12, align 4, addrspace 3)
     $vgpr0_vgpr1_vgpr2 = COPY %1
@@ -8362,6 +8806,10 @@ body: |
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load 16, addrspace 3)
     ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
+    ; GFX9-UNALIGNED-LABEL: name: test_load_local_v4s32_align16
+    ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load 16, addrspace 3)
+    ; GFX9-UNALIGNED: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
     %0:_(p3) = COPY $vgpr0
     %1:_(<4 x s32>) = G_LOAD %0 :: (load 16, align 16, addrspace 3)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -8401,6 +8849,10 @@ body: |
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load 16, align 8, addrspace 3)
     ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
+    ; GFX9-UNALIGNED-LABEL: name: test_load_local_v4s32_align8
+    ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load 16, align 8, addrspace 3)
+    ; GFX9-UNALIGNED: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
     %0:_(p3) = COPY $vgpr0
     %1:_(<4 x s32>) = G_LOAD %0 :: (load 16, align 8, addrspace 3)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -8458,8 +8910,22 @@ body: |
     ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
     ; GFX9-LABEL: name: test_load_local_v4s32_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3)
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
+    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3)
+    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 4 + 4, addrspace 3)
+    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 4 + 8, addrspace 3)
+    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 4 + 12, addrspace 3)
+    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32)
+    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
+    ; GFX9-UNALIGNED-LABEL: name: test_load_local_v4s32_align4
+    ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3)
+    ; GFX9-UNALIGNED: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
     %0:_(p3) = COPY $vgpr0
     %1:_(<4 x s32>) = G_LOAD %0 :: (load 16, align 4, addrspace 3)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -8721,6 +9187,10 @@ body: |
     ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR3]](s32)
     ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s32>), [[BUILD_VECTOR1]](<2 x s32>)
     ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x s32>)
+    ; GFX9-UNALIGNED-LABEL: name: test_load_local_v4s32_align2
+    ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load 16, align 2, addrspace 3)
+    ; GFX9-UNALIGNED: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
     %0:_(p3) = COPY $vgpr0
     %1:_(<4 x s32>) = G_LOAD %0 :: (load 16, align 2, addrspace 3)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -9234,6 +9704,10 @@ body: |
     ; GFX9: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
     ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
     ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
+    ; GFX9-UNALIGNED-LABEL: name: test_load_local_v4s32_align1
+    ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load 16, align 1, addrspace 3)
+    ; GFX9-UNALIGNED: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
     %0:_(p3) = COPY $vgpr0
     %1:_(<4 x s32>) = G_LOAD %0 :: (load 16, align 1, addrspace 3)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -9297,6 +9771,14 @@ body: |
     ; GFX9: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load 16 + 16, addrspace 3)
     ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>)
     ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x s32>)
+    ; GFX9-UNALIGNED-LABEL: name: test_load_local_v8s32_align32
+    ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load 16, align 32, addrspace 3)
+    ; GFX9-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-UNALIGNED: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load 16 + 16, addrspace 3)
+    ; GFX9-UNALIGNED: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>)
+    ; GFX9-UNALIGNED: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x s32>)
     %0:_(p3) = COPY $vgpr0
     %1:_(<8 x s32>) = G_LOAD %0 :: (load 32, align 32, addrspace 3)
     $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1
@@ -9328,6 +9810,10 @@ body: |
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; GFX9: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p3) :: (load 16, align 32, addrspace 3)
     ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[LOAD]](<16 x s32>)
+    ; GFX9-UNALIGNED-LABEL: name: test_load_local_v16s32_align32
+    ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p3) :: (load 16, align 32, addrspace 3)
+    ; GFX9-UNALIGNED: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[LOAD]](<16 x s32>)
     %0:_(p3) = COPY $vgpr0
     %1:_(<16 x s32>) = G_LOAD %0 :: (load 16, align 32, addrspace 3)
     $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %1
@@ -9373,8 +9859,16 @@ body: |
     ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
     ; GFX9-LABEL: name: test_load_local_v2s64_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3)
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
+    ; GFX9: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3)
+    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load 8 + 8, align 4, addrspace 3)
+    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[LOAD]](s64), [[LOAD1]](s64)
+    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+    ; GFX9-UNALIGNED-LABEL: name: test_load_local_v2s64_align4
+    ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3)
+    ; GFX9-UNALIGNED: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
     %0:_(p3) = COPY $vgpr0
     %1:_(<2 x s64>) = G_LOAD %0 :: (load 16, align 4, addrspace 3)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -9994,6 +10488,10 @@ body: |
     ; GFX9: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32)
     ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
     ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+    ; GFX9-UNALIGNED-LABEL: name: test_load_local_v2s64_align16
+    ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load 16, align 1, addrspace 3)
+    ; GFX9-UNALIGNED: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
     %0:_(p3) = COPY $vgpr0
     %1:_(<2 x s64>) = G_LOAD %0 :: (load 16, align 1, addrspace 3)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -10067,6 +10565,18 @@ body: |
     ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
     ; GFX9: [[INSERT2:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF1]], [[INSERT1]](<3 x s64>), 0
     ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT2]](<4 x s64>)
+    ; GFX9-UNALIGNED-LABEL: name: test_load_local_v3s64_align32
+    ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load 16, align 32, addrspace 3)
+    ; GFX9-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load 8 + 16, align 16, addrspace 3)
+    ; GFX9-UNALIGNED: [[DEF:%[0-9]+]]:_(<3 x s64>) = G_IMPLICIT_DEF
+    ; GFX9-UNALIGNED: [[INSERT:%[0-9]+]]:_(<3 x s64>) = G_INSERT [[DEF]], [[LOAD]](<2 x s64>), 0
+    ; GFX9-UNALIGNED: [[INSERT1:%[0-9]+]]:_(<3 x s64>) = G_INSERT [[INSERT]], [[LOAD1]](s64), 128
+    ; GFX9-UNALIGNED: [[DEF1:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
+    ; GFX9-UNALIGNED: [[INSERT2:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF1]], [[INSERT1]](<3 x s64>), 0
+    ; GFX9-UNALIGNED: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT2]](<4 x s64>)
     %0:_(p3) = COPY $vgpr0
     %1:_(<3 x s64>) = G_LOAD %0 :: (load 24, align 32, addrspace 3)
     %2:_(<4 x s64>) = G_IMPLICIT_DEF
@@ -10132,6 +10642,14 @@ body: |
     ; GFX9: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD]](p3) :: (load 16 + 16, addrspace 3)
     ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>)
     ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x s64>)
+    ; GFX9-UNALIGNED-LABEL: name: test_load_local_v4s64_align32
+    ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load 16, align 32, addrspace 3)
+    ; GFX9-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-UNALIGNED: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD]](p3) :: (load 16 + 16, addrspace 3)
+    ; GFX9-UNALIGNED: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>)
+    ; GFX9-UNALIGNED: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x s64>)
     %0:_(p3) = COPY $vgpr0
     %1:_(<4 x s64>) = G_LOAD %0 :: (load 32, align 32, addrspace 3)
     $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1
@@ -10193,9 +10711,24 @@ body: |
     ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
     ; GFX9-LABEL: name: test_load_local_v2p1_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3)
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>)
+    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3)
+    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 4 + 4, addrspace 3)
+    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 4 + 8, addrspace 3)
+    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
+    ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 4 + 12, addrspace 3)
+    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32)
+    ; GFX9: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
     ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
+    ; GFX9-UNALIGNED-LABEL: name: test_load_local_v2p1_align4
+    ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3)
+    ; GFX9-UNALIGNED: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>)
+    ; GFX9-UNALIGNED: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
     %0:_(p3) = COPY $vgpr0
     %1:_(<2 x p1>) = G_LOAD %0 :: (load 16, align 4, addrspace 3)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -10227,6 +10760,10 @@ body: |
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3)
     ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
+    ; GFX9-UNALIGNED-LABEL: name: test_load_local_v2p3_align8
+    ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3)
+    ; GFX9-UNALIGNED: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
     %0:_(p3) = COPY $vgpr0
     %1:_(<2 x p3>) = G_LOAD %0 :: (load 8, align 8, addrspace 3)
     $vgpr0_vgpr1 = COPY %1
@@ -10258,6 +10795,10 @@ body: |
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, align 4, addrspace 3)
     ; GFX9: $vgpr0 = COPY [[LOAD]](s32)
+    ; GFX9-UNALIGNED-LABEL: name: test_extload_local_s32_from_1_align4
+    ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, align 4, addrspace 3)
+    ; GFX9-UNALIGNED: $vgpr0 = COPY [[LOAD]](s32)
     %0:_(p3) = COPY $vgpr0
     %1:_(s32) = G_LOAD %0 :: (load 1, align 4, addrspace 3)
     $vgpr0 = COPY %1
@@ -10289,6 +10830,10 @@ body: |
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, align 4, addrspace 3)
     ; GFX9: $vgpr0 = COPY [[LOAD]](s32)
+    ; GFX9-UNALIGNED-LABEL: name: test_extload_local_s32_from_2_align4
+    ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, align 4, addrspace 3)
+    ; GFX9-UNALIGNED: $vgpr0 = COPY [[LOAD]](s32)
     %0:_(p3) = COPY $vgpr0
     %1:_(s32) = G_LOAD %0 :: (load 2, align 4, addrspace 3)
     $vgpr0 = COPY %1
@@ -10326,6 +10871,11 @@ body: |
     ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, align 4, addrspace 3)
     ; GFX9: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
     ; GFX9: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; GFX9-UNALIGNED-LABEL: name: test_extload_local_s64_from_1_align4
+    ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, align 4, addrspace 3)
+    ; GFX9-UNALIGNED: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; GFX9-UNALIGNED: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     %0:_(p3) = COPY $vgpr0
     %1:_(s64) = G_LOAD %0 :: (load 1, align 4, addrspace 3)
     $vgpr0_vgpr1 = COPY %1
@@ -10362,6 +10912,11 @@ body: |
     ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, align 4, addrspace 3)
     ; GFX9: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
     ; GFX9: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; GFX9-UNALIGNED-LABEL: name: test_extload_local_s64_from_2_align4
+    ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, align 4, addrspace 3)
+    ; GFX9-UNALIGNED: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; GFX9-UNALIGNED: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     %0:_(p3) = COPY $vgpr0
     %1:_(s64) = G_LOAD %0 :: (load 2, align 4, addrspace 3)
     $vgpr0_vgpr1 = COPY %1
@@ -10398,6 +10953,11 @@ body: |
     ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3)
     ; GFX9: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
     ; GFX9: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; GFX9-UNALIGNED-LABEL: name: test_extload_local_s64_from_4_align4
+    ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3)
+    ; GFX9-UNALIGNED: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; GFX9-UNALIGNED: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     %0:_(p3) = COPY $vgpr0
     %1:_(s64) = G_LOAD %0 :: (load 4, align 4, addrspace 3)
     $vgpr0_vgpr1 = COPY %1
@@ -10449,6 +11009,14 @@ body: |
     ; GFX9: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
     ; GFX9: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64)
     ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128)
+    ; GFX9-UNALIGNED-LABEL: name: test_extload_local_s128_from_4_align4
+    ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3)
+    ; GFX9-UNALIGNED: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; GFX9-UNALIGNED: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32)
+    ; GFX9-UNALIGNED: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+    ; GFX9-UNALIGNED: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64)
+    ; GFX9-UNALIGNED: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128)
     %0:_(p3) = COPY $vgpr0
     %1:_(s128) = G_LOAD %0 :: (load 4, align 4, addrspace 3)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -10485,6 +11053,11 @@ body: |
     ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, align 4, addrspace 3)
     ; GFX9: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
     ; GFX9: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; GFX9-UNALIGNED-LABEL: name: test_extload_local_s64_from_2_align2
+    ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, align 4, addrspace 3)
+    ; GFX9-UNALIGNED: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; GFX9-UNALIGNED: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     %0:_(p3) = COPY $vgpr0
     %1:_(s64) = G_LOAD %0 :: (load 2, align 4, addrspace 3)
     $vgpr0_vgpr1 = COPY %1
@@ -10521,6 +11094,11 @@ body: |
     ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, align 4, addrspace 3)
     ; GFX9: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
     ; GFX9: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; GFX9-UNALIGNED-LABEL: name: test_extload_local_s64_from_1_align1
+    ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, align 4, addrspace 3)
+    ; GFX9-UNALIGNED: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; GFX9-UNALIGNED: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     %0:_(p3) = COPY $vgpr0
     %1:_(s64) = G_LOAD %0 :: (load 1, align 4, addrspace 3)
     $vgpr0_vgpr1 = COPY %1
@@ -10552,6 +11130,10 @@ body: |
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 4, align 1, addrspace 3)
     ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ; GFX9-UNALIGNED-LABEL: name: test_extload_local_v2s32_from_4_align1
+    ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 4, align 1, addrspace 3)
+    ; GFX9-UNALIGNED: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
     %0:_(p3) = COPY $vgpr0
     %1:_(<2 x s32>) = G_LOAD %0 :: (load 4, align 1, addrspace 3)
     $vgpr0_vgpr1 = COPY %1
@@ -10583,6 +11165,10 @@ body: |
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 4, align 2, addrspace 3)
     ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ; GFX9-UNALIGNED-LABEL: name: test_extload_local_v2s32_from_4_align2
+    ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 4, align 2, addrspace 3)
+    ; GFX9-UNALIGNED: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
     %0:_(p3) = COPY $vgpr0
     %1:_(<2 x s32>) = G_LOAD %0 :: (load 4, align 2, addrspace 3)
     $vgpr0_vgpr1 = COPY %1
@@ -10614,6 +11200,10 @@ body: |
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3)
     ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ; GFX9-UNALIGNED-LABEL: name: test_extload_local_v2s32_from_4_align4
+    ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3)
+    ; GFX9-UNALIGNED: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
     %0:_(p3) = COPY $vgpr0
     %1:_(<2 x s32>) = G_LOAD %0 :: (load 4, align 4, addrspace 3)
     $vgpr0_vgpr1 = COPY %1
@@ -10645,6 +11235,10 @@ body: |
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; GFX9: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load 6, align 4, addrspace 3)
     ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
+    ; GFX9-UNALIGNED-LABEL: name: test_extload_local_v3s32_from_6_align4
+    ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load 6, align 4, addrspace 3)
+    ; GFX9-UNALIGNED: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
     %0:_(p3) = COPY $vgpr0
     %1:_(<3 x s32>) = G_LOAD %0 :: (load 6, align 4, addrspace 3)
     $vgpr0_vgpr1_vgpr2 = COPY %1
@@ -10676,6 +11270,10 @@ body: |
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3)
     ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
+    ; GFX9-UNALIGNED-LABEL: name: test_extload_local_v4s32_from_8_align4
+    ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3)
+    ; GFX9-UNALIGNED: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
     %0:_(p3) = COPY $vgpr0
     %1:_(<4 x s32>) = G_LOAD %0 :: (load 8, align 4, addrspace 3)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -11467,6 +12065,18 @@ body: |
     ; GFX9: [[COPY27:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
     ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[COPY26]](s96)
     ; GFX9: $vgpr3_vgpr4_vgpr5 = COPY [[COPY27]](s96)
+    ; GFX9-UNALIGNED-LABEL: name: test_extload_local_v2s96_from_24_align1
+    ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load 12, align 1, addrspace 3)
+    ; GFX9-UNALIGNED: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
+    ; GFX9-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; GFX9-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-UNALIGNED: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load 12 + 12, align 1, addrspace 3)
+    ; GFX9-UNALIGNED: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>)
+    ; GFX9-UNALIGNED: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; GFX9-UNALIGNED: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; GFX9-UNALIGNED: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
+    ; GFX9-UNALIGNED: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
     %0:_(p3) = COPY $vgpr0
     %1:_(<2 x s96>) = G_LOAD %0 :: (load 24, align 1, addrspace 3)
     %2:_(s96) = G_EXTRACT %1, 0
@@ -11886,6 +12496,18 @@ body: |
     ; GFX9: [[COPY15:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
     ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[COPY14]](s96)
     ; GFX9: $vgpr3_vgpr4_vgpr5 = COPY [[COPY15]](s96)
+    ; GFX9-UNALIGNED-LABEL: name: test_extload_local_v2s96_from_24_align2
+    ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load 12, align 2, addrspace 3)
+    ; GFX9-UNALIGNED: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
+    ; GFX9-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; GFX9-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-UNALIGNED: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load 12 + 12, align 2, addrspace 3)
+    ; GFX9-UNALIGNED: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>)
+    ; GFX9-UNALIGNED: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; GFX9-UNALIGNED: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; GFX9-UNALIGNED: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
+    ; GFX9-UNALIGNED: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
     %0:_(p3) = COPY $vgpr0
     %1:_(<2 x s96>) = G_LOAD %0 :: (load 24, align 2, addrspace 3)
     %2:_(s96) = G_EXTRACT %1, 0
@@ -11994,16 +12616,39 @@ body: |
     ; VI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY3]](s96)
     ; GFX9-LABEL: name: test_extload_local_v2s96_from_24_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load 12, align 4, addrspace 3)
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3)
+    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
     ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load 12 + 12, align 4, addrspace 3)
-    ; GFX9: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>)
-    ; GFX9: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
-    ; GFX9: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
-    ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
-    ; GFX9: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
+    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 4 + 8, addrspace 3)
+    ; GFX9: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
+    ; GFX9: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY [[DEF]](<3 x s32>)
+    ; GFX9: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[COPY1]], [[LOAD]](<2 x s32>), 0
+    ; GFX9: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[LOAD1]](s32), 64
+    ; GFX9: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT1]](<3 x s32>)
+    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX9: [[LOAD2:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p3) :: (load 8 + 12, align 4, addrspace 3)
+    ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
+    ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 4 + 20, addrspace 3)
+    ; GFX9: [[INSERT2:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[LOAD2]](<2 x s32>), 0
+    ; GFX9: [[INSERT3:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT2]], [[LOAD3]](s32), 64
+    ; GFX9: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT3]](<3 x s32>)
+    ; GFX9: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; GFX9: [[COPY3:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[COPY2]](s96)
+    ; GFX9: $vgpr3_vgpr4_vgpr5 = COPY [[COPY3]](s96)
+    ; GFX9-UNALIGNED-LABEL: name: test_extload_local_v2s96_from_24_align4
+    ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load 12, align 4, addrspace 3)
+    ; GFX9-UNALIGNED: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
+    ; GFX9-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; GFX9-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-UNALIGNED: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load 12 + 12, align 4, addrspace 3)
+    ; GFX9-UNALIGNED: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>)
+    ; GFX9-UNALIGNED: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; GFX9-UNALIGNED: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; GFX9-UNALIGNED: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
+    ; GFX9-UNALIGNED: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
     %0:_(p3) = COPY $vgpr0
     %1:_(<2 x s96>) = G_LOAD %0 :: (load 24, align 4, addrspace 3)
     %2:_(s96) = G_EXTRACT %1, 0
@@ -12106,12 +12751,30 @@ body: |
     ; GFX9: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
     ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
     ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load 12 + 12, align 4, addrspace 3)
-    ; GFX9: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>)
+    ; GFX9: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load 8 + 12, align 4, addrspace 3)
+    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD]], [[C1]](s32)
+    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 4 + 20, addrspace 3)
+    ; GFX9: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
+    ; GFX9: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[LOAD1]](<2 x s32>), 0
+    ; GFX9: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[LOAD2]](s32), 64
+    ; GFX9: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT1]](<3 x s32>)
     ; GFX9: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
     ; GFX9: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
     ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
     ; GFX9: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
+    ; GFX9-UNALIGNED-LABEL: name: test_extload_local_v2s96_from_24_align16
+    ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load 12, align 16, addrspace 3)
+    ; GFX9-UNALIGNED: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
+    ; GFX9-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; GFX9-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-UNALIGNED: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load 12 + 12, align 4, addrspace 3)
+    ; GFX9-UNALIGNED: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>)
+    ; GFX9-UNALIGNED: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; GFX9-UNALIGNED: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; GFX9-UNALIGNED: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
+    ; GFX9-UNALIGNED: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
     %0:_(p3) = COPY $vgpr0
     %1:_(<2 x s96>) = G_LOAD %0 :: (load 24, align 16, addrspace 3)
     %2:_(s96) = G_EXTRACT %1, 0

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/load-local.128.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/load-local.128.ll
index 09cc9e5be137..6cc9dbd1793b 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/load-local.128.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/load-local.128.ll
@@ -244,7 +244,9 @@ define <4 x i32> @load_lds_v4i32_align4(<4 x i32> addrspace(3)* %ptr) {
 ; GFX9-LABEL: load_lds_v4i32_align4:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    ds_read_b128 v[0:3], v0
+; GFX9-NEXT:    v_mov_b32_e32 v2, v0
+; GFX9-NEXT:    ds_read2_b32 v[0:1], v0 offset1:1
+; GFX9-NEXT:    ds_read2_b32 v[2:3], v2 offset0:2 offset1:3
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/load-local.96.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/load-local.96.ll
index f9abdb029664..d461a767b15f 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/load-local.96.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/load-local.96.ll
@@ -202,7 +202,9 @@ define <3 x i32> @load_lds_v3i32_align4(<3 x i32> addrspace(3)* %ptr) {
 ; GFX9-LABEL: load_lds_v3i32_align4:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    ds_read_b96 v[0:2], v0
+; GFX9-NEXT:    v_mov_b32_e32 v2, v0
+; GFX9-NEXT:    ds_read2_b32 v[0:1], v0 offset1:1
+; GFX9-NEXT:    ds_read_b32 v2, v2 offset:8
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -223,7 +225,9 @@ define <3 x i32> @load_lds_v3i32_align8(<3 x i32> addrspace(3)* %ptr) {
 ; GFX9-LABEL: load_lds_v3i32_align8:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    ds_read_b96 v[0:2], v0
+; GFX9-NEXT:    v_mov_b32_e32 v2, v0
+; GFX9-NEXT:    ds_read_b64 v[0:1], v0
+; GFX9-NEXT:    ds_read_b32 v2, v2 offset:8
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/store-local.128.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/store-local.128.ll
index 5b078d41e8d8..3ad1caa0f0ef 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/store-local.128.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/store-local.128.ll
@@ -212,12 +212,13 @@ define amdgpu_kernel void @store_lds_v4i32_align4(<4 x i32> addrspace(3)* %out,
 ; GFX9-NEXT:    s_load_dword s4, s[0:1], 0x24
 ; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x34
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    v_mov_b32_e32 v4, s4
+; GFX9-NEXT:    v_mov_b32_e32 v1, s4
 ; GFX9-NEXT:    v_mov_b32_e32 v0, s0
-; GFX9-NEXT:    v_mov_b32_e32 v1, s1
-; GFX9-NEXT:    v_mov_b32_e32 v2, s2
-; GFX9-NEXT:    v_mov_b32_e32 v3, s3
-; GFX9-NEXT:    ds_write_b128 v4, v[0:3]
+; GFX9-NEXT:    v_mov_b32_e32 v2, s1
+; GFX9-NEXT:    ds_write2_b32 v1, v0, v2 offset1:1
+; GFX9-NEXT:    v_mov_b32_e32 v3, s2
+; GFX9-NEXT:    v_mov_b32_e32 v0, s3
+; GFX9-NEXT:    ds_write2_b32 v1, v3, v0 offset0:2 offset1:3
 ; GFX9-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: store_lds_v4i32_align4:

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/store-local.96.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/store-local.96.ll
index 538c146601bd..e0512775ecd4 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/store-local.96.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/store-local.96.ll
@@ -178,11 +178,12 @@ define amdgpu_kernel void @store_lds_v3i32_align4(<3 x i32> addrspace(3)* %out,
 ; GFX9-NEXT:    s_load_dword s4, s[0:1], 0x24
 ; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x34
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    v_mov_b32_e32 v3, s4
+; GFX9-NEXT:    v_mov_b32_e32 v2, s4
 ; GFX9-NEXT:    v_mov_b32_e32 v0, s0
 ; GFX9-NEXT:    v_mov_b32_e32 v1, s1
-; GFX9-NEXT:    v_mov_b32_e32 v2, s2
-; GFX9-NEXT:    ds_write_b96 v3, v[0:2]
+; GFX9-NEXT:    v_mov_b32_e32 v3, s2
+; GFX9-NEXT:    ds_write2_b32 v2, v0, v1 offset1:1
+; GFX9-NEXT:    ds_write_b32 v2, v3 offset:8
 ; GFX9-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: store_lds_v3i32_align4:
@@ -208,11 +209,12 @@ define amdgpu_kernel void @store_lds_v3i32_align8(<3 x i32> addrspace(3)* %out,
 ; GFX9-NEXT:    s_load_dword s4, s[0:1], 0x24
 ; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x34
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    v_mov_b32_e32 v3, s4
+; GFX9-NEXT:    v_mov_b32_e32 v2, s4
 ; GFX9-NEXT:    v_mov_b32_e32 v0, s0
 ; GFX9-NEXT:    v_mov_b32_e32 v1, s1
-; GFX9-NEXT:    v_mov_b32_e32 v2, s2
-; GFX9-NEXT:    ds_write_b96 v3, v[0:2]
+; GFX9-NEXT:    v_mov_b32_e32 v3, s2
+; GFX9-NEXT:    ds_write_b64 v2, v[0:1]
+; GFX9-NEXT:    ds_write_b32 v2, v3 offset:8
 ; GFX9-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: store_lds_v3i32_align8:

diff  --git a/llvm/test/CodeGen/AMDGPU/ds_read2.ll b/llvm/test/CodeGen/AMDGPU/ds_read2.ll
index 47ae95eefea9..2ef7b1d1403c 100644
--- a/llvm/test/CodeGen/AMDGPU/ds_read2.ll
+++ b/llvm/test/CodeGen/AMDGPU/ds_read2.ll
@@ -480,7 +480,11 @@ define amdgpu_kernel void @load_constant_disjoint_offsets(i32 addrspace(1)* %out
 ; GCN-DAG: v_mov_b32_e32 [[PTR:v[0-9]+]], bar at abs32@lo{{$}}
 ; CI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[PTR]] offset1:1
 ; CI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[PTR]] offset0:2 offset1:3
-; GFX9: ds_read_b128 v{{\[[0-9]+:[0-9]+\]}}, [[PTR]]
+
+; GFX9-ALIGNED-DAG: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[PTR]] offset1:1
+; GFX9-ALIGNED-DAG: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[PTR]] offset0:2 offset1:3
+
+; GFX9-UNALIGNED: ds_read_b128 v{{\[[0-9]+:[0-9]+\]}}, [[PTR]]
 define amdgpu_kernel void @load_misaligned64_constant_offsets(i64 addrspace(1)* %out) {
   %val0 = load i64, i64 addrspace(3)* getelementptr inbounds ([4 x i64], [4 x i64] addrspace(3)* @bar, i32 0, i32 0), align 4
   %val1 = load i64, i64 addrspace(3)* getelementptr inbounds ([4 x i64], [4 x i64] addrspace(3)* @bar, i32 0, i32 1), align 4

diff  --git a/llvm/test/CodeGen/AMDGPU/ds_write2.ll b/llvm/test/CodeGen/AMDGPU/ds_write2.ll
index dce2884d77c3..a4a5d47c9e05 100644
--- a/llvm/test/CodeGen/AMDGPU/ds_write2.ll
+++ b/llvm/test/CodeGen/AMDGPU/ds_write2.ll
@@ -441,7 +441,11 @@ define amdgpu_kernel void @store_constant_disjoint_offsets() {
 ; GCN-DAG: v_mov_b32_e32 [[PTR:v[0-9]+]], bar at abs32@lo{{$}}
 ; CI-DAG: ds_write2_b32 [[PTR]], v{{[0-9]+}}, v{{[0-9]+}} offset1:1
 ; CI-DAG: ds_write2_b32 [[PTR]], v{{[0-9]+}}, v{{[0-9]+}} offset0:2 offset1:3
-; GFX9-DAG: ds_write_b128 [[PTR]], {{v\[[0-9]+:[0-9]+\]}}
+
+; GFX9-ALIGNED-DAG: ds_write2_b32 [[PTR]], v{{[0-9]+}}, v{{[0-9]+}} offset1:1
+; GFX9-ALIGNED-DAG: ds_write2_b32 [[PTR]], v{{[0-9]+}}, v{{[0-9]+}} offset0:2 offset1:3
+
+; GFX9-UNALIGNED: ds_write_b128 [[PTR]], {{v\[[0-9]+:[0-9]+\]}}
 
 ; GCN: s_endpgm
 define amdgpu_kernel void @store_misaligned64_constant_offsets() {
@@ -514,7 +518,11 @@ define amdgpu_kernel void @write2_sgemm_sequence(float addrspace(1)* %C, i32 %ld
 
 ; CI: ds_write2_b32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} offset1:1{{$}}
 ; CI: ds_write2_b32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} offset0:2 offset1:3{{$}}
-; GFX9: ds_write_b128 {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}
+
+; GFX9-ALIGNED-DAG: ds_write2_b32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} offset1:1{{$}}
+; GFX9-ALIGNED-DAG: ds_write2_b32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} offset0:2 offset1:3{{$}}
+
+; GFX9-UNALIGNED: ds_write_b128 {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}
 define amdgpu_kernel void @simple_write2_v4f32_superreg_align4(<4 x float> addrspace(3)* %out, <4 x float> addrspace(1)* %in) #0 {
   %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
   %in.gep = getelementptr inbounds <4 x float>, <4 x float> addrspace(1)* %in

diff  --git a/llvm/test/CodeGen/AMDGPU/lds-misaligned-bug.ll b/llvm/test/CodeGen/AMDGPU/lds-misaligned-bug.ll
index 1e5dcffdedd7..76ee829ad793 100644
--- a/llvm/test/CodeGen/AMDGPU/lds-misaligned-bug.ll
+++ b/llvm/test/CodeGen/AMDGPU/lds-misaligned-bug.ll
@@ -2,6 +2,7 @@
 ; RUN: llc -march=amdgcn -mcpu=gfx1011 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SPLIT %s
 ; RUN: llc -march=amdgcn -mcpu=gfx1012 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SPLIT %s
 ; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -mattr=+cumode < %s | FileCheck -check-prefixes=GCN,VECT %s
+; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -mattr=+cumode,+unaligned-access-mode < %s | FileCheck -check-prefixes=UNALIGNED,VECT %s
 
 ; GCN-LABEL: test_local_misaligned_v2:
 ; GCN-DAG: ds_read2_b32
@@ -21,12 +22,12 @@ bb:
 }
 
 ; GCN-LABEL: test_local_misaligned_v4:
-; VECT-DAG: ds_read_b128
-; VECT-DAG: ds_write_b128
-; SPLIT-DAG: ds_read2_b32
-; SPLIT-DAG: ds_read2_b32
-; SPLIT-DAG: ds_write2_b32
-; SPLIT-DAG: ds_write2_b32
+; GCN-DAG: ds_read2_b32
+; GCN-DAG: ds_read2_b32
+; GCN-DAG: ds_write2_b32
+; GCN-DAG: ds_write2_b32
+; UNALIGNED-DAG: ds_read_b128
+; UNALIGNED-DAG: ds_write_b128
 define amdgpu_kernel void @test_local_misaligned_v4(i32 addrspace(3)* %arg) {
 bb:
   %lid = tail call i32 @llvm.amdgcn.workitem.id.x()
@@ -46,12 +47,12 @@ bb:
 }
 
 ; GCN-LABEL: test_local_misaligned_v3:
-; VECT-DAG: ds_read_b96
-; VECT-DAG: ds_write_b96
-; SPLIT-DAG: ds_read2_b32
-; SPLIT-DAG: ds_read_b32
-; SPLIT-DAG: ds_write2_b32
-; SPLIT-DAG: ds_write_b32
+; GCN-DAG: ds_read2_b32
+; GCN-DAG: ds_read_b32
+; GCN-DAG: ds_write2_b32
+; GCN-DAG: ds_write_b32
+; UNALIGNED-DAG: ds_read_b96
+; UNALIGNED-DAG: ds_write_b96
 define amdgpu_kernel void @test_local_misaligned_v3(i32 addrspace(3)* %arg) {
 bb:
   %lid = tail call i32 @llvm.amdgcn.workitem.id.x()

diff  --git a/llvm/test/CodeGen/AMDGPU/load-local.128.ll b/llvm/test/CodeGen/AMDGPU/load-local.128.ll
index e7f2ddb4fd1a..716237a9955f 100644
--- a/llvm/test/CodeGen/AMDGPU/load-local.128.ll
+++ b/llvm/test/CodeGen/AMDGPU/load-local.128.ll
@@ -288,7 +288,9 @@ define <4 x i32> @load_lds_v4i32_align4(<4 x i32> addrspace(3)* %ptr) {
 ; GFX9-LABEL: load_lds_v4i32_align4:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    ds_read_b128 v[0:3], v0
+; GFX9-NEXT:    v_mov_b32_e32 v2, v0
+; GFX9-NEXT:    ds_read2_b32 v[0:1], v0 offset1:1
+; GFX9-NEXT:    ds_read2_b32 v[2:3], v2 offset0:2 offset1:3
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;

diff  --git a/llvm/test/CodeGen/AMDGPU/load-local.96.ll b/llvm/test/CodeGen/AMDGPU/load-local.96.ll
index d210d4edeba1..14c9cbbe8c07 100644
--- a/llvm/test/CodeGen/AMDGPU/load-local.96.ll
+++ b/llvm/test/CodeGen/AMDGPU/load-local.96.ll
@@ -239,7 +239,9 @@ define <3 x i32> @load_lds_v3i32_align4(<3 x i32> addrspace(3)* %ptr) {
 ; GFX9-LABEL: load_lds_v3i32_align4:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    ds_read_b96 v[0:2], v0
+; GFX9-NEXT:    v_mov_b32_e32 v2, v0
+; GFX9-NEXT:    ds_read2_b32 v[0:1], v0 offset1:1
+; GFX9-NEXT:    ds_read_b32 v2, v2 offset:8
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -272,7 +274,9 @@ define <3 x i32> @load_lds_v3i32_align8(<3 x i32> addrspace(3)* %ptr) {
 ; GFX9-LABEL: load_lds_v3i32_align8:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    ds_read_b96 v[0:2], v0
+; GFX9-NEXT:    v_mov_b32_e32 v2, v0
+; GFX9-NEXT:    ds_read2_b32 v[0:1], v0 offset1:1
+; GFX9-NEXT:    ds_read_b32 v2, v2 offset:8
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;

diff  --git a/llvm/test/CodeGen/AMDGPU/store-local.128.ll b/llvm/test/CodeGen/AMDGPU/store-local.128.ll
index 80658fa9ed75..ebb200458265 100644
--- a/llvm/test/CodeGen/AMDGPU/store-local.128.ll
+++ b/llvm/test/CodeGen/AMDGPU/store-local.128.ll
@@ -290,12 +290,13 @@ define amdgpu_kernel void @store_lds_v4i32_align4(<4 x i32> addrspace(3)* %out,
 ; GFX9-NEXT:    s_load_dword s4, s[0:1], 0x24
 ; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x34
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    v_mov_b32_e32 v4, s4
-; GFX9-NEXT:    v_mov_b32_e32 v0, s0
-; GFX9-NEXT:    v_mov_b32_e32 v1, s1
-; GFX9-NEXT:    v_mov_b32_e32 v2, s2
-; GFX9-NEXT:    v_mov_b32_e32 v3, s3
-; GFX9-NEXT:    ds_write_b128 v4, v[0:3]
+; GFX9-NEXT:    v_mov_b32_e32 v0, s4
+; GFX9-NEXT:    v_mov_b32_e32 v1, s0
+; GFX9-NEXT:    v_mov_b32_e32 v2, s1
+; GFX9-NEXT:    ds_write2_b32 v0, v1, v2 offset1:1
+; GFX9-NEXT:    v_mov_b32_e32 v3, s2
+; GFX9-NEXT:    v_mov_b32_e32 v1, s3
+; GFX9-NEXT:    ds_write2_b32 v0, v3, v1 offset0:2 offset1:3
 ; GFX9-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: store_lds_v4i32_align4:

diff  --git a/llvm/test/CodeGen/AMDGPU/store-local.96.ll b/llvm/test/CodeGen/AMDGPU/store-local.96.ll
index 41fdb1cbd61b..7a47bb8d9149 100644
--- a/llvm/test/CodeGen/AMDGPU/store-local.96.ll
+++ b/llvm/test/CodeGen/AMDGPU/store-local.96.ll
@@ -244,11 +244,12 @@ define amdgpu_kernel void @store_lds_v3i32_align4(<3 x i32> addrspace(3)* %out,
 ; GFX9-NEXT:    s_load_dword s4, s[0:1], 0x24
 ; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x34
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    v_mov_b32_e32 v3, s4
-; GFX9-NEXT:    v_mov_b32_e32 v0, s0
-; GFX9-NEXT:    v_mov_b32_e32 v1, s1
-; GFX9-NEXT:    v_mov_b32_e32 v2, s2
-; GFX9-NEXT:    ds_write_b96 v3, v[0:2]
+; GFX9-NEXT:    v_mov_b32_e32 v0, s4
+; GFX9-NEXT:    v_mov_b32_e32 v1, s0
+; GFX9-NEXT:    v_mov_b32_e32 v2, s1
+; GFX9-NEXT:    v_mov_b32_e32 v3, s2
+; GFX9-NEXT:    ds_write2_b32 v0, v1, v2 offset1:1
+; GFX9-NEXT:    ds_write_b32 v0, v3 offset:8
 ; GFX9-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: store_lds_v3i32_align4:
@@ -288,11 +289,12 @@ define amdgpu_kernel void @store_lds_v3i32_align8(<3 x i32> addrspace(3)* %out,
 ; GFX9-NEXT:    s_load_dword s4, s[0:1], 0x24
 ; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x34
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    v_mov_b32_e32 v3, s4
+; GFX9-NEXT:    v_mov_b32_e32 v2, s4
+; GFX9-NEXT:    v_mov_b32_e32 v3, s2
 ; GFX9-NEXT:    v_mov_b32_e32 v0, s0
 ; GFX9-NEXT:    v_mov_b32_e32 v1, s1
-; GFX9-NEXT:    v_mov_b32_e32 v2, s2
-; GFX9-NEXT:    ds_write_b96 v3, v[0:2]
+; GFX9-NEXT:    ds_write_b32 v2, v3 offset:8
+; GFX9-NEXT:    ds_write_b64 v2, v[0:1]
 ; GFX9-NEXT:    s_endpgm
 ;
 ; GFX7-LABEL: store_lds_v3i32_align8:

diff  --git a/llvm/test/CodeGen/AMDGPU/store-local.ll b/llvm/test/CodeGen/AMDGPU/store-local.ll
index 8984e22bed5b..f302ea099b75 100644
--- a/llvm/test/CodeGen/AMDGPU/store-local.ll
+++ b/llvm/test/CodeGen/AMDGPU/store-local.ll
@@ -179,9 +179,8 @@ entry:
 ; CM: LDS_WRITE
 ; CM: LDS_WRITE
 
-; SICIVI: ds_write2_b32
-; SICIVI: ds_write2_b32
-; GFX9: ds_write_b128
+; GCN: ds_write2_b32
+; GCN: ds_write2_b32
 define amdgpu_kernel void @store_local_v4i32_align4(<4 x i32> addrspace(3)* %out, <4 x i32> %in) {
 entry:
   store <4 x i32> %in, <4 x i32> addrspace(3)* %out, align 4


        


More information about the llvm-commits mailing list