[llvm] e46badd - GlobalISel: Have lowerLoad scalarize unaligned vectors

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Fri Jul 30 10:23:37 PDT 2021


Author: Matt Arsenault
Date: 2021-07-30T13:23:29-04:00
New Revision: e46badd4e9cd2bf2f412b1a857f62ecc88bde5a1

URL: https://github.com/llvm/llvm-project/commit/e46badd4e9cd2bf2f412b1a857f62ecc88bde5a1
DIFF: https://github.com/llvm/llvm-project/commit/e46badd4e9cd2bf2f412b1a857f62ecc88bde5a1.diff

LOG: GlobalISel: Have lowerLoad scalarize unaligned vectors

This could be smarter by picking an ideal type, or at least splitting
the vector in half first. Also handles lower for non-power-of-2,
non-extending vector loads.

Currently this just avoids failing to legalize some odd vector AMDGPU
tests, but is a step towards removing the split logic from the
NarrowScalar logic.

Added: 
    

Modified: 
    llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 4dddb98b47d5..074e5e6920a4 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -2886,13 +2886,14 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerLoad(GAnyLoad &LoadMI) {
   MachineMemOperand &MMO = LoadMI.getMMO();
   LLT MemTy = MMO.getMemoryType();
   MachineFunction &MF = MIRBuilder.getMF();
-  if (MemTy.isVector())
-    return UnableToLegalize;
 
   unsigned MemSizeInBits = MemTy.getSizeInBits();
   unsigned MemStoreSizeInBits = 8 * MemTy.getSizeInBytes();
 
   if (MemSizeInBits != MemStoreSizeInBits) {
+    if (MemTy.isVector())
+      return UnableToLegalize;
+
     // Promote to a byte-sized load if not loading an integral number of
     // bytes.  For example, promote EXTLOAD:i20 -> EXTLOAD:i24.
     LLT WideMemTy = LLT::scalar(MemStoreSizeInBits);
@@ -2928,9 +2929,6 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerLoad(GAnyLoad &LoadMI) {
     return Legalized;
   }
 
-  if (DstTy.isVector())
-    return UnableToLegalize;
-
   // Big endian lowering not implemented.
   if (MIRBuilder.getDataLayout().isBigEndian())
     return UnableToLegalize;
@@ -2953,9 +2951,12 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerLoad(GAnyLoad &LoadMI) {
   uint64_t LargeSplitSize, SmallSplitSize;
 
   if (!isPowerOf2_32(MemSizeInBits)) {
+    // This load needs splitting into power of 2 sized loads.
     LargeSplitSize = PowerOf2Floor(MemSizeInBits);
     SmallSplitSize = MemSizeInBits - LargeSplitSize;
   } else {
+    // This is already a power of 2, but we still need to split this in half.
+    //
     // Assume we're being asked to decompose an unaligned load.
     // TODO: If this requires multiple splits, handle them all at once.
     auto &Ctx = MF.getFunction().getContext();
@@ -2965,6 +2966,16 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerLoad(GAnyLoad &LoadMI) {
     SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
   }
 
+  if (MemTy.isVector()) {
+    // TODO: Handle vector extloads
+    if (MemTy != DstTy)
+      return UnableToLegalize;
+
+    // TODO: We can do better than scalarizing the vector and at least split it
+    // in half.
+    return reduceLoadStoreWidth(LoadMI, 0, DstTy.getElementType());
+  }
+
   MachineMemOperand *LargeMMO =
       MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
   MachineMemOperand *SmallMMO =

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir
index eaac2ef51df0..0a404dc571bc 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir
@@ -4443,31 +4443,120 @@ body: |
 
     ; CI-LABEL: name: test_load_constant_v3s16_align4
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[COPY]](p4) :: (load (<3 x s16>), align 4, addrspace 4)
-    ; CI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; CI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
-    ; CI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[LOAD]](<3 x s16>), [[UV]](<3 x s16>)
-    ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS1]](<6 x s16>)
+    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), align 4, addrspace 4)
+    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4)
+    ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, align 4, addrspace 4)
+    ; CI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+    ; CI: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; CI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
+    ; CI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
+    ; CI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; CI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C2]](s32)
+    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
+    ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]]
+    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]]
+    ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
+    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]]
+    ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]]
+    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
+    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; CI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+    ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; VI-LABEL: name: test_load_constant_v3s16_align4
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[COPY]](p4) :: (load (<3 x s16>), align 4, addrspace 4)
-    ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
-    ; VI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[LOAD]](<3 x s16>), [[UV]](<3 x s16>)
-    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS1]](<6 x s16>)
+    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), align 4, addrspace 4)
+    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4)
+    ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, align 4, addrspace 4)
+    ; VI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+    ; VI: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; VI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; VI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
+    ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
+    ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C2]](s32)
+    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
+    ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]]
+    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]]
+    ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
+    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; VI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]]
+    ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]]
+    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
+    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; VI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; GFX9-LABEL: name: test_load_constant_v3s16_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[COPY]](p4) :: (load (<3 x s16>), align 4, addrspace 4)
-    ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
-    ; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[LOAD]](<3 x s16>), [[UV]](<3 x s16>)
-    ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS1]](<6 x s16>)
+    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), align 4, addrspace 4)
+    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4)
+    ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, align 4, addrspace 4)
+    ; GFX9: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+    ; GFX9: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; GFX9: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
+    ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
+    ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C2]](s32)
+    ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+    ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
+    ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+    ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32)
+    ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32)
+    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+    ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(<3 x s16>) = G_LOAD %0 :: (load (<3 x s16>), align 4, addrspace 4)
     %2:_(<3 x s16>) = G_IMPLICIT_DEF

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir
index e6747716dda1..c9b4aad635ff 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir
@@ -4461,31 +4461,120 @@ body: |
 
     ; CI-LABEL: name: test_load_flat_v3s16_align4
     ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[COPY]](p0) :: (load (<3 x s16>), align 4)
-    ; CI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; CI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
-    ; CI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[LOAD]](<3 x s16>), [[UV]](<3 x s16>)
-    ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS1]](<6 x s16>)
+    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 4)
+    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2)
+    ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4, align 4)
+    ; CI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+    ; CI: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; CI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
+    ; CI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
+    ; CI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; CI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C2]](s32)
+    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
+    ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]]
+    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]]
+    ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
+    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]]
+    ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]]
+    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
+    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; CI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+    ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; VI-LABEL: name: test_load_flat_v3s16_align4
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[COPY]](p0) :: (load (<3 x s16>), align 4)
-    ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
-    ; VI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[LOAD]](<3 x s16>), [[UV]](<3 x s16>)
-    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS1]](<6 x s16>)
+    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 4)
+    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2)
+    ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4, align 4)
+    ; VI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+    ; VI: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; VI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; VI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
+    ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
+    ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C2]](s32)
+    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
+    ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]]
+    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]]
+    ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
+    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; VI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]]
+    ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]]
+    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
+    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; VI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; GFX9-LABEL: name: test_load_flat_v3s16_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[COPY]](p0) :: (load (<3 x s16>), align 4)
-    ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
-    ; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[LOAD]](<3 x s16>), [[UV]](<3 x s16>)
-    ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS1]](<6 x s16>)
+    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 4)
+    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2)
+    ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4, align 4)
+    ; GFX9: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+    ; GFX9: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; GFX9: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
+    ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
+    ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C2]](s32)
+    ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+    ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
+    ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+    ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32)
+    ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32)
+    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+    ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(<3 x s16>) = G_LOAD %0 :: (load (<3 x s16>), align 4, addrspace 0)
     %2:_(<3 x s16>) = G_IMPLICIT_DEF

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir
index 4d885cdd0637..451557766b6b 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir
@@ -7557,58 +7557,236 @@ body: |
     ; CI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
     ; SI-LABEL: name: test_load_global_v3s16_align4
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[COPY]](p1) :: (load (<3 x s16>), align 4, addrspace 1)
-    ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; SI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; SI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
-    ; SI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[LOAD]](<3 x s16>), [[UV]](<3 x s16>)
-    ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS1]](<6 x s16>)
+    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
+    ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1)
+    ; SI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+    ; SI: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; SI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; SI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
+    ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
+    ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C2]](s32)
+    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
+    ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]]
+    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; SI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+    ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]]
+    ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
+    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; SI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]]
+    ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]]
+    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
+    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; SI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+    ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; CI-HSA-LABEL: name: test_load_global_v3s16_align4
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[COPY]](p1) :: (load (<3 x s16>), align 4, addrspace 1)
-    ; CI-HSA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CI-HSA: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CI-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; CI-HSA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
-    ; CI-HSA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[LOAD]](<3 x s16>), [[UV]](<3 x s16>)
-    ; CI-HSA: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS1]](<6 x s16>)
+    ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
+    ; CI-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-HSA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; CI-HSA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CI-HSA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CI-HSA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1)
+    ; CI-HSA: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+    ; CI-HSA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; CI-HSA: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CI-HSA: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CI-HSA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
+    ; CI-HSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; CI-HSA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
+    ; CI-HSA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; CI-HSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C2]](s32)
+    ; CI-HSA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; CI-HSA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
+    ; CI-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+    ; CI-HSA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]]
+    ; CI-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; CI-HSA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-HSA: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CI-HSA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+    ; CI-HSA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]]
+    ; CI-HSA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; CI-HSA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
+    ; CI-HSA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; CI-HSA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CI-HSA: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; CI-HSA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; CI-HSA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]]
+    ; CI-HSA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; CI-HSA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]]
+    ; CI-HSA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
+    ; CI-HSA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; CI-HSA: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; CI-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+    ; CI-HSA: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; CI-MESA-LABEL: name: test_load_global_v3s16_align4
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[COPY]](p1) :: (load (<3 x s16>), align 4, addrspace 1)
-    ; CI-MESA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CI-MESA: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; CI-MESA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
-    ; CI-MESA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[LOAD]](<3 x s16>), [[UV]](<3 x s16>)
-    ; CI-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS1]](<6 x s16>)
+    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
+    ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1)
+    ; CI-MESA: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+    ; CI-MESA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; CI-MESA: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CI-MESA: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CI-MESA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
+    ; CI-MESA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; CI-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
+    ; CI-MESA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; CI-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C2]](s32)
+    ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
+    ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+    ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]]
+    ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-MESA: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+    ; CI-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]]
+    ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
+    ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; CI-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CI-MESA: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; CI-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]]
+    ; CI-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; CI-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]]
+    ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
+    ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; CI-MESA: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+    ; CI-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; VI-LABEL: name: test_load_global_v3s16_align4
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[COPY]](p1) :: (load (<3 x s16>), align 4, addrspace 1)
-    ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
-    ; VI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[LOAD]](<3 x s16>), [[UV]](<3 x s16>)
-    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS1]](<6 x s16>)
+    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
+    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1)
+    ; VI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+    ; VI: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; VI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; VI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
+    ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
+    ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C2]](s32)
+    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
+    ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]]
+    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]]
+    ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
+    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; VI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]]
+    ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]]
+    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
+    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; VI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; GFX9-HSA-LABEL: name: test_load_global_v3s16_align4
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[COPY]](p1) :: (load (<3 x s16>), align 4, addrspace 1)
-    ; GFX9-HSA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-HSA: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; GFX9-HSA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
-    ; GFX9-HSA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[LOAD]](<3 x s16>), [[UV]](<3 x s16>)
-    ; GFX9-HSA: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS1]](<6 x s16>)
+    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
+    ; GFX9-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-HSA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; GFX9-HSA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; GFX9-HSA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; GFX9-HSA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1)
+    ; GFX9-HSA: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+    ; GFX9-HSA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-HSA: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-HSA: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-HSA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
+    ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; GFX9-HSA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
+    ; GFX9-HSA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; GFX9-HSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C2]](s32)
+    ; GFX9-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; GFX9-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
+    ; GFX9-HSA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+    ; GFX9-HSA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32)
+    ; GFX9-HSA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; GFX9-HSA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32)
+    ; GFX9-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+    ; GFX9-HSA: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; GFX9-MESA-LABEL: name: test_load_global_v3s16_align4
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[COPY]](p1) :: (load (<3 x s16>), align 4, addrspace 1)
-    ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-MESA: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; GFX9-MESA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
-    ; GFX9-MESA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[LOAD]](<3 x s16>), [[UV]](<3 x s16>)
-    ; GFX9-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS1]](<6 x s16>)
+    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
+    ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1)
+    ; GFX9-MESA: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+    ; GFX9-MESA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-MESA: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-MESA: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-MESA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
+    ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; GFX9-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
+    ; GFX9-MESA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; GFX9-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C2]](s32)
+    ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
+    ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+    ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32)
+    ; GFX9-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; GFX9-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32)
+    ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+    ; GFX9-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<3 x s16>) = G_LOAD %0 :: (load (<3 x s16>), align 4, addrspace 1)
     %2:_(<3 x s16>) = G_IMPLICIT_DEF
@@ -7667,13 +7845,47 @@ body: |
     ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; CI-HSA-LABEL: name: test_load_global_v3s16_align2
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[COPY]](p1) :: (load (<3 x s16>), align 2, addrspace 1)
-    ; CI-HSA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CI-HSA: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CI-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; CI-HSA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
-    ; CI-HSA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[LOAD]](<3 x s16>), [[UV]](<3 x s16>)
-    ; CI-HSA: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS1]](<6 x s16>)
+    ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
+    ; CI-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-HSA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; CI-HSA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CI-HSA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CI-HSA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1)
+    ; CI-HSA: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+    ; CI-HSA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; CI-HSA: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CI-HSA: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CI-HSA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
+    ; CI-HSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; CI-HSA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
+    ; CI-HSA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; CI-HSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C2]](s32)
+    ; CI-HSA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; CI-HSA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
+    ; CI-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+    ; CI-HSA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]]
+    ; CI-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; CI-HSA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-HSA: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CI-HSA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+    ; CI-HSA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]]
+    ; CI-HSA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; CI-HSA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
+    ; CI-HSA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; CI-HSA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CI-HSA: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; CI-HSA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; CI-HSA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]]
+    ; CI-HSA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; CI-HSA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]]
+    ; CI-HSA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
+    ; CI-HSA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; CI-HSA: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; CI-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+    ; CI-HSA: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; CI-MESA-LABEL: name: test_load_global_v3s16_align2
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
@@ -7762,13 +7974,34 @@ body: |
     ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; GFX9-HSA-LABEL: name: test_load_global_v3s16_align2
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[COPY]](p1) :: (load (<3 x s16>), align 2, addrspace 1)
-    ; GFX9-HSA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-HSA: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; GFX9-HSA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
-    ; GFX9-HSA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[LOAD]](<3 x s16>), [[UV]](<3 x s16>)
-    ; GFX9-HSA: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS1]](<6 x s16>)
+    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
+    ; GFX9-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-HSA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; GFX9-HSA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; GFX9-HSA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; GFX9-HSA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1)
+    ; GFX9-HSA: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+    ; GFX9-HSA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-HSA: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-HSA: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-HSA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
+    ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; GFX9-HSA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
+    ; GFX9-HSA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; GFX9-HSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C2]](s32)
+    ; GFX9-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; GFX9-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
+    ; GFX9-HSA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+    ; GFX9-HSA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32)
+    ; GFX9-HSA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; GFX9-HSA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32)
+    ; GFX9-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+    ; GFX9-HSA: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; GFX9-MESA-LABEL: name: test_load_global_v3s16_align2
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
@@ -7887,13 +8120,47 @@ body: |
     ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; CI-HSA-LABEL: name: test_load_global_v3s16_align1
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[COPY]](p1) :: (load (<3 x s16>), align 1, addrspace 1)
-    ; CI-HSA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CI-HSA: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CI-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; CI-HSA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
-    ; CI-HSA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[LOAD]](<3 x s16>), [[UV]](<3 x s16>)
-    ; CI-HSA: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS1]](<6 x s16>)
+    ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 1, addrspace 1)
+    ; CI-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-HSA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, align 1, addrspace 1)
+    ; CI-HSA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CI-HSA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CI-HSA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 1, addrspace 1)
+    ; CI-HSA: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+    ; CI-HSA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; CI-HSA: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CI-HSA: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CI-HSA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
+    ; CI-HSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; CI-HSA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
+    ; CI-HSA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; CI-HSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C2]](s32)
+    ; CI-HSA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; CI-HSA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
+    ; CI-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+    ; CI-HSA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]]
+    ; CI-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; CI-HSA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-HSA: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CI-HSA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+    ; CI-HSA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]]
+    ; CI-HSA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; CI-HSA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
+    ; CI-HSA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; CI-HSA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CI-HSA: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; CI-HSA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; CI-HSA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]]
+    ; CI-HSA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; CI-HSA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]]
+    ; CI-HSA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
+    ; CI-HSA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; CI-HSA: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; CI-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+    ; CI-HSA: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; CI-MESA-LABEL: name: test_load_global_v3s16_align1
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
@@ -8036,13 +8303,34 @@ body: |
     ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; GFX9-HSA-LABEL: name: test_load_global_v3s16_align1
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[COPY]](p1) :: (load (<3 x s16>), align 1, addrspace 1)
-    ; GFX9-HSA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-HSA: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; GFX9-HSA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
-    ; GFX9-HSA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[LOAD]](<3 x s16>), [[UV]](<3 x s16>)
-    ; GFX9-HSA: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS1]](<6 x s16>)
+    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 1, addrspace 1)
+    ; GFX9-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-HSA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, align 1, addrspace 1)
+    ; GFX9-HSA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; GFX9-HSA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; GFX9-HSA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 1, addrspace 1)
+    ; GFX9-HSA: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+    ; GFX9-HSA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-HSA: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-HSA: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-HSA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
+    ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; GFX9-HSA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
+    ; GFX9-HSA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; GFX9-HSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C2]](s32)
+    ; GFX9-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; GFX9-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
+    ; GFX9-HSA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+    ; GFX9-HSA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32)
+    ; GFX9-HSA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; GFX9-HSA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32)
+    ; GFX9-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+    ; GFX9-HSA: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; GFX9-MESA-LABEL: name: test_load_global_v3s16_align1
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
@@ -8873,64 +9161,218 @@ body: |
     ; SI: $vgpr2 = COPY [[BITCAST5]](<2 x s16>)
     ; CI-HSA-LABEL: name: test_load_global_v5s16_align8
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(<5 x s16>) = G_LOAD [[COPY]](p1) :: (load (<5 x s16>), align 8, addrspace 1)
-    ; CI-HSA: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
-    ; CI-HSA: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
-    ; CI-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>)
-    ; CI-HSA: [[UV:%[0-9]+]]:_(<5 x s16>), [[UV1:%[0-9]+]]:_(<5 x s16>), [[UV2:%[0-9]+]]:_(<5 x s16>), [[UV3:%[0-9]+]]:_(<5 x s16>), [[UV4:%[0-9]+]]:_(<5 x s16>), [[UV5:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<30 x s16>)
-    ; CI-HSA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[LOAD]](<5 x s16>), [[UV]](<5 x s16>)
-    ; CI-HSA: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<10 x s16>)
-    ; CI-HSA: $vgpr0 = COPY [[UV6]](<2 x s16>)
-    ; CI-HSA: $vgpr1 = COPY [[UV7]](<2 x s16>)
-    ; CI-HSA: $vgpr2 = COPY [[UV8]](<2 x s16>)
+    ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 8, addrspace 1)
+    ; CI-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-HSA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; CI-HSA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CI-HSA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CI-HSA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1)
+    ; CI-HSA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; CI-HSA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; CI-HSA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
+    ; CI-HSA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; CI-HSA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; CI-HSA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 8, addrspace 1)
+    ; CI-HSA: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+    ; CI-HSA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; CI-HSA: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
+    ; CI-HSA: [[DEF3:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
+    ; CI-HSA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<6 x s16>)
+    ; CI-HSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; CI-HSA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32)
+    ; CI-HSA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; CI-HSA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C5]]
+    ; CI-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+    ; CI-HSA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C5]]
+    ; CI-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; CI-HSA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-HSA: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CI-HSA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+    ; CI-HSA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C5]]
+    ; CI-HSA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32)
+    ; CI-HSA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C5]]
+    ; CI-HSA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+    ; CI-HSA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CI-HSA: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; CI-HSA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32)
+    ; CI-HSA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C5]]
+    ; CI-HSA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; CI-HSA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C5]]
+    ; CI-HSA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; CI-HSA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; CI-HSA: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; CI-HSA: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
+    ; CI-HSA: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
+    ; CI-HSA: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
     ; CI-MESA-LABEL: name: test_load_global_v5s16_align8
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(<5 x s16>) = G_LOAD [[COPY]](p1) :: (load (<5 x s16>), align 8, addrspace 1)
-    ; CI-MESA: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
-    ; CI-MESA: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
-    ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>)
-    ; CI-MESA: [[UV:%[0-9]+]]:_(<5 x s16>), [[UV1:%[0-9]+]]:_(<5 x s16>), [[UV2:%[0-9]+]]:_(<5 x s16>), [[UV3:%[0-9]+]]:_(<5 x s16>), [[UV4:%[0-9]+]]:_(<5 x s16>), [[UV5:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<30 x s16>)
-    ; CI-MESA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[LOAD]](<5 x s16>), [[UV]](<5 x s16>)
-    ; CI-MESA: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<10 x s16>)
-    ; CI-MESA: $vgpr0 = COPY [[UV6]](<2 x s16>)
-    ; CI-MESA: $vgpr1 = COPY [[UV7]](<2 x s16>)
-    ; CI-MESA: $vgpr2 = COPY [[UV8]](<2 x s16>)
+    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 8, addrspace 1)
+    ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1)
+    ; CI-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
+    ; CI-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; CI-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 8, addrspace 1)
+    ; CI-MESA: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+    ; CI-MESA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; CI-MESA: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
+    ; CI-MESA: [[DEF3:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
+    ; CI-MESA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<6 x s16>)
+    ; CI-MESA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; CI-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32)
+    ; CI-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C5]]
+    ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+    ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C5]]
+    ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-MESA: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+    ; CI-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C5]]
+    ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32)
+    ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C5]]
+    ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+    ; CI-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CI-MESA: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32)
+    ; CI-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C5]]
+    ; CI-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; CI-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C5]]
+    ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; CI-MESA: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; CI-MESA: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
+    ; CI-MESA: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
+    ; CI-MESA: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
     ; VI-LABEL: name: test_load_global_v5s16_align8
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<5 x s16>) = G_LOAD [[COPY]](p1) :: (load (<5 x s16>), align 8, addrspace 1)
-    ; VI: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>)
-    ; VI: [[UV:%[0-9]+]]:_(<5 x s16>), [[UV1:%[0-9]+]]:_(<5 x s16>), [[UV2:%[0-9]+]]:_(<5 x s16>), [[UV3:%[0-9]+]]:_(<5 x s16>), [[UV4:%[0-9]+]]:_(<5 x s16>), [[UV5:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<30 x s16>)
-    ; VI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[LOAD]](<5 x s16>), [[UV]](<5 x s16>)
-    ; VI: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<10 x s16>)
-    ; VI: $vgpr0 = COPY [[UV6]](<2 x s16>)
-    ; VI: $vgpr1 = COPY [[UV7]](<2 x s16>)
-    ; VI: $vgpr2 = COPY [[UV8]](<2 x s16>)
+    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 8, addrspace 1)
+    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1)
+    ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
+    ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 8, addrspace 1)
+    ; VI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+    ; VI: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; VI: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
+    ; VI: [[DEF3:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
+    ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<6 x s16>)
+    ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32)
+    ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C5]]
+    ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C5]]
+    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C5]]
+    ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32)
+    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C5]]
+    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32)
+    ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C5]]
+    ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C5]]
+    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; VI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; VI: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
+    ; VI: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
+    ; VI: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
     ; GFX9-HSA-LABEL: name: test_load_global_v5s16_align8
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<5 x s16>) = G_LOAD [[COPY]](p1) :: (load (<5 x s16>), align 8, addrspace 1)
-    ; GFX9-HSA: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-HSA: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>)
-    ; GFX9-HSA: [[UV:%[0-9]+]]:_(<5 x s16>), [[UV1:%[0-9]+]]:_(<5 x s16>), [[UV2:%[0-9]+]]:_(<5 x s16>), [[UV3:%[0-9]+]]:_(<5 x s16>), [[UV4:%[0-9]+]]:_(<5 x s16>), [[UV5:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<30 x s16>)
-    ; GFX9-HSA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[LOAD]](<5 x s16>), [[UV]](<5 x s16>)
-    ; GFX9-HSA: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<10 x s16>)
-    ; GFX9-HSA: $vgpr0 = COPY [[UV6]](<2 x s16>)
-    ; GFX9-HSA: $vgpr1 = COPY [[UV7]](<2 x s16>)
-    ; GFX9-HSA: $vgpr2 = COPY [[UV8]](<2 x s16>)
+    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 8, addrspace 1)
+    ; GFX9-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-HSA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; GFX9-HSA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; GFX9-HSA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; GFX9-HSA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1)
+    ; GFX9-HSA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; GFX9-HSA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; GFX9-HSA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
+    ; GFX9-HSA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; GFX9-HSA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; GFX9-HSA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 8, addrspace 1)
+    ; GFX9-HSA: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+    ; GFX9-HSA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-HSA: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-HSA: [[DEF3:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-HSA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<6 x s16>)
+    ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; GFX9-HSA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32)
+    ; GFX9-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; GFX9-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
+    ; GFX9-HSA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+    ; GFX9-HSA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32)
+    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32)
+    ; GFX9-HSA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32)
+    ; GFX9-HSA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32)
+    ; GFX9-HSA: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
+    ; GFX9-HSA: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+    ; GFX9-HSA: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
     ; GFX9-MESA-LABEL: name: test_load_global_v5s16_align8
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<5 x s16>) = G_LOAD [[COPY]](p1) :: (load (<5 x s16>), align 8, addrspace 1)
-    ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-MESA: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>)
-    ; GFX9-MESA: [[UV:%[0-9]+]]:_(<5 x s16>), [[UV1:%[0-9]+]]:_(<5 x s16>), [[UV2:%[0-9]+]]:_(<5 x s16>), [[UV3:%[0-9]+]]:_(<5 x s16>), [[UV4:%[0-9]+]]:_(<5 x s16>), [[UV5:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<30 x s16>)
-    ; GFX9-MESA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[LOAD]](<5 x s16>), [[UV]](<5 x s16>)
-    ; GFX9-MESA: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<10 x s16>)
-    ; GFX9-MESA: $vgpr0 = COPY [[UV6]](<2 x s16>)
-    ; GFX9-MESA: $vgpr1 = COPY [[UV7]](<2 x s16>)
-    ; GFX9-MESA: $vgpr2 = COPY [[UV8]](<2 x s16>)
+    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 8, addrspace 1)
+    ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1)
+    ; GFX9-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
+    ; GFX9-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; GFX9-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 8, addrspace 1)
+    ; GFX9-MESA: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+    ; GFX9-MESA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-MESA: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-MESA: [[DEF3:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-MESA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<6 x s16>)
+    ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; GFX9-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32)
+    ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
+    ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+    ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32)
+    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32)
+    ; GFX9-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32)
+    ; GFX9-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32)
+    ; GFX9-MESA: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
+    ; GFX9-MESA: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+    ; GFX9-MESA: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<5 x s16>) = G_LOAD %0 :: (load (<5 x s16>), align 8, addrspace 1)
     %2:_(<5 x s16>) = G_IMPLICIT_DEF
@@ -8994,64 +9436,218 @@ body: |
     ; SI: $vgpr2 = COPY [[BITCAST5]](<2 x s16>)
     ; CI-HSA-LABEL: name: test_load_global_v5s16_align4
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(<5 x s16>) = G_LOAD [[COPY]](p1) :: (load (<5 x s16>), align 4, addrspace 1)
-    ; CI-HSA: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
-    ; CI-HSA: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
-    ; CI-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>)
-    ; CI-HSA: [[UV:%[0-9]+]]:_(<5 x s16>), [[UV1:%[0-9]+]]:_(<5 x s16>), [[UV2:%[0-9]+]]:_(<5 x s16>), [[UV3:%[0-9]+]]:_(<5 x s16>), [[UV4:%[0-9]+]]:_(<5 x s16>), [[UV5:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<30 x s16>)
-    ; CI-HSA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[LOAD]](<5 x s16>), [[UV]](<5 x s16>)
-    ; CI-HSA: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<10 x s16>)
-    ; CI-HSA: $vgpr0 = COPY [[UV6]](<2 x s16>)
-    ; CI-HSA: $vgpr1 = COPY [[UV7]](<2 x s16>)
-    ; CI-HSA: $vgpr2 = COPY [[UV8]](<2 x s16>)
+    ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
+    ; CI-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-HSA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; CI-HSA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CI-HSA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CI-HSA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1)
+    ; CI-HSA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; CI-HSA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; CI-HSA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
+    ; CI-HSA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; CI-HSA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; CI-HSA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 4, addrspace 1)
+    ; CI-HSA: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+    ; CI-HSA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; CI-HSA: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
+    ; CI-HSA: [[DEF3:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
+    ; CI-HSA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<6 x s16>)
+    ; CI-HSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; CI-HSA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32)
+    ; CI-HSA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; CI-HSA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C5]]
+    ; CI-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+    ; CI-HSA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C5]]
+    ; CI-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; CI-HSA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-HSA: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CI-HSA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+    ; CI-HSA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C5]]
+    ; CI-HSA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32)
+    ; CI-HSA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C5]]
+    ; CI-HSA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+    ; CI-HSA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CI-HSA: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; CI-HSA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32)
+    ; CI-HSA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C5]]
+    ; CI-HSA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; CI-HSA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C5]]
+    ; CI-HSA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; CI-HSA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; CI-HSA: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; CI-HSA: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
+    ; CI-HSA: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
+    ; CI-HSA: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
     ; CI-MESA-LABEL: name: test_load_global_v5s16_align4
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(<5 x s16>) = G_LOAD [[COPY]](p1) :: (load (<5 x s16>), align 4, addrspace 1)
-    ; CI-MESA: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
-    ; CI-MESA: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
-    ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>)
-    ; CI-MESA: [[UV:%[0-9]+]]:_(<5 x s16>), [[UV1:%[0-9]+]]:_(<5 x s16>), [[UV2:%[0-9]+]]:_(<5 x s16>), [[UV3:%[0-9]+]]:_(<5 x s16>), [[UV4:%[0-9]+]]:_(<5 x s16>), [[UV5:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<30 x s16>)
-    ; CI-MESA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[LOAD]](<5 x s16>), [[UV]](<5 x s16>)
-    ; CI-MESA: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<10 x s16>)
-    ; CI-MESA: $vgpr0 = COPY [[UV6]](<2 x s16>)
-    ; CI-MESA: $vgpr1 = COPY [[UV7]](<2 x s16>)
-    ; CI-MESA: $vgpr2 = COPY [[UV8]](<2 x s16>)
+    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
+    ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1)
+    ; CI-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
+    ; CI-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; CI-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 4, addrspace 1)
+    ; CI-MESA: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+    ; CI-MESA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; CI-MESA: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
+    ; CI-MESA: [[DEF3:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
+    ; CI-MESA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<6 x s16>)
+    ; CI-MESA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; CI-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32)
+    ; CI-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C5]]
+    ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+    ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C5]]
+    ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-MESA: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+    ; CI-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C5]]
+    ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32)
+    ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C5]]
+    ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+    ; CI-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CI-MESA: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32)
+    ; CI-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C5]]
+    ; CI-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; CI-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C5]]
+    ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; CI-MESA: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; CI-MESA: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
+    ; CI-MESA: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
+    ; CI-MESA: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
     ; VI-LABEL: name: test_load_global_v5s16_align4
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<5 x s16>) = G_LOAD [[COPY]](p1) :: (load (<5 x s16>), align 4, addrspace 1)
-    ; VI: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>)
-    ; VI: [[UV:%[0-9]+]]:_(<5 x s16>), [[UV1:%[0-9]+]]:_(<5 x s16>), [[UV2:%[0-9]+]]:_(<5 x s16>), [[UV3:%[0-9]+]]:_(<5 x s16>), [[UV4:%[0-9]+]]:_(<5 x s16>), [[UV5:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<30 x s16>)
-    ; VI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[LOAD]](<5 x s16>), [[UV]](<5 x s16>)
-    ; VI: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<10 x s16>)
-    ; VI: $vgpr0 = COPY [[UV6]](<2 x s16>)
-    ; VI: $vgpr1 = COPY [[UV7]](<2 x s16>)
-    ; VI: $vgpr2 = COPY [[UV8]](<2 x s16>)
+    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
+    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1)
+    ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
+    ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 4, addrspace 1)
+    ; VI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+    ; VI: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; VI: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
+    ; VI: [[DEF3:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
+    ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<6 x s16>)
+    ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32)
+    ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C5]]
+    ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C5]]
+    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C5]]
+    ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32)
+    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C5]]
+    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32)
+    ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C5]]
+    ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C5]]
+    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; VI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; VI: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
+    ; VI: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
+    ; VI: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
     ; GFX9-HSA-LABEL: name: test_load_global_v5s16_align4
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<5 x s16>) = G_LOAD [[COPY]](p1) :: (load (<5 x s16>), align 4, addrspace 1)
-    ; GFX9-HSA: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-HSA: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>)
-    ; GFX9-HSA: [[UV:%[0-9]+]]:_(<5 x s16>), [[UV1:%[0-9]+]]:_(<5 x s16>), [[UV2:%[0-9]+]]:_(<5 x s16>), [[UV3:%[0-9]+]]:_(<5 x s16>), [[UV4:%[0-9]+]]:_(<5 x s16>), [[UV5:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<30 x s16>)
-    ; GFX9-HSA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[LOAD]](<5 x s16>), [[UV]](<5 x s16>)
-    ; GFX9-HSA: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<10 x s16>)
-    ; GFX9-HSA: $vgpr0 = COPY [[UV6]](<2 x s16>)
-    ; GFX9-HSA: $vgpr1 = COPY [[UV7]](<2 x s16>)
-    ; GFX9-HSA: $vgpr2 = COPY [[UV8]](<2 x s16>)
+    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
+    ; GFX9-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-HSA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; GFX9-HSA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; GFX9-HSA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; GFX9-HSA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1)
+    ; GFX9-HSA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; GFX9-HSA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; GFX9-HSA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
+    ; GFX9-HSA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; GFX9-HSA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; GFX9-HSA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 4, addrspace 1)
+    ; GFX9-HSA: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+    ; GFX9-HSA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-HSA: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-HSA: [[DEF3:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-HSA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<6 x s16>)
+    ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; GFX9-HSA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32)
+    ; GFX9-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; GFX9-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
+    ; GFX9-HSA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+    ; GFX9-HSA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32)
+    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32)
+    ; GFX9-HSA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32)
+    ; GFX9-HSA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32)
+    ; GFX9-HSA: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
+    ; GFX9-HSA: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+    ; GFX9-HSA: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
     ; GFX9-MESA-LABEL: name: test_load_global_v5s16_align4
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<5 x s16>) = G_LOAD [[COPY]](p1) :: (load (<5 x s16>), align 4, addrspace 1)
-    ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-MESA: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>)
-    ; GFX9-MESA: [[UV:%[0-9]+]]:_(<5 x s16>), [[UV1:%[0-9]+]]:_(<5 x s16>), [[UV2:%[0-9]+]]:_(<5 x s16>), [[UV3:%[0-9]+]]:_(<5 x s16>), [[UV4:%[0-9]+]]:_(<5 x s16>), [[UV5:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<30 x s16>)
-    ; GFX9-MESA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[LOAD]](<5 x s16>), [[UV]](<5 x s16>)
-    ; GFX9-MESA: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<10 x s16>)
-    ; GFX9-MESA: $vgpr0 = COPY [[UV6]](<2 x s16>)
-    ; GFX9-MESA: $vgpr1 = COPY [[UV7]](<2 x s16>)
-    ; GFX9-MESA: $vgpr2 = COPY [[UV8]](<2 x s16>)
+    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
+    ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1)
+    ; GFX9-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
+    ; GFX9-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; GFX9-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 4, addrspace 1)
+    ; GFX9-MESA: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+    ; GFX9-MESA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-MESA: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-MESA: [[DEF3:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-MESA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<6 x s16>)
+    ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; GFX9-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32)
+    ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
+    ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+    ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32)
+    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32)
+    ; GFX9-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32)
+    ; GFX9-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32)
+    ; GFX9-MESA: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
+    ; GFX9-MESA: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+    ; GFX9-MESA: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<5 x s16>) = G_LOAD %0 :: (load (<5 x s16>), align 4, addrspace 1)
     %2:_(<5 x s16>) = G_IMPLICIT_DEF
@@ -9119,16 +9715,52 @@ body: |
     ; SI: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
     ; CI-HSA-LABEL: name: test_load_global_v5s16_align2
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(<5 x s16>) = G_LOAD [[COPY]](p1) :: (load (<5 x s16>), align 2, addrspace 1)
-    ; CI-HSA: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
-    ; CI-HSA: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
-    ; CI-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>)
-    ; CI-HSA: [[UV:%[0-9]+]]:_(<5 x s16>), [[UV1:%[0-9]+]]:_(<5 x s16>), [[UV2:%[0-9]+]]:_(<5 x s16>), [[UV3:%[0-9]+]]:_(<5 x s16>), [[UV4:%[0-9]+]]:_(<5 x s16>), [[UV5:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<30 x s16>)
-    ; CI-HSA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[LOAD]](<5 x s16>), [[UV]](<5 x s16>)
-    ; CI-HSA: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<10 x s16>)
-    ; CI-HSA: $vgpr0 = COPY [[UV6]](<2 x s16>)
-    ; CI-HSA: $vgpr1 = COPY [[UV7]](<2 x s16>)
-    ; CI-HSA: $vgpr2 = COPY [[UV8]](<2 x s16>)
+    ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
+    ; CI-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-HSA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; CI-HSA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CI-HSA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CI-HSA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1)
+    ; CI-HSA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; CI-HSA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; CI-HSA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
+    ; CI-HSA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; CI-HSA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; CI-HSA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, addrspace 1)
+    ; CI-HSA: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+    ; CI-HSA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; CI-HSA: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
+    ; CI-HSA: [[DEF3:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
+    ; CI-HSA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<6 x s16>)
+    ; CI-HSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; CI-HSA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32)
+    ; CI-HSA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; CI-HSA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C5]]
+    ; CI-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+    ; CI-HSA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C5]]
+    ; CI-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; CI-HSA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-HSA: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CI-HSA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+    ; CI-HSA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C5]]
+    ; CI-HSA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32)
+    ; CI-HSA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C5]]
+    ; CI-HSA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+    ; CI-HSA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CI-HSA: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; CI-HSA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32)
+    ; CI-HSA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C5]]
+    ; CI-HSA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; CI-HSA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C5]]
+    ; CI-HSA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; CI-HSA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; CI-HSA: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; CI-HSA: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
+    ; CI-HSA: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
+    ; CI-HSA: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
     ; CI-MESA-LABEL: name: test_load_global_v5s16_align2
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
@@ -9227,16 +9859,39 @@ body: |
     ; VI: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
     ; GFX9-HSA-LABEL: name: test_load_global_v5s16_align2
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<5 x s16>) = G_LOAD [[COPY]](p1) :: (load (<5 x s16>), align 2, addrspace 1)
-    ; GFX9-HSA: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-HSA: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>)
-    ; GFX9-HSA: [[UV:%[0-9]+]]:_(<5 x s16>), [[UV1:%[0-9]+]]:_(<5 x s16>), [[UV2:%[0-9]+]]:_(<5 x s16>), [[UV3:%[0-9]+]]:_(<5 x s16>), [[UV4:%[0-9]+]]:_(<5 x s16>), [[UV5:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<30 x s16>)
-    ; GFX9-HSA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[LOAD]](<5 x s16>), [[UV]](<5 x s16>)
-    ; GFX9-HSA: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<10 x s16>)
-    ; GFX9-HSA: $vgpr0 = COPY [[UV6]](<2 x s16>)
-    ; GFX9-HSA: $vgpr1 = COPY [[UV7]](<2 x s16>)
-    ; GFX9-HSA: $vgpr2 = COPY [[UV8]](<2 x s16>)
+    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
+    ; GFX9-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-HSA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; GFX9-HSA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; GFX9-HSA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; GFX9-HSA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1)
+    ; GFX9-HSA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; GFX9-HSA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; GFX9-HSA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
+    ; GFX9-HSA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; GFX9-HSA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; GFX9-HSA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, addrspace 1)
+    ; GFX9-HSA: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+    ; GFX9-HSA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-HSA: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-HSA: [[DEF3:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-HSA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<6 x s16>)
+    ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; GFX9-HSA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32)
+    ; GFX9-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; GFX9-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
+    ; GFX9-HSA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+    ; GFX9-HSA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32)
+    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32)
+    ; GFX9-HSA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32)
+    ; GFX9-HSA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32)
+    ; GFX9-HSA: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
+    ; GFX9-HSA: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+    ; GFX9-HSA: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
     ; GFX9-MESA-LABEL: name: test_load_global_v5s16_align2
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
@@ -9386,16 +10041,52 @@ body: |
     ; SI: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
     ; CI-HSA-LABEL: name: test_load_global_v5s16_align1
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(<5 x s16>) = G_LOAD [[COPY]](p1) :: (load (<5 x s16>), align 1, addrspace 1)
-    ; CI-HSA: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
-    ; CI-HSA: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
-    ; CI-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>)
-    ; CI-HSA: [[UV:%[0-9]+]]:_(<5 x s16>), [[UV1:%[0-9]+]]:_(<5 x s16>), [[UV2:%[0-9]+]]:_(<5 x s16>), [[UV3:%[0-9]+]]:_(<5 x s16>), [[UV4:%[0-9]+]]:_(<5 x s16>), [[UV5:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<30 x s16>)
-    ; CI-HSA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[LOAD]](<5 x s16>), [[UV]](<5 x s16>)
-    ; CI-HSA: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<10 x s16>)
-    ; CI-HSA: $vgpr0 = COPY [[UV6]](<2 x s16>)
-    ; CI-HSA: $vgpr1 = COPY [[UV7]](<2 x s16>)
-    ; CI-HSA: $vgpr2 = COPY [[UV8]](<2 x s16>)
+    ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 1, addrspace 1)
+    ; CI-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-HSA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, align 1, addrspace 1)
+    ; CI-HSA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CI-HSA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CI-HSA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 1, addrspace 1)
+    ; CI-HSA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; CI-HSA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; CI-HSA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, align 1, addrspace 1)
+    ; CI-HSA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; CI-HSA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; CI-HSA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 1, addrspace 1)
+    ; CI-HSA: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+    ; CI-HSA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; CI-HSA: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
+    ; CI-HSA: [[DEF3:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
+    ; CI-HSA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<6 x s16>)
+    ; CI-HSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; CI-HSA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32)
+    ; CI-HSA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; CI-HSA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C5]]
+    ; CI-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+    ; CI-HSA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C5]]
+    ; CI-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; CI-HSA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-HSA: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CI-HSA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+    ; CI-HSA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C5]]
+    ; CI-HSA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32)
+    ; CI-HSA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C5]]
+    ; CI-HSA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+    ; CI-HSA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CI-HSA: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; CI-HSA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32)
+    ; CI-HSA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C5]]
+    ; CI-HSA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; CI-HSA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C5]]
+    ; CI-HSA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; CI-HSA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; CI-HSA: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; CI-HSA: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
+    ; CI-HSA: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
+    ; CI-HSA: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
     ; CI-MESA-LABEL: name: test_load_global_v5s16_align1
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
@@ -9578,16 +10269,39 @@ body: |
     ; VI: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
     ; GFX9-HSA-LABEL: name: test_load_global_v5s16_align1
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<5 x s16>) = G_LOAD [[COPY]](p1) :: (load (<5 x s16>), align 1, addrspace 1)
-    ; GFX9-HSA: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-HSA: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>)
-    ; GFX9-HSA: [[UV:%[0-9]+]]:_(<5 x s16>), [[UV1:%[0-9]+]]:_(<5 x s16>), [[UV2:%[0-9]+]]:_(<5 x s16>), [[UV3:%[0-9]+]]:_(<5 x s16>), [[UV4:%[0-9]+]]:_(<5 x s16>), [[UV5:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<30 x s16>)
-    ; GFX9-HSA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[LOAD]](<5 x s16>), [[UV]](<5 x s16>)
-    ; GFX9-HSA: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<10 x s16>)
-    ; GFX9-HSA: $vgpr0 = COPY [[UV6]](<2 x s16>)
-    ; GFX9-HSA: $vgpr1 = COPY [[UV7]](<2 x s16>)
-    ; GFX9-HSA: $vgpr2 = COPY [[UV8]](<2 x s16>)
+    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 1, addrspace 1)
+    ; GFX9-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-HSA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, align 1, addrspace 1)
+    ; GFX9-HSA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; GFX9-HSA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; GFX9-HSA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 1, addrspace 1)
+    ; GFX9-HSA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; GFX9-HSA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; GFX9-HSA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, align 1, addrspace 1)
+    ; GFX9-HSA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; GFX9-HSA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; GFX9-HSA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 1, addrspace 1)
+    ; GFX9-HSA: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+    ; GFX9-HSA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-HSA: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-HSA: [[DEF3:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-HSA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<6 x s16>)
+    ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; GFX9-HSA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32)
+    ; GFX9-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; GFX9-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
+    ; GFX9-HSA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+    ; GFX9-HSA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32)
+    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32)
+    ; GFX9-HSA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32)
+    ; GFX9-HSA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32)
+    ; GFX9-HSA: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
+    ; GFX9-HSA: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+    ; GFX9-HSA: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
     ; GFX9-MESA-LABEL: name: test_load_global_v5s16_align1
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
@@ -10628,82 +11342,342 @@ body: |
 
     ; SI-LABEL: name: test_load_global_v7s16_align8
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(<7 x s16>) = G_LOAD [[COPY]](p1) :: (load (<7 x s16>), align 8, addrspace 1)
-    ; SI: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
-    ; SI: [[DEF1:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
-    ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[DEF]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>)
-    ; SI: [[UV:%[0-9]+]]:_(<7 x s16>), [[UV1:%[0-9]+]]:_(<7 x s16>), [[UV2:%[0-9]+]]:_(<7 x s16>), [[UV3:%[0-9]+]]:_(<7 x s16>), [[UV4:%[0-9]+]]:_(<7 x s16>), [[UV5:%[0-9]+]]:_(<7 x s16>), [[UV6:%[0-9]+]]:_(<7 x s16>), [[UV7:%[0-9]+]]:_(<7 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<56 x s16>)
-    ; SI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<14 x s16>) = G_CONCAT_VECTORS [[LOAD]](<7 x s16>), [[UV]](<7 x s16>)
-    ; SI: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>), [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<14 x s16>)
-    ; SI: $vgpr0 = COPY [[UV8]](<2 x s16>)
-    ; SI: $vgpr1 = COPY [[UV9]](<2 x s16>)
-    ; SI: $vgpr2 = COPY [[UV10]](<2 x s16>)
-    ; SI: $vgpr3 = COPY [[UV11]](<2 x s16>)
+    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 8, addrspace 1)
+    ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1)
+    ; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
+    ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 8, addrspace 1)
+    ; SI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 10
+    ; SI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1)
+    ; SI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+    ; SI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, align 4, addrspace 1)
+    ; SI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+    ; SI: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; SI: [[DEF2:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
+    ; SI: [[DEF3:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
+    ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<8 x s16>)
+    ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C6]](s32)
+    ; SI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C7]]
+    ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C7]]
+    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32)
+    ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; SI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+    ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C7]]
+    ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32)
+    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C7]]
+    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; SI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32)
+    ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C7]]
+    ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32)
+    ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C7]]
+    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32)
+    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; SI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32)
+    ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C7]]
+    ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C7]]
+    ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; SI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
+    ; SI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+    ; SI: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
+    ; SI: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
+    ; SI: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
+    ; SI: $vgpr3 = COPY [[BITCAST4]](<2 x s16>)
     ; CI-HSA-LABEL: name: test_load_global_v7s16_align8
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(<7 x s16>) = G_LOAD [[COPY]](p1) :: (load (<7 x s16>), align 8, addrspace 1)
-    ; CI-HSA: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
-    ; CI-HSA: [[DEF1:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
-    ; CI-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[DEF]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>)
-    ; CI-HSA: [[UV:%[0-9]+]]:_(<7 x s16>), [[UV1:%[0-9]+]]:_(<7 x s16>), [[UV2:%[0-9]+]]:_(<7 x s16>), [[UV3:%[0-9]+]]:_(<7 x s16>), [[UV4:%[0-9]+]]:_(<7 x s16>), [[UV5:%[0-9]+]]:_(<7 x s16>), [[UV6:%[0-9]+]]:_(<7 x s16>), [[UV7:%[0-9]+]]:_(<7 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<56 x s16>)
-    ; CI-HSA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<14 x s16>) = G_CONCAT_VECTORS [[LOAD]](<7 x s16>), [[UV]](<7 x s16>)
-    ; CI-HSA: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>), [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<14 x s16>)
-    ; CI-HSA: $vgpr0 = COPY [[UV8]](<2 x s16>)
-    ; CI-HSA: $vgpr1 = COPY [[UV9]](<2 x s16>)
-    ; CI-HSA: $vgpr2 = COPY [[UV10]](<2 x s16>)
-    ; CI-HSA: $vgpr3 = COPY [[UV11]](<2 x s16>)
+    ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 8, addrspace 1)
+    ; CI-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-HSA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; CI-HSA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CI-HSA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CI-HSA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1)
+    ; CI-HSA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; CI-HSA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; CI-HSA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
+    ; CI-HSA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; CI-HSA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; CI-HSA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 8, addrspace 1)
+    ; CI-HSA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 10
+    ; CI-HSA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; CI-HSA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1)
+    ; CI-HSA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+    ; CI-HSA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; CI-HSA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, align 4, addrspace 1)
+    ; CI-HSA: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+    ; CI-HSA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; CI-HSA: [[DEF2:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
+    ; CI-HSA: [[DEF3:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
+    ; CI-HSA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<8 x s16>)
+    ; CI-HSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; CI-HSA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C6]](s32)
+    ; CI-HSA: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; CI-HSA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C7]]
+    ; CI-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+    ; CI-HSA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C7]]
+    ; CI-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32)
+    ; CI-HSA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-HSA: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CI-HSA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+    ; CI-HSA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C7]]
+    ; CI-HSA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32)
+    ; CI-HSA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C7]]
+    ; CI-HSA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; CI-HSA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CI-HSA: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; CI-HSA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32)
+    ; CI-HSA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C7]]
+    ; CI-HSA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32)
+    ; CI-HSA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C7]]
+    ; CI-HSA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32)
+    ; CI-HSA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; CI-HSA: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; CI-HSA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32)
+    ; CI-HSA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C7]]
+    ; CI-HSA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; CI-HSA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C7]]
+    ; CI-HSA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; CI-HSA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
+    ; CI-HSA: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+    ; CI-HSA: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
+    ; CI-HSA: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
+    ; CI-HSA: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
+    ; CI-HSA: $vgpr3 = COPY [[BITCAST4]](<2 x s16>)
     ; CI-MESA-LABEL: name: test_load_global_v7s16_align8
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(<7 x s16>) = G_LOAD [[COPY]](p1) :: (load (<7 x s16>), align 8, addrspace 1)
-    ; CI-MESA: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
-    ; CI-MESA: [[DEF1:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
-    ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[DEF]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>)
-    ; CI-MESA: [[UV:%[0-9]+]]:_(<7 x s16>), [[UV1:%[0-9]+]]:_(<7 x s16>), [[UV2:%[0-9]+]]:_(<7 x s16>), [[UV3:%[0-9]+]]:_(<7 x s16>), [[UV4:%[0-9]+]]:_(<7 x s16>), [[UV5:%[0-9]+]]:_(<7 x s16>), [[UV6:%[0-9]+]]:_(<7 x s16>), [[UV7:%[0-9]+]]:_(<7 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<56 x s16>)
-    ; CI-MESA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<14 x s16>) = G_CONCAT_VECTORS [[LOAD]](<7 x s16>), [[UV]](<7 x s16>)
-    ; CI-MESA: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>), [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<14 x s16>)
-    ; CI-MESA: $vgpr0 = COPY [[UV8]](<2 x s16>)
-    ; CI-MESA: $vgpr1 = COPY [[UV9]](<2 x s16>)
-    ; CI-MESA: $vgpr2 = COPY [[UV10]](<2 x s16>)
-    ; CI-MESA: $vgpr3 = COPY [[UV11]](<2 x s16>)
+    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 8, addrspace 1)
+    ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1)
+    ; CI-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
+    ; CI-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; CI-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 8, addrspace 1)
+    ; CI-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 10
+    ; CI-MESA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1)
+    ; CI-MESA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+    ; CI-MESA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; CI-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, align 4, addrspace 1)
+    ; CI-MESA: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+    ; CI-MESA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; CI-MESA: [[DEF2:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
+    ; CI-MESA: [[DEF3:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
+    ; CI-MESA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<8 x s16>)
+    ; CI-MESA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; CI-MESA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C6]](s32)
+    ; CI-MESA: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C7]]
+    ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+    ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C7]]
+    ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32)
+    ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-MESA: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+    ; CI-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C7]]
+    ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32)
+    ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C7]]
+    ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; CI-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CI-MESA: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32)
+    ; CI-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C7]]
+    ; CI-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32)
+    ; CI-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C7]]
+    ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32)
+    ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; CI-MESA: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; CI-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32)
+    ; CI-MESA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C7]]
+    ; CI-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; CI-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C7]]
+    ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; CI-MESA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
+    ; CI-MESA: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+    ; CI-MESA: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
+    ; CI-MESA: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
+    ; CI-MESA: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
+    ; CI-MESA: $vgpr3 = COPY [[BITCAST4]](<2 x s16>)
     ; VI-LABEL: name: test_load_global_v7s16_align8
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<7 x s16>) = G_LOAD [[COPY]](p1) :: (load (<7 x s16>), align 8, addrspace 1)
-    ; VI: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[DEF1:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[DEF]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>)
-    ; VI: [[UV:%[0-9]+]]:_(<7 x s16>), [[UV1:%[0-9]+]]:_(<7 x s16>), [[UV2:%[0-9]+]]:_(<7 x s16>), [[UV3:%[0-9]+]]:_(<7 x s16>), [[UV4:%[0-9]+]]:_(<7 x s16>), [[UV5:%[0-9]+]]:_(<7 x s16>), [[UV6:%[0-9]+]]:_(<7 x s16>), [[UV7:%[0-9]+]]:_(<7 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<56 x s16>)
-    ; VI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<14 x s16>) = G_CONCAT_VECTORS [[LOAD]](<7 x s16>), [[UV]](<7 x s16>)
-    ; VI: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>), [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<14 x s16>)
-    ; VI: $vgpr0 = COPY [[UV8]](<2 x s16>)
-    ; VI: $vgpr1 = COPY [[UV9]](<2 x s16>)
-    ; VI: $vgpr2 = COPY [[UV10]](<2 x s16>)
-    ; VI: $vgpr3 = COPY [[UV11]](<2 x s16>)
+    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 8, addrspace 1)
+    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1)
+    ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
+    ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 8, addrspace 1)
+    ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 10
+    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1)
+    ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+    ; VI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, align 4, addrspace 1)
+    ; VI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+    ; VI: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; VI: [[DEF2:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
+    ; VI: [[DEF3:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
+    ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<8 x s16>)
+    ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C6]](s32)
+    ; VI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C7]]
+    ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C7]]
+    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32)
+    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C7]]
+    ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32)
+    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C7]]
+    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32)
+    ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C7]]
+    ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32)
+    ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C7]]
+    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32)
+    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; VI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32)
+    ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C7]]
+    ; VI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C7]]
+    ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
+    ; VI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+    ; VI: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
+    ; VI: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
+    ; VI: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
+    ; VI: $vgpr3 = COPY [[BITCAST4]](<2 x s16>)
     ; GFX9-HSA-LABEL: name: test_load_global_v7s16_align8
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<7 x s16>) = G_LOAD [[COPY]](p1) :: (load (<7 x s16>), align 8, addrspace 1)
-    ; GFX9-HSA: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-HSA: [[DEF1:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[DEF]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>)
-    ; GFX9-HSA: [[UV:%[0-9]+]]:_(<7 x s16>), [[UV1:%[0-9]+]]:_(<7 x s16>), [[UV2:%[0-9]+]]:_(<7 x s16>), [[UV3:%[0-9]+]]:_(<7 x s16>), [[UV4:%[0-9]+]]:_(<7 x s16>), [[UV5:%[0-9]+]]:_(<7 x s16>), [[UV6:%[0-9]+]]:_(<7 x s16>), [[UV7:%[0-9]+]]:_(<7 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<56 x s16>)
-    ; GFX9-HSA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<14 x s16>) = G_CONCAT_VECTORS [[LOAD]](<7 x s16>), [[UV]](<7 x s16>)
-    ; GFX9-HSA: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>), [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<14 x s16>)
-    ; GFX9-HSA: $vgpr0 = COPY [[UV8]](<2 x s16>)
-    ; GFX9-HSA: $vgpr1 = COPY [[UV9]](<2 x s16>)
-    ; GFX9-HSA: $vgpr2 = COPY [[UV10]](<2 x s16>)
-    ; GFX9-HSA: $vgpr3 = COPY [[UV11]](<2 x s16>)
+    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 8, addrspace 1)
+    ; GFX9-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-HSA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; GFX9-HSA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; GFX9-HSA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; GFX9-HSA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1)
+    ; GFX9-HSA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; GFX9-HSA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; GFX9-HSA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
+    ; GFX9-HSA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; GFX9-HSA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; GFX9-HSA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 8, addrspace 1)
+    ; GFX9-HSA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 10
+    ; GFX9-HSA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; GFX9-HSA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1)
+    ; GFX9-HSA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+    ; GFX9-HSA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; GFX9-HSA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, align 4, addrspace 1)
+    ; GFX9-HSA: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+    ; GFX9-HSA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-HSA: [[DEF2:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-HSA: [[DEF3:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-HSA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<8 x s16>)
+    ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; GFX9-HSA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C6]](s32)
+    ; GFX9-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; GFX9-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
+    ; GFX9-HSA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+    ; GFX9-HSA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32)
+    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32)
+    ; GFX9-HSA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32)
+    ; GFX9-HSA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32)
+    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32)
+    ; GFX9-HSA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32)
+    ; GFX9-HSA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY7]](s32), [[COPY8]](s32)
+    ; GFX9-HSA: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
+    ; GFX9-HSA: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+    ; GFX9-HSA: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+    ; GFX9-HSA: $vgpr3 = COPY [[BUILD_VECTOR_TRUNC3]](<2 x s16>)
     ; GFX9-MESA-LABEL: name: test_load_global_v7s16_align8
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<7 x s16>) = G_LOAD [[COPY]](p1) :: (load (<7 x s16>), align 8, addrspace 1)
-    ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-MESA: [[DEF1:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[DEF]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>)
-    ; GFX9-MESA: [[UV:%[0-9]+]]:_(<7 x s16>), [[UV1:%[0-9]+]]:_(<7 x s16>), [[UV2:%[0-9]+]]:_(<7 x s16>), [[UV3:%[0-9]+]]:_(<7 x s16>), [[UV4:%[0-9]+]]:_(<7 x s16>), [[UV5:%[0-9]+]]:_(<7 x s16>), [[UV6:%[0-9]+]]:_(<7 x s16>), [[UV7:%[0-9]+]]:_(<7 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<56 x s16>)
-    ; GFX9-MESA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<14 x s16>) = G_CONCAT_VECTORS [[LOAD]](<7 x s16>), [[UV]](<7 x s16>)
-    ; GFX9-MESA: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>), [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<14 x s16>)
-    ; GFX9-MESA: $vgpr0 = COPY [[UV8]](<2 x s16>)
-    ; GFX9-MESA: $vgpr1 = COPY [[UV9]](<2 x s16>)
-    ; GFX9-MESA: $vgpr2 = COPY [[UV10]](<2 x s16>)
-    ; GFX9-MESA: $vgpr3 = COPY [[UV11]](<2 x s16>)
+    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 8, addrspace 1)
+    ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1)
+    ; GFX9-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
+    ; GFX9-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; GFX9-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 8, addrspace 1)
+    ; GFX9-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 10
+    ; GFX9-MESA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; GFX9-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1)
+    ; GFX9-MESA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+    ; GFX9-MESA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; GFX9-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, align 4, addrspace 1)
+    ; GFX9-MESA: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+    ; GFX9-MESA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-MESA: [[DEF2:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-MESA: [[DEF3:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-MESA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<8 x s16>)
+    ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; GFX9-MESA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C6]](s32)
+    ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
+    ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+    ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32)
+    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32)
+    ; GFX9-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32)
+    ; GFX9-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32)
+    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32)
+    ; GFX9-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32)
+    ; GFX9-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY7]](s32), [[COPY8]](s32)
+    ; GFX9-MESA: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
+    ; GFX9-MESA: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+    ; GFX9-MESA: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+    ; GFX9-MESA: $vgpr3 = COPY [[BUILD_VECTOR_TRUNC3]](<2 x s16>)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<7 x s16>) = G_LOAD %0 :: (load (<7 x s16>), align 8, addrspace 1)
     %2:_(<7 x s16>) = G_IMPLICIT_DEF
@@ -10724,82 +11698,342 @@ body: |
 
     ; SI-LABEL: name: test_load_global_v7s16_align4
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(<7 x s16>) = G_LOAD [[COPY]](p1) :: (load (<7 x s16>), align 4, addrspace 1)
-    ; SI: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
-    ; SI: [[DEF1:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
-    ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[DEF]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>)
-    ; SI: [[UV:%[0-9]+]]:_(<7 x s16>), [[UV1:%[0-9]+]]:_(<7 x s16>), [[UV2:%[0-9]+]]:_(<7 x s16>), [[UV3:%[0-9]+]]:_(<7 x s16>), [[UV4:%[0-9]+]]:_(<7 x s16>), [[UV5:%[0-9]+]]:_(<7 x s16>), [[UV6:%[0-9]+]]:_(<7 x s16>), [[UV7:%[0-9]+]]:_(<7 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<56 x s16>)
-    ; SI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<14 x s16>) = G_CONCAT_VECTORS [[LOAD]](<7 x s16>), [[UV]](<7 x s16>)
-    ; SI: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>), [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<14 x s16>)
-    ; SI: $vgpr0 = COPY [[UV8]](<2 x s16>)
-    ; SI: $vgpr1 = COPY [[UV9]](<2 x s16>)
-    ; SI: $vgpr2 = COPY [[UV10]](<2 x s16>)
-    ; SI: $vgpr3 = COPY [[UV11]](<2 x s16>)
+    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
+    ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1)
+    ; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
+    ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 4, addrspace 1)
+    ; SI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 10
+    ; SI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1)
+    ; SI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+    ; SI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, align 4, addrspace 1)
+    ; SI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+    ; SI: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; SI: [[DEF2:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
+    ; SI: [[DEF3:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
+    ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<8 x s16>)
+    ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C6]](s32)
+    ; SI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C7]]
+    ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C7]]
+    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32)
+    ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; SI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+    ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C7]]
+    ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32)
+    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C7]]
+    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; SI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32)
+    ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C7]]
+    ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32)
+    ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C7]]
+    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32)
+    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; SI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32)
+    ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C7]]
+    ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C7]]
+    ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; SI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
+    ; SI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+    ; SI: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
+    ; SI: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
+    ; SI: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
+    ; SI: $vgpr3 = COPY [[BITCAST4]](<2 x s16>)
     ; CI-HSA-LABEL: name: test_load_global_v7s16_align4
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(<7 x s16>) = G_LOAD [[COPY]](p1) :: (load (<7 x s16>), align 4, addrspace 1)
-    ; CI-HSA: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
-    ; CI-HSA: [[DEF1:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
-    ; CI-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[DEF]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>)
-    ; CI-HSA: [[UV:%[0-9]+]]:_(<7 x s16>), [[UV1:%[0-9]+]]:_(<7 x s16>), [[UV2:%[0-9]+]]:_(<7 x s16>), [[UV3:%[0-9]+]]:_(<7 x s16>), [[UV4:%[0-9]+]]:_(<7 x s16>), [[UV5:%[0-9]+]]:_(<7 x s16>), [[UV6:%[0-9]+]]:_(<7 x s16>), [[UV7:%[0-9]+]]:_(<7 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<56 x s16>)
-    ; CI-HSA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<14 x s16>) = G_CONCAT_VECTORS [[LOAD]](<7 x s16>), [[UV]](<7 x s16>)
-    ; CI-HSA: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>), [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<14 x s16>)
-    ; CI-HSA: $vgpr0 = COPY [[UV8]](<2 x s16>)
-    ; CI-HSA: $vgpr1 = COPY [[UV9]](<2 x s16>)
-    ; CI-HSA: $vgpr2 = COPY [[UV10]](<2 x s16>)
-    ; CI-HSA: $vgpr3 = COPY [[UV11]](<2 x s16>)
+    ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
+    ; CI-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-HSA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; CI-HSA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CI-HSA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CI-HSA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1)
+    ; CI-HSA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; CI-HSA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; CI-HSA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
+    ; CI-HSA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; CI-HSA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; CI-HSA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 4, addrspace 1)
+    ; CI-HSA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 10
+    ; CI-HSA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; CI-HSA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1)
+    ; CI-HSA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+    ; CI-HSA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; CI-HSA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, align 4, addrspace 1)
+    ; CI-HSA: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+    ; CI-HSA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; CI-HSA: [[DEF2:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
+    ; CI-HSA: [[DEF3:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
+    ; CI-HSA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<8 x s16>)
+    ; CI-HSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; CI-HSA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C6]](s32)
+    ; CI-HSA: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; CI-HSA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C7]]
+    ; CI-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+    ; CI-HSA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C7]]
+    ; CI-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32)
+    ; CI-HSA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-HSA: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CI-HSA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+    ; CI-HSA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C7]]
+    ; CI-HSA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32)
+    ; CI-HSA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C7]]
+    ; CI-HSA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; CI-HSA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CI-HSA: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; CI-HSA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32)
+    ; CI-HSA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C7]]
+    ; CI-HSA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32)
+    ; CI-HSA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C7]]
+    ; CI-HSA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32)
+    ; CI-HSA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; CI-HSA: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; CI-HSA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32)
+    ; CI-HSA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C7]]
+    ; CI-HSA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; CI-HSA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C7]]
+    ; CI-HSA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; CI-HSA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
+    ; CI-HSA: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+    ; CI-HSA: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
+    ; CI-HSA: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
+    ; CI-HSA: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
+    ; CI-HSA: $vgpr3 = COPY [[BITCAST4]](<2 x s16>)
     ; CI-MESA-LABEL: name: test_load_global_v7s16_align4
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(<7 x s16>) = G_LOAD [[COPY]](p1) :: (load (<7 x s16>), align 4, addrspace 1)
-    ; CI-MESA: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
-    ; CI-MESA: [[DEF1:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
-    ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[DEF]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>)
-    ; CI-MESA: [[UV:%[0-9]+]]:_(<7 x s16>), [[UV1:%[0-9]+]]:_(<7 x s16>), [[UV2:%[0-9]+]]:_(<7 x s16>), [[UV3:%[0-9]+]]:_(<7 x s16>), [[UV4:%[0-9]+]]:_(<7 x s16>), [[UV5:%[0-9]+]]:_(<7 x s16>), [[UV6:%[0-9]+]]:_(<7 x s16>), [[UV7:%[0-9]+]]:_(<7 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<56 x s16>)
-    ; CI-MESA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<14 x s16>) = G_CONCAT_VECTORS [[LOAD]](<7 x s16>), [[UV]](<7 x s16>)
-    ; CI-MESA: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>), [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<14 x s16>)
-    ; CI-MESA: $vgpr0 = COPY [[UV8]](<2 x s16>)
-    ; CI-MESA: $vgpr1 = COPY [[UV9]](<2 x s16>)
-    ; CI-MESA: $vgpr2 = COPY [[UV10]](<2 x s16>)
-    ; CI-MESA: $vgpr3 = COPY [[UV11]](<2 x s16>)
+    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
+    ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1)
+    ; CI-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
+    ; CI-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; CI-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 4, addrspace 1)
+    ; CI-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 10
+    ; CI-MESA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1)
+    ; CI-MESA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+    ; CI-MESA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; CI-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, align 4, addrspace 1)
+    ; CI-MESA: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+    ; CI-MESA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; CI-MESA: [[DEF2:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
+    ; CI-MESA: [[DEF3:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
+    ; CI-MESA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<8 x s16>)
+    ; CI-MESA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; CI-MESA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C6]](s32)
+    ; CI-MESA: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C7]]
+    ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+    ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C7]]
+    ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32)
+    ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-MESA: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+    ; CI-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C7]]
+    ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32)
+    ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C7]]
+    ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; CI-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CI-MESA: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32)
+    ; CI-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C7]]
+    ; CI-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32)
+    ; CI-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C7]]
+    ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32)
+    ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; CI-MESA: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; CI-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32)
+    ; CI-MESA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C7]]
+    ; CI-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; CI-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C7]]
+    ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; CI-MESA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
+    ; CI-MESA: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+    ; CI-MESA: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
+    ; CI-MESA: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
+    ; CI-MESA: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
+    ; CI-MESA: $vgpr3 = COPY [[BITCAST4]](<2 x s16>)
     ; VI-LABEL: name: test_load_global_v7s16_align4
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<7 x s16>) = G_LOAD [[COPY]](p1) :: (load (<7 x s16>), align 4, addrspace 1)
-    ; VI: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[DEF1:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[DEF]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>)
-    ; VI: [[UV:%[0-9]+]]:_(<7 x s16>), [[UV1:%[0-9]+]]:_(<7 x s16>), [[UV2:%[0-9]+]]:_(<7 x s16>), [[UV3:%[0-9]+]]:_(<7 x s16>), [[UV4:%[0-9]+]]:_(<7 x s16>), [[UV5:%[0-9]+]]:_(<7 x s16>), [[UV6:%[0-9]+]]:_(<7 x s16>), [[UV7:%[0-9]+]]:_(<7 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<56 x s16>)
-    ; VI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<14 x s16>) = G_CONCAT_VECTORS [[LOAD]](<7 x s16>), [[UV]](<7 x s16>)
-    ; VI: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>), [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<14 x s16>)
-    ; VI: $vgpr0 = COPY [[UV8]](<2 x s16>)
-    ; VI: $vgpr1 = COPY [[UV9]](<2 x s16>)
-    ; VI: $vgpr2 = COPY [[UV10]](<2 x s16>)
-    ; VI: $vgpr3 = COPY [[UV11]](<2 x s16>)
+    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
+    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1)
+    ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
+    ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 4, addrspace 1)
+    ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 10
+    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1)
+    ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+    ; VI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, align 4, addrspace 1)
+    ; VI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+    ; VI: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; VI: [[DEF2:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
+    ; VI: [[DEF3:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
+    ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<8 x s16>)
+    ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C6]](s32)
+    ; VI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C7]]
+    ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C7]]
+    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32)
+    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C7]]
+    ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32)
+    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C7]]
+    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32)
+    ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C7]]
+    ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32)
+    ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C7]]
+    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32)
+    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; VI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32)
+    ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C7]]
+    ; VI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C7]]
+    ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
+    ; VI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+    ; VI: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
+    ; VI: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
+    ; VI: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
+    ; VI: $vgpr3 = COPY [[BITCAST4]](<2 x s16>)
     ; GFX9-HSA-LABEL: name: test_load_global_v7s16_align4
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<7 x s16>) = G_LOAD [[COPY]](p1) :: (load (<7 x s16>), align 4, addrspace 1)
-    ; GFX9-HSA: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-HSA: [[DEF1:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[DEF]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>)
-    ; GFX9-HSA: [[UV:%[0-9]+]]:_(<7 x s16>), [[UV1:%[0-9]+]]:_(<7 x s16>), [[UV2:%[0-9]+]]:_(<7 x s16>), [[UV3:%[0-9]+]]:_(<7 x s16>), [[UV4:%[0-9]+]]:_(<7 x s16>), [[UV5:%[0-9]+]]:_(<7 x s16>), [[UV6:%[0-9]+]]:_(<7 x s16>), [[UV7:%[0-9]+]]:_(<7 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<56 x s16>)
-    ; GFX9-HSA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<14 x s16>) = G_CONCAT_VECTORS [[LOAD]](<7 x s16>), [[UV]](<7 x s16>)
-    ; GFX9-HSA: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>), [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<14 x s16>)
-    ; GFX9-HSA: $vgpr0 = COPY [[UV8]](<2 x s16>)
-    ; GFX9-HSA: $vgpr1 = COPY [[UV9]](<2 x s16>)
-    ; GFX9-HSA: $vgpr2 = COPY [[UV10]](<2 x s16>)
-    ; GFX9-HSA: $vgpr3 = COPY [[UV11]](<2 x s16>)
+    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
+    ; GFX9-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-HSA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; GFX9-HSA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; GFX9-HSA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; GFX9-HSA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1)
+    ; GFX9-HSA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; GFX9-HSA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; GFX9-HSA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
+    ; GFX9-HSA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; GFX9-HSA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; GFX9-HSA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 4, addrspace 1)
+    ; GFX9-HSA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 10
+    ; GFX9-HSA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; GFX9-HSA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1)
+    ; GFX9-HSA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+    ; GFX9-HSA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; GFX9-HSA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, align 4, addrspace 1)
+    ; GFX9-HSA: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+    ; GFX9-HSA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-HSA: [[DEF2:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-HSA: [[DEF3:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-HSA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<8 x s16>)
+    ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; GFX9-HSA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C6]](s32)
+    ; GFX9-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; GFX9-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
+    ; GFX9-HSA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+    ; GFX9-HSA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32)
+    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32)
+    ; GFX9-HSA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32)
+    ; GFX9-HSA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32)
+    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32)
+    ; GFX9-HSA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32)
+    ; GFX9-HSA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY7]](s32), [[COPY8]](s32)
+    ; GFX9-HSA: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
+    ; GFX9-HSA: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+    ; GFX9-HSA: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+    ; GFX9-HSA: $vgpr3 = COPY [[BUILD_VECTOR_TRUNC3]](<2 x s16>)
     ; GFX9-MESA-LABEL: name: test_load_global_v7s16_align4
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<7 x s16>) = G_LOAD [[COPY]](p1) :: (load (<7 x s16>), align 4, addrspace 1)
-    ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-MESA: [[DEF1:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[DEF]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>)
-    ; GFX9-MESA: [[UV:%[0-9]+]]:_(<7 x s16>), [[UV1:%[0-9]+]]:_(<7 x s16>), [[UV2:%[0-9]+]]:_(<7 x s16>), [[UV3:%[0-9]+]]:_(<7 x s16>), [[UV4:%[0-9]+]]:_(<7 x s16>), [[UV5:%[0-9]+]]:_(<7 x s16>), [[UV6:%[0-9]+]]:_(<7 x s16>), [[UV7:%[0-9]+]]:_(<7 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<56 x s16>)
-    ; GFX9-MESA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<14 x s16>) = G_CONCAT_VECTORS [[LOAD]](<7 x s16>), [[UV]](<7 x s16>)
-    ; GFX9-MESA: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>), [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<14 x s16>)
-    ; GFX9-MESA: $vgpr0 = COPY [[UV8]](<2 x s16>)
-    ; GFX9-MESA: $vgpr1 = COPY [[UV9]](<2 x s16>)
-    ; GFX9-MESA: $vgpr2 = COPY [[UV10]](<2 x s16>)
-    ; GFX9-MESA: $vgpr3 = COPY [[UV11]](<2 x s16>)
+    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
+    ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1)
+    ; GFX9-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
+    ; GFX9-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; GFX9-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 4, addrspace 1)
+    ; GFX9-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 10
+    ; GFX9-MESA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; GFX9-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1)
+    ; GFX9-MESA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+    ; GFX9-MESA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; GFX9-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, align 4, addrspace 1)
+    ; GFX9-MESA: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+    ; GFX9-MESA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-MESA: [[DEF2:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-MESA: [[DEF3:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-MESA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<8 x s16>)
+    ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; GFX9-MESA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C6]](s32)
+    ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
+    ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+    ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32)
+    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32)
+    ; GFX9-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32)
+    ; GFX9-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32)
+    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32)
+    ; GFX9-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32)
+    ; GFX9-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY7]](s32), [[COPY8]](s32)
+    ; GFX9-MESA: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
+    ; GFX9-MESA: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+    ; GFX9-MESA: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+    ; GFX9-MESA: $vgpr3 = COPY [[BUILD_VECTOR_TRUNC3]](<2 x s16>)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<7 x s16>) = G_LOAD %0 :: (load (<7 x s16>), align 4, addrspace 1)
     %2:_(<7 x s16>) = G_IMPLICIT_DEF
@@ -10977,17 +12211,66 @@ body: |
     ; SI: $vgpr3 = COPY [[BITCAST23]](<2 x s16>)
     ; CI-HSA-LABEL: name: test_load_global_v7s16_align2
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(<7 x s16>) = G_LOAD [[COPY]](p1) :: (load (<7 x s16>), align 2, addrspace 1)
-    ; CI-HSA: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
-    ; CI-HSA: [[DEF1:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
-    ; CI-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[DEF]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>)
-    ; CI-HSA: [[UV:%[0-9]+]]:_(<7 x s16>), [[UV1:%[0-9]+]]:_(<7 x s16>), [[UV2:%[0-9]+]]:_(<7 x s16>), [[UV3:%[0-9]+]]:_(<7 x s16>), [[UV4:%[0-9]+]]:_(<7 x s16>), [[UV5:%[0-9]+]]:_(<7 x s16>), [[UV6:%[0-9]+]]:_(<7 x s16>), [[UV7:%[0-9]+]]:_(<7 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<56 x s16>)
-    ; CI-HSA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<14 x s16>) = G_CONCAT_VECTORS [[LOAD]](<7 x s16>), [[UV]](<7 x s16>)
-    ; CI-HSA: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>), [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<14 x s16>)
-    ; CI-HSA: $vgpr0 = COPY [[UV8]](<2 x s16>)
-    ; CI-HSA: $vgpr1 = COPY [[UV9]](<2 x s16>)
-    ; CI-HSA: $vgpr2 = COPY [[UV10]](<2 x s16>)
-    ; CI-HSA: $vgpr3 = COPY [[UV11]](<2 x s16>)
+    ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
+    ; CI-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-HSA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; CI-HSA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CI-HSA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CI-HSA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1)
+    ; CI-HSA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; CI-HSA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; CI-HSA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
+    ; CI-HSA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; CI-HSA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; CI-HSA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, addrspace 1)
+    ; CI-HSA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 10
+    ; CI-HSA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; CI-HSA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1)
+    ; CI-HSA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+    ; CI-HSA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; CI-HSA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, addrspace 1)
+    ; CI-HSA: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+    ; CI-HSA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; CI-HSA: [[DEF2:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
+    ; CI-HSA: [[DEF3:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
+    ; CI-HSA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<8 x s16>)
+    ; CI-HSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; CI-HSA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C6]](s32)
+    ; CI-HSA: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; CI-HSA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C7]]
+    ; CI-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+    ; CI-HSA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C7]]
+    ; CI-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32)
+    ; CI-HSA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-HSA: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CI-HSA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+    ; CI-HSA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C7]]
+    ; CI-HSA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32)
+    ; CI-HSA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C7]]
+    ; CI-HSA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; CI-HSA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CI-HSA: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; CI-HSA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32)
+    ; CI-HSA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C7]]
+    ; CI-HSA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32)
+    ; CI-HSA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C7]]
+    ; CI-HSA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32)
+    ; CI-HSA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; CI-HSA: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; CI-HSA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32)
+    ; CI-HSA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C7]]
+    ; CI-HSA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; CI-HSA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C7]]
+    ; CI-HSA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; CI-HSA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
+    ; CI-HSA: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+    ; CI-HSA: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
+    ; CI-HSA: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
+    ; CI-HSA: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
+    ; CI-HSA: $vgpr3 = COPY [[BITCAST4]](<2 x s16>)
     ; CI-MESA-LABEL: name: test_load_global_v7s16_align2
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
@@ -11304,17 +12587,49 @@ body: |
     ; VI: $vgpr3 = COPY [[BITCAST23]](<2 x s16>)
     ; GFX9-HSA-LABEL: name: test_load_global_v7s16_align2
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<7 x s16>) = G_LOAD [[COPY]](p1) :: (load (<7 x s16>), align 2, addrspace 1)
-    ; GFX9-HSA: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-HSA: [[DEF1:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[DEF]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>)
-    ; GFX9-HSA: [[UV:%[0-9]+]]:_(<7 x s16>), [[UV1:%[0-9]+]]:_(<7 x s16>), [[UV2:%[0-9]+]]:_(<7 x s16>), [[UV3:%[0-9]+]]:_(<7 x s16>), [[UV4:%[0-9]+]]:_(<7 x s16>), [[UV5:%[0-9]+]]:_(<7 x s16>), [[UV6:%[0-9]+]]:_(<7 x s16>), [[UV7:%[0-9]+]]:_(<7 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<56 x s16>)
-    ; GFX9-HSA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<14 x s16>) = G_CONCAT_VECTORS [[LOAD]](<7 x s16>), [[UV]](<7 x s16>)
-    ; GFX9-HSA: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>), [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<14 x s16>)
-    ; GFX9-HSA: $vgpr0 = COPY [[UV8]](<2 x s16>)
-    ; GFX9-HSA: $vgpr1 = COPY [[UV9]](<2 x s16>)
-    ; GFX9-HSA: $vgpr2 = COPY [[UV10]](<2 x s16>)
-    ; GFX9-HSA: $vgpr3 = COPY [[UV11]](<2 x s16>)
+    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
+    ; GFX9-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-HSA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1)
+    ; GFX9-HSA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; GFX9-HSA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; GFX9-HSA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1)
+    ; GFX9-HSA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; GFX9-HSA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; GFX9-HSA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1)
+    ; GFX9-HSA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; GFX9-HSA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; GFX9-HSA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, addrspace 1)
+    ; GFX9-HSA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 10
+    ; GFX9-HSA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; GFX9-HSA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1)
+    ; GFX9-HSA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+    ; GFX9-HSA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; GFX9-HSA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, addrspace 1)
+    ; GFX9-HSA: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+    ; GFX9-HSA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-HSA: [[DEF2:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-HSA: [[DEF3:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-HSA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<8 x s16>)
+    ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; GFX9-HSA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C6]](s32)
+    ; GFX9-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; GFX9-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
+    ; GFX9-HSA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+    ; GFX9-HSA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32)
+    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32)
+    ; GFX9-HSA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32)
+    ; GFX9-HSA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32)
+    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32)
+    ; GFX9-HSA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32)
+    ; GFX9-HSA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY7]](s32), [[COPY8]](s32)
+    ; GFX9-HSA: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
+    ; GFX9-HSA: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+    ; GFX9-HSA: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+    ; GFX9-HSA: $vgpr3 = COPY [[BUILD_VECTOR_TRUNC3]](<2 x s16>)
     ; GFX9-MESA-LABEL: name: test_load_global_v7s16_align2
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
@@ -11649,17 +12964,66 @@ body: |
     ; SI: $vgpr3 = COPY [[BITCAST23]](<2 x s16>)
     ; CI-HSA-LABEL: name: test_load_global_v7s16_align1
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(<7 x s16>) = G_LOAD [[COPY]](p1) :: (load (<7 x s16>), align 1, addrspace 1)
-    ; CI-HSA: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
-    ; CI-HSA: [[DEF1:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
-    ; CI-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[DEF]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>)
-    ; CI-HSA: [[UV:%[0-9]+]]:_(<7 x s16>), [[UV1:%[0-9]+]]:_(<7 x s16>), [[UV2:%[0-9]+]]:_(<7 x s16>), [[UV3:%[0-9]+]]:_(<7 x s16>), [[UV4:%[0-9]+]]:_(<7 x s16>), [[UV5:%[0-9]+]]:_(<7 x s16>), [[UV6:%[0-9]+]]:_(<7 x s16>), [[UV7:%[0-9]+]]:_(<7 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<56 x s16>)
-    ; CI-HSA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<14 x s16>) = G_CONCAT_VECTORS [[LOAD]](<7 x s16>), [[UV]](<7 x s16>)
-    ; CI-HSA: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>), [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<14 x s16>)
-    ; CI-HSA: $vgpr0 = COPY [[UV8]](<2 x s16>)
-    ; CI-HSA: $vgpr1 = COPY [[UV9]](<2 x s16>)
-    ; CI-HSA: $vgpr2 = COPY [[UV10]](<2 x s16>)
-    ; CI-HSA: $vgpr3 = COPY [[UV11]](<2 x s16>)
+    ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 1, addrspace 1)
+    ; CI-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-HSA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, align 1, addrspace 1)
+    ; CI-HSA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CI-HSA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CI-HSA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 1, addrspace 1)
+    ; CI-HSA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; CI-HSA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; CI-HSA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, align 1, addrspace 1)
+    ; CI-HSA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; CI-HSA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; CI-HSA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 1, addrspace 1)
+    ; CI-HSA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 10
+    ; CI-HSA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; CI-HSA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, align 1, addrspace 1)
+    ; CI-HSA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+    ; CI-HSA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; CI-HSA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, align 1, addrspace 1)
+    ; CI-HSA: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+    ; CI-HSA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; CI-HSA: [[DEF2:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
+    ; CI-HSA: [[DEF3:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
+    ; CI-HSA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<8 x s16>)
+    ; CI-HSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; CI-HSA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C6]](s32)
+    ; CI-HSA: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; CI-HSA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C7]]
+    ; CI-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+    ; CI-HSA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C7]]
+    ; CI-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32)
+    ; CI-HSA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-HSA: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CI-HSA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+    ; CI-HSA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C7]]
+    ; CI-HSA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32)
+    ; CI-HSA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C7]]
+    ; CI-HSA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; CI-HSA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CI-HSA: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; CI-HSA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32)
+    ; CI-HSA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C7]]
+    ; CI-HSA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32)
+    ; CI-HSA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C7]]
+    ; CI-HSA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32)
+    ; CI-HSA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; CI-HSA: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; CI-HSA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32)
+    ; CI-HSA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C7]]
+    ; CI-HSA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; CI-HSA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C7]]
+    ; CI-HSA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+    ; CI-HSA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
+    ; CI-HSA: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+    ; CI-HSA: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
+    ; CI-HSA: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
+    ; CI-HSA: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
+    ; CI-HSA: $vgpr3 = COPY [[BITCAST4]](<2 x s16>)
     ; CI-MESA-LABEL: name: test_load_global_v7s16_align1
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
@@ -12078,17 +13442,49 @@ body: |
     ; VI: $vgpr3 = COPY [[BITCAST23]](<2 x s16>)
     ; GFX9-HSA-LABEL: name: test_load_global_v7s16_align1
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<7 x s16>) = G_LOAD [[COPY]](p1) :: (load (<7 x s16>), align 1, addrspace 1)
-    ; GFX9-HSA: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-HSA: [[DEF1:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[DEF]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>)
-    ; GFX9-HSA: [[UV:%[0-9]+]]:_(<7 x s16>), [[UV1:%[0-9]+]]:_(<7 x s16>), [[UV2:%[0-9]+]]:_(<7 x s16>), [[UV3:%[0-9]+]]:_(<7 x s16>), [[UV4:%[0-9]+]]:_(<7 x s16>), [[UV5:%[0-9]+]]:_(<7 x s16>), [[UV6:%[0-9]+]]:_(<7 x s16>), [[UV7:%[0-9]+]]:_(<7 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<56 x s16>)
-    ; GFX9-HSA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<14 x s16>) = G_CONCAT_VECTORS [[LOAD]](<7 x s16>), [[UV]](<7 x s16>)
-    ; GFX9-HSA: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>), [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<14 x s16>)
-    ; GFX9-HSA: $vgpr0 = COPY [[UV8]](<2 x s16>)
-    ; GFX9-HSA: $vgpr1 = COPY [[UV9]](<2 x s16>)
-    ; GFX9-HSA: $vgpr2 = COPY [[UV10]](<2 x s16>)
-    ; GFX9-HSA: $vgpr3 = COPY [[UV11]](<2 x s16>)
+    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 1, addrspace 1)
+    ; GFX9-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-HSA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, align 1, addrspace 1)
+    ; GFX9-HSA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; GFX9-HSA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; GFX9-HSA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 1, addrspace 1)
+    ; GFX9-HSA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; GFX9-HSA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; GFX9-HSA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, align 1, addrspace 1)
+    ; GFX9-HSA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; GFX9-HSA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; GFX9-HSA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 1, addrspace 1)
+    ; GFX9-HSA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 10
+    ; GFX9-HSA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; GFX9-HSA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, align 1, addrspace 1)
+    ; GFX9-HSA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+    ; GFX9-HSA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; GFX9-HSA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, align 1, addrspace 1)
+    ; GFX9-HSA: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+    ; GFX9-HSA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-HSA: [[DEF2:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-HSA: [[DEF3:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-HSA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<8 x s16>)
+    ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; GFX9-HSA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C6]](s32)
+    ; GFX9-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; GFX9-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
+    ; GFX9-HSA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+    ; GFX9-HSA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32)
+    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32)
+    ; GFX9-HSA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32)
+    ; GFX9-HSA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32)
+    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32)
+    ; GFX9-HSA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32)
+    ; GFX9-HSA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY7]](s32), [[COPY8]](s32)
+    ; GFX9-HSA: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
+    ; GFX9-HSA: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+    ; GFX9-HSA: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+    ; GFX9-HSA: $vgpr3 = COPY [[BUILD_VECTOR_TRUNC3]](<2 x s16>)
     ; GFX9-MESA-LABEL: name: test_load_global_v7s16_align1
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir
index 509c0ba1b376..6606f50c85ec 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir
@@ -10541,13 +10541,34 @@ body: |
     ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; GFX9-UNALIGNED-LABEL: name: test_load_local_v3s16_align2
     ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[COPY]](p3) :: (load (<3 x s16>), align 2, addrspace 3)
-    ; GFX9-UNALIGNED: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-UNALIGNED: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-UNALIGNED: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; GFX9-UNALIGNED: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
-    ; GFX9-UNALIGNED: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[LOAD]](<3 x s16>), [[UV]](<3 x s16>)
-    ; GFX9-UNALIGNED: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS1]](<6 x s16>)
+    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
+    ; GFX9-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX9-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3)
+    ; GFX9-UNALIGNED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX9-UNALIGNED: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX9-UNALIGNED: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
+    ; GFX9-UNALIGNED: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+    ; GFX9-UNALIGNED: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-UNALIGNED: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-UNALIGNED: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-UNALIGNED: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
+    ; GFX9-UNALIGNED: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; GFX9-UNALIGNED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-UNALIGNED: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
+    ; GFX9-UNALIGNED: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; GFX9-UNALIGNED: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C2]](s32)
+    ; GFX9-UNALIGNED: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; GFX9-UNALIGNED: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+    ; GFX9-UNALIGNED: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
+    ; GFX9-UNALIGNED: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+    ; GFX9-UNALIGNED: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; GFX9-UNALIGNED: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32)
+    ; GFX9-UNALIGNED: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; GFX9-UNALIGNED: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; GFX9-UNALIGNED: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32)
+    ; GFX9-UNALIGNED: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+    ; GFX9-UNALIGNED: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; GFX10-LABEL: name: test_load_local_v3s16_align2
     ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
@@ -10966,13 +10987,34 @@ body: |
     ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; GFX9-UNALIGNED-LABEL: name: test_load_local_v3s16_align1
     ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[COPY]](p3) :: (load (<3 x s16>), align 1, addrspace 3)
-    ; GFX9-UNALIGNED: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-UNALIGNED: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-UNALIGNED: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; GFX9-UNALIGNED: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
-    ; GFX9-UNALIGNED: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[LOAD]](<3 x s16>), [[UV]](<3 x s16>)
-    ; GFX9-UNALIGNED: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS1]](<6 x s16>)
+    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 1, addrspace 3)
+    ; GFX9-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX9-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX9-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, align 1, addrspace 3)
+    ; GFX9-UNALIGNED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX9-UNALIGNED: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX9-UNALIGNED: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, align 1, addrspace 3)
+    ; GFX9-UNALIGNED: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+    ; GFX9-UNALIGNED: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-UNALIGNED: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-UNALIGNED: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-UNALIGNED: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
+    ; GFX9-UNALIGNED: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; GFX9-UNALIGNED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-UNALIGNED: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
+    ; GFX9-UNALIGNED: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; GFX9-UNALIGNED: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C2]](s32)
+    ; GFX9-UNALIGNED: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; GFX9-UNALIGNED: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+    ; GFX9-UNALIGNED: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
+    ; GFX9-UNALIGNED: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+    ; GFX9-UNALIGNED: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; GFX9-UNALIGNED: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32)
+    ; GFX9-UNALIGNED: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; GFX9-UNALIGNED: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; GFX9-UNALIGNED: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32)
+    ; GFX9-UNALIGNED: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+    ; GFX9-UNALIGNED: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; GFX10-LABEL: name: test_load_local_v3s16_align1
     ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)


        


More information about the llvm-commits mailing list