[llvm] 18b2180 - AMDGPU/GlobalISel: Legalize odd sized loads with widening

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Thu Aug 20 13:16:57 PDT 2020


Author: Matt Arsenault
Date: 2020-08-20T16:15:53-04:00
New Revision: 18b218007db69627bb651bb2a548afe92d615cd3

URL: https://github.com/llvm/llvm-project/commit/18b218007db69627bb651bb2a548afe92d615cd3
DIFF: https://github.com/llvm/llvm-project/commit/18b218007db69627bb651bb2a548afe92d615cd3.diff

LOG: AMDGPU/GlobalISel: Legalize odd sized loads with widening

Custom lower and widen odd sized loads up to the alignment. The
default set of legalization actions doesn't have a way to represent
this. This fixes naturally aligned <3 x s8> and <3 x s16> loads.

This also starts moving towards eliminating the buggy and
overcomplicated legalization rules for narrowing. All the memory size
changes should be done in the lower or custom action, not NarrowScalar
/ FewerElements. These currently have redundant and ambiguous code
with the lower action.

Added: 
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-memory-metadata.mir

Modified: 
    llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 62cd45817c44..52a87c769dc7 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -360,6 +360,52 @@ static bool shouldBitcastLoadStoreType(const GCNSubtarget &ST, const LLT Ty,
          !isRegisterVectorElementType(Ty.getElementType());
 }
 
+/// Return true if we should legalize a load by widening an odd sized memory
+/// access up to the alignment. Note this case when the memory access itself
+/// changes, not the size of the result register.
+static bool shouldWidenLoad(const GCNSubtarget &ST, unsigned SizeInBits,
+                            unsigned AlignInBits, unsigned AddrSpace,
+                            unsigned Opcode) {
+  // We don't want to widen cases that are naturally legal.
+  if (isPowerOf2_32(SizeInBits))
+    return false;
+
+  // If we have 96-bit memory operations, we shouldn't touch them. Note we may
+  // end up widening these for a scalar load during RegBankSelect, since there
+  // aren't 96-bit scalar loads.
+  if (SizeInBits == 96 && ST.hasDwordx3LoadStores())
+    return false;
+
+  if (SizeInBits >= maxSizeForAddrSpace(ST, AddrSpace, Opcode))
+    return false;
+
+  // A load is known dereferenceable up to the alignment, so it's legal to widen
+  // to it.
+  //
+  // TODO: Could check dereferenceable for less aligned cases.
+  unsigned RoundedSize = NextPowerOf2(SizeInBits);
+  if (AlignInBits < RoundedSize)
+    return false;
+
+  // Do not widen if it would introduce a slow unaligned load.
+  const SITargetLowering *TLI = ST.getTargetLowering();
+  bool Fast = false;
+  return TLI->allowsMisalignedMemoryAccessesImpl(
+             RoundedSize, AddrSpace, Align(AlignInBits / 8),
+             MachineMemOperand::MOLoad, &Fast) &&
+         Fast;
+}
+
+static bool shouldWidenLoad(const GCNSubtarget &ST, const LegalityQuery &Query,
+                            unsigned Opcode) {
+  if (Query.MMODescrs[0].Ordering != AtomicOrdering::NotAtomic)
+    return false;
+
+  return shouldWidenLoad(ST, Query.MMODescrs[0].SizeInBits,
+                         Query.MMODescrs[0].AlignInBits,
+                         Query.Types[1].getAddressSpace(), Opcode);
+}
+
 AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
                                          const GCNTargetMachine &TM)
   :  ST(ST_) {
@@ -1005,24 +1051,6 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
     return false;
   };
 
-  const auto shouldWidenLoadResult = [=](const LegalityQuery &Query,
-                                         unsigned Opc) -> bool {
-    unsigned Size = Query.Types[0].getSizeInBits();
-    if (isPowerOf2_32(Size))
-      return false;
-
-    if (Size == 96 && ST.hasDwordx3LoadStores())
-      return false;
-
-    unsigned AddrSpace = Query.Types[1].getAddressSpace();
-    if (Size >= maxSizeForAddrSpace(ST, AddrSpace, Opc))
-      return false;
-
-    unsigned Align = Query.MMODescrs[0].AlignInBits;
-    unsigned RoundedSize = NextPowerOf2(Size);
-    return (Align >= RoundedSize);
-  };
-
   unsigned GlobalAlign32 = ST.hasUnalignedBufferAccess() ? 0 : 32;
   unsigned GlobalAlign16 = ST.hasUnalignedBufferAccess() ? 0 : 16;
   unsigned GlobalAlign8 = ST.hasUnalignedBufferAccess() ? 0 : 8;
@@ -1087,19 +1115,16 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
                                           Query.MMODescrs[0].SizeInBits);
       }, bitcastToRegisterType(0));
 
+    if (!IsStore) {
+      // Widen suitably aligned loads by loading extra bytes. The standard
+      // legalization actions can't properly express widening memory operands.
+      Actions.customIf([=](const LegalityQuery &Query) -> bool {
+        return shouldWidenLoad(ST, Query, G_LOAD);
+      });
+    }
+
+    // FIXME: load/store narrowing should be moved to lower action
     Actions
-        .customIf(typeIs(1, Constant32Ptr))
-        // Widen suitably aligned loads by loading extra elements.
-        .moreElementsIf([=](const LegalityQuery &Query) {
-            const LLT Ty = Query.Types[0];
-            return Op == G_LOAD && Ty.isVector() &&
-                   shouldWidenLoadResult(Query, Op);
-          }, moreElementsToNextPow2(0))
-        .widenScalarIf([=](const LegalityQuery &Query) {
-            const LLT Ty = Query.Types[0];
-            return Op == G_LOAD && !Ty.isVector() &&
-                   shouldWidenLoadResult(Query, Op);
-          }, widenScalarOrEltToNextPow2(0))
         .narrowScalarIf(
             [=](const LegalityQuery &Query) -> bool {
               return !Query.Types[0].isVector() &&
@@ -1205,15 +1230,16 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
               // May need relegalization for the scalars.
               return std::make_pair(0, EltTy);
             })
-        .minScalar(0, S32);
+    .lowerIfMemSizeNotPow2()
+    .minScalar(0, S32);
 
     if (IsStore)
       Actions.narrowScalarIf(isWideScalarTruncStore(0), changeTo(0, S32));
 
-    // TODO: Need a bitcast lower option?
     Actions
         .widenScalarToNextPow2(0)
-        .moreElementsIf(vectorSmallerThan(0, 32), moreEltsToNext32Bit(0));
+        .moreElementsIf(vectorSmallerThan(0, 32), moreEltsToNext32Bit(0))
+        .lower();
   }
 
   auto &ExtLoads = getActionDefinitionsBuilder({G_SEXTLOAD, G_ZEXTLOAD})
@@ -2303,6 +2329,12 @@ bool AMDGPULegalizerInfo::legalizeGlobalValue(
   return true;
 }
 
+static LLT widenToNextPowerOf2(LLT Ty) {
+  if (Ty.isVector())
+    return Ty.changeNumElements(PowerOf2Ceil(Ty.getNumElements()));
+  return LLT::scalar(PowerOf2Ceil(Ty.getSizeInBits()));
+}
+
 bool AMDGPULegalizerInfo::legalizeLoad(LegalizerHelper &Helper,
                                        MachineInstr &MI) const {
   MachineIRBuilder &B = Helper.MIRBuilder;
@@ -2322,6 +2354,66 @@ bool AMDGPULegalizerInfo::legalizeLoad(LegalizerHelper &Helper,
     return true;
   }
 
+  Register ValReg = MI.getOperand(0).getReg();
+  LLT ValTy = MRI.getType(ValReg);
+
+  MachineMemOperand *MMO = *MI.memoperands_begin();
+  const unsigned ValSize = ValTy.getSizeInBits();
+  const unsigned MemSize = 8 * MMO->getSize();
+  const Align MemAlign = MMO->getAlign();
+  const unsigned AlignInBits = 8 * MemAlign.value();
+
+  // Widen non-power-of-2 loads to the alignment if needed
+  if (shouldWidenLoad(ST, MemSize, AlignInBits, AddrSpace, MI.getOpcode())) {
+    const unsigned WideMemSize = PowerOf2Ceil(MemSize);
+
+    // This was already the correct extending load result type, so just adjust
+    // the memory type.
+    if (WideMemSize == ValSize) {
+      MachineFunction &MF = B.getMF();
+
+      // FIXME: This is losing AA metadata
+      MachineMemOperand *WideMMO =
+          MF.getMachineMemOperand(MMO, 0, WideMemSize / 8);
+      Observer.changingInstr(MI);
+      MI.setMemRefs(MF, {WideMMO});
+      Observer.changedInstr(MI);
+      return true;
+    }
+
+    // Don't bother handling edge case that should probably never be produced.
+    if (ValSize > WideMemSize)
+      return false;
+
+    LLT WideTy = widenToNextPowerOf2(ValTy);
+
+    // FIXME: This is losing AA metadata
+    Register WideLoad;
+    if (!WideTy.isVector()) {
+      WideLoad = B.buildLoadFromOffset(WideTy, PtrReg, *MMO, 0).getReg(0);
+      B.buildTrunc(ValReg, WideLoad).getReg(0);
+    } else {
+      // Extract the subvector.
+
+      if (isRegisterType(ValTy)) {
+        // If this a case where G_EXTRACT is legal, use it.
+        // (e.g. <3 x s32> -> <4 x s32>)
+        WideLoad = B.buildLoadFromOffset(WideTy, PtrReg, *MMO, 0).getReg(0);
+        B.buildExtract(ValReg, WideLoad, 0);
+      } else {
+        // For cases where the widened type isn't a nice register value, unmerge
+        // from a widened register (e.g. <3 x s16> -> <4 x s16>)
+        B.setInsertPt(B.getMBB(), ++B.getInsertPt());
+        WideLoad = Helper.widenWithUnmerge(WideTy, ValReg);
+        B.setInsertPt(B.getMBB(), MI.getIterator());
+        B.buildLoadFromOffset(WideLoad, PtrReg, *MMO, 0);
+      }
+    }
+
+    MI.eraseFromParent();
+    return true;
+  }
+
   return false;
 }
 

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir
index a4c444518595..3e19d5c9d9a0 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir
@@ -634,34 +634,29 @@ body: |
 
     ; CI-LABEL: name: test_load_constant_s24_align8
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 3, align 8, addrspace 4)
-    ; CI: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[LOAD]](s32)
-    ; CI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s24)
-    ; CI: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 4, align 8, addrspace 4)
+    ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; CI: $vgpr0 = COPY [[COPY1]](s32)
     ; VI-LABEL: name: test_load_constant_s24_align8
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 3, align 8, addrspace 4)
-    ; VI: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[LOAD]](s32)
-    ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s24)
-    ; VI: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 4, align 8, addrspace 4)
+    ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; VI: $vgpr0 = COPY [[COPY1]](s32)
     ; GFX9-LABEL: name: test_load_constant_s24_align8
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 3, align 8, addrspace 4)
-    ; GFX9: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[LOAD]](s32)
-    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s24)
-    ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 4, align 8, addrspace 4)
+    ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; GFX9: $vgpr0 = COPY [[COPY1]](s32)
     ; CI-MESA-LABEL: name: test_load_constant_s24_align8
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 3, align 8, addrspace 4)
-    ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[LOAD]](s32)
-    ; CI-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s24)
-    ; CI-MESA: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 4, align 8, addrspace 4)
+    ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; CI-MESA: $vgpr0 = COPY [[COPY1]](s32)
     ; GFX9-MESA-LABEL: name: test_load_constant_s24_align8
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 3, align 8, addrspace 4)
-    ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[LOAD]](s32)
-    ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s24)
-    ; GFX9-MESA: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 4, align 8, addrspace 4)
+    ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; GFX9-MESA: $vgpr0 = COPY [[COPY1]](s32)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(s24) = G_LOAD %0 :: (load 3, align 8, addrspace 4)
     %2:_(s32) = G_ANYEXT %1
@@ -676,34 +671,29 @@ body: |
 
     ; CI-LABEL: name: test_load_constant_s24_align4
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 3, align 4, addrspace 4)
-    ; CI: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[LOAD]](s32)
-    ; CI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s24)
-    ; CI: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 4, addrspace 4)
+    ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; CI: $vgpr0 = COPY [[COPY1]](s32)
     ; VI-LABEL: name: test_load_constant_s24_align4
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 3, align 4, addrspace 4)
-    ; VI: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[LOAD]](s32)
-    ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s24)
-    ; VI: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 4, addrspace 4)
+    ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; VI: $vgpr0 = COPY [[COPY1]](s32)
     ; GFX9-LABEL: name: test_load_constant_s24_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 3, align 4, addrspace 4)
-    ; GFX9: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[LOAD]](s32)
-    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s24)
-    ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 4, addrspace 4)
+    ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; GFX9: $vgpr0 = COPY [[COPY1]](s32)
     ; CI-MESA-LABEL: name: test_load_constant_s24_align4
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 3, align 4, addrspace 4)
-    ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[LOAD]](s32)
-    ; CI-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s24)
-    ; CI-MESA: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 4, addrspace 4)
+    ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; CI-MESA: $vgpr0 = COPY [[COPY1]](s32)
     ; GFX9-MESA-LABEL: name: test_load_constant_s24_align4
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 3, align 4, addrspace 4)
-    ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[LOAD]](s32)
-    ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s24)
-    ; GFX9-MESA: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 4, addrspace 4)
+    ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; GFX9-MESA: $vgpr0 = COPY [[COPY1]](s32)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(s24) = G_LOAD %0 :: (load 3, align 4, addrspace 4)
     %2:_(s32) = G_ANYEXT %1
@@ -852,34 +842,39 @@ body: |
 
     ; CI-LABEL: name: test_load_constant_s48_align8
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load 6, align 8, addrspace 4)
-    ; CI: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[LOAD]](s64)
-    ; CI: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s48)
-    ; CI: $vgpr0_vgpr1 = COPY [[ZEXT]](s64)
+    ; CI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load 8, addrspace 4)
+    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655
+    ; CI: [[COPY1:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64)
+    ; CI: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]]
+    ; CI: $vgpr0_vgpr1 = COPY [[AND]](s64)
     ; VI-LABEL: name: test_load_constant_s48_align8
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load 6, align 8, addrspace 4)
-    ; VI: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[LOAD]](s64)
-    ; VI: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s48)
-    ; VI: $vgpr0_vgpr1 = COPY [[ZEXT]](s64)
+    ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load 8, addrspace 4)
+    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655
+    ; VI: [[COPY1:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64)
+    ; VI: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]]
+    ; VI: $vgpr0_vgpr1 = COPY [[AND]](s64)
     ; GFX9-LABEL: name: test_load_constant_s48_align8
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load 6, align 8, addrspace 4)
-    ; GFX9: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[LOAD]](s64)
-    ; GFX9: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s48)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[ZEXT]](s64)
+    ; GFX9: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load 8, addrspace 4)
+    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655
+    ; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64)
+    ; GFX9: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]]
+    ; GFX9: $vgpr0_vgpr1 = COPY [[AND]](s64)
     ; CI-MESA-LABEL: name: test_load_constant_s48_align8
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load 6, align 8, addrspace 4)
-    ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[LOAD]](s64)
-    ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s48)
-    ; CI-MESA: $vgpr0_vgpr1 = COPY [[ZEXT]](s64)
+    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load 8, addrspace 4)
+    ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655
+    ; CI-MESA: [[COPY1:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64)
+    ; CI-MESA: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]]
+    ; CI-MESA: $vgpr0_vgpr1 = COPY [[AND]](s64)
     ; GFX9-MESA-LABEL: name: test_load_constant_s48_align8
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load 6, align 8, addrspace 4)
-    ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[LOAD]](s64)
-    ; GFX9-MESA: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s48)
-    ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[ZEXT]](s64)
+    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load 8, addrspace 4)
+    ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655
+    ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64)
+    ; GFX9-MESA: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]]
+    ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[AND]](s64)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(s48) = G_LOAD %0 :: (load 6, align 8, addrspace 4)
     %2:_(s64) = G_ZEXT %1
@@ -6101,7 +6096,7 @@ body: |
 
     ; CI-LABEL: name: test_load_constant_v3s16_align8
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p4) :: (load 6, align 8, addrspace 4)
+    ; CI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p4) :: (load 8, addrspace 4)
     ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>)
     ; CI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; CI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
@@ -6109,7 +6104,7 @@ body: |
     ; CI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
     ; VI-LABEL: name: test_load_constant_v3s16_align8
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p4) :: (load 6, align 8, addrspace 4)
+    ; VI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p4) :: (load 8, addrspace 4)
     ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>)
     ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
@@ -6117,7 +6112,7 @@ body: |
     ; VI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
     ; GFX9-LABEL: name: test_load_constant_v3s16_align8
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p4) :: (load 6, align 8, addrspace 4)
+    ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p4) :: (load 8, addrspace 4)
     ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>)
     ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
@@ -6125,7 +6120,7 @@ body: |
     ; GFX9: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
     ; CI-MESA-LABEL: name: test_load_constant_v3s16_align8
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p4) :: (load 6, align 8, addrspace 4)
+    ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p4) :: (load 8, addrspace 4)
     ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>)
     ; CI-MESA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; CI-MESA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
@@ -6133,7 +6128,7 @@ body: |
     ; CI-MESA: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
     ; GFX9-MESA-LABEL: name: test_load_constant_v3s16_align8
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p4) :: (load 6, align 8, addrspace 4)
+    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p4) :: (load 8, addrspace 4)
     ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>)
     ; GFX9-MESA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
@@ -8914,43 +8909,38 @@ body: |
 
     ; CI-LABEL: name: test_load_constant_v3s64_align32
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p4) :: (load 24, align 32, addrspace 4)
-    ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s64>) = G_CONCAT_VECTORS [[LOAD]](<4 x s64>), %2(<4 x s64>), %2(<4 x s64>)
-    ; CI: [[UV:%[0-9]+]]:_(<3 x s64>), [[UV1:%[0-9]+]]:_(<3 x s64>), [[UV2:%[0-9]+]]:_(<3 x s64>), [[UV3:%[0-9]+]]:_(<3 x s64>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s64>)
+    ; CI: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p4) :: (load 32, addrspace 4)
+    ; CI: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[LOAD]](<4 x s64>), 0
     ; CI: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
-    ; CI: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[UV]](<3 x s64>), 0
+    ; CI: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s64>), 0
     ; CI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
     ; VI-LABEL: name: test_load_constant_v3s64_align32
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p4) :: (load 24, align 32, addrspace 4)
-    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s64>) = G_CONCAT_VECTORS [[LOAD]](<4 x s64>), %2(<4 x s64>), %2(<4 x s64>)
-    ; VI: [[UV:%[0-9]+]]:_(<3 x s64>), [[UV1:%[0-9]+]]:_(<3 x s64>), [[UV2:%[0-9]+]]:_(<3 x s64>), [[UV3:%[0-9]+]]:_(<3 x s64>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s64>)
+    ; VI: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p4) :: (load 32, addrspace 4)
+    ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[LOAD]](<4 x s64>), 0
     ; VI: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
-    ; VI: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[UV]](<3 x s64>), 0
+    ; VI: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s64>), 0
     ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
     ; GFX9-LABEL: name: test_load_constant_v3s64_align32
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p4) :: (load 24, align 32, addrspace 4)
-    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s64>) = G_CONCAT_VECTORS [[LOAD]](<4 x s64>), %2(<4 x s64>), %2(<4 x s64>)
-    ; GFX9: [[UV:%[0-9]+]]:_(<3 x s64>), [[UV1:%[0-9]+]]:_(<3 x s64>), [[UV2:%[0-9]+]]:_(<3 x s64>), [[UV3:%[0-9]+]]:_(<3 x s64>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s64>)
+    ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p4) :: (load 32, addrspace 4)
+    ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[LOAD]](<4 x s64>), 0
     ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
-    ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[UV]](<3 x s64>), 0
+    ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s64>), 0
     ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
     ; CI-MESA-LABEL: name: test_load_constant_v3s64_align32
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p4) :: (load 24, align 32, addrspace 4)
-    ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s64>) = G_CONCAT_VECTORS [[LOAD]](<4 x s64>), %2(<4 x s64>), %2(<4 x s64>)
-    ; CI-MESA: [[UV:%[0-9]+]]:_(<3 x s64>), [[UV1:%[0-9]+]]:_(<3 x s64>), [[UV2:%[0-9]+]]:_(<3 x s64>), [[UV3:%[0-9]+]]:_(<3 x s64>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s64>)
+    ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p4) :: (load 32, addrspace 4)
+    ; CI-MESA: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[LOAD]](<4 x s64>), 0
     ; CI-MESA: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
-    ; CI-MESA: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[UV]](<3 x s64>), 0
+    ; CI-MESA: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s64>), 0
     ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
     ; GFX9-MESA-LABEL: name: test_load_constant_v3s64_align32
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p4) :: (load 24, align 32, addrspace 4)
-    ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s64>) = G_CONCAT_VECTORS [[LOAD]](<4 x s64>), %2(<4 x s64>), %2(<4 x s64>)
-    ; GFX9-MESA: [[UV:%[0-9]+]]:_(<3 x s64>), [[UV1:%[0-9]+]]:_(<3 x s64>), [[UV2:%[0-9]+]]:_(<3 x s64>), [[UV3:%[0-9]+]]:_(<3 x s64>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s64>)
+    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p4) :: (load 32, addrspace 4)
+    ; GFX9-MESA: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[LOAD]](<4 x s64>), 0
     ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
-    ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[UV]](<3 x s64>), 0
+    ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s64>), 0
     ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(<3 x s64>) = G_LOAD %0 :: (load 24, align 32, addrspace 4)
@@ -12286,24 +12276,29 @@ body: |
 
     ; CI-LABEL: name: test_ext_load_constant_s64_from_1_align4
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load 1, align 4, addrspace 4)
-    ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, align 4, addrspace 4)
+    ; CI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; CI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; VI-LABEL: name: test_ext_load_constant_s64_from_1_align4
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load 1, align 4, addrspace 4)
-    ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, align 4, addrspace 4)
+    ; VI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; VI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; GFX9-LABEL: name: test_ext_load_constant_s64_from_1_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load 1, align 4, addrspace 4)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, align 4, addrspace 4)
+    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; GFX9: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; CI-MESA-LABEL: name: test_ext_load_constant_s64_from_1_align4
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load 1, align 4, addrspace 4)
-    ; CI-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, align 4, addrspace 4)
+    ; CI-MESA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; CI-MESA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; GFX9-MESA-LABEL: name: test_ext_load_constant_s64_from_1_align4
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load 1, align 4, addrspace 4)
-    ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, align 4, addrspace 4)
+    ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(s64) = G_LOAD %0 :: (load 1, align 4, addrspace 4)
     $vgpr0_vgpr1 = COPY %1
@@ -12317,24 +12312,29 @@ body: |
 
     ; CI-LABEL: name: test_ext_load_constant_s64_from_2_align4
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load 2, align 4, addrspace 4)
-    ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, align 4, addrspace 4)
+    ; CI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; CI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; VI-LABEL: name: test_ext_load_constant_s64_from_2_align4
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load 2, align 4, addrspace 4)
-    ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, align 4, addrspace 4)
+    ; VI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; VI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; GFX9-LABEL: name: test_ext_load_constant_s64_from_2_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load 2, align 4, addrspace 4)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, align 4, addrspace 4)
+    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; GFX9: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; CI-MESA-LABEL: name: test_ext_load_constant_s64_from_2_align4
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load 2, align 4, addrspace 4)
-    ; CI-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, align 4, addrspace 4)
+    ; CI-MESA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; CI-MESA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; GFX9-MESA-LABEL: name: test_ext_load_constant_s64_from_2_align4
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load 2, align 4, addrspace 4)
-    ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, align 4, addrspace 4)
+    ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(s64) = G_LOAD %0 :: (load 2, align 4, addrspace 4)
     $vgpr0_vgpr1 = COPY %1
@@ -12348,24 +12348,29 @@ body: |
 
     ; CI-LABEL: name: test_ext_load_constant_s64_from_4_align4
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load 4, addrspace 4)
-    ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 4, addrspace 4)
+    ; CI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; CI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; VI-LABEL: name: test_ext_load_constant_s64_from_4_align4
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load 4, addrspace 4)
-    ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 4, addrspace 4)
+    ; VI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; VI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; GFX9-LABEL: name: test_ext_load_constant_s64_from_4_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load 4, addrspace 4)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 4, addrspace 4)
+    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; GFX9: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; CI-MESA-LABEL: name: test_ext_load_constant_s64_from_4_align4
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load 4, addrspace 4)
-    ; CI-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 4, addrspace 4)
+    ; CI-MESA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; CI-MESA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; GFX9-MESA-LABEL: name: test_ext_load_constant_s64_from_4_align4
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load 4, addrspace 4)
-    ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 4, addrspace 4)
+    ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(s64) = G_LOAD %0 :: (load 4, align 4, addrspace 4)
     $vgpr0_vgpr1 = COPY %1
@@ -12379,24 +12384,44 @@ body: |
 
     ; CI-LABEL: name: test_ext_load_constant_s128_from_4_align4
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY]](p4) :: (load 4, addrspace 4)
-    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128)
+    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 4, addrspace 4)
+    ; CI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32)
+    ; CI: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+    ; CI: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64)
+    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128)
     ; VI-LABEL: name: test_ext_load_constant_s128_from_4_align4
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY]](p4) :: (load 4, addrspace 4)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128)
+    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 4, addrspace 4)
+    ; VI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32)
+    ; VI: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+    ; VI: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64)
+    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128)
     ; GFX9-LABEL: name: test_ext_load_constant_s128_from_4_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY]](p4) :: (load 4, addrspace 4)
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128)
+    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 4, addrspace 4)
+    ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32)
+    ; GFX9: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+    ; GFX9: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64)
+    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128)
     ; CI-MESA-LABEL: name: test_ext_load_constant_s128_from_4_align4
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY]](p4) :: (load 4, addrspace 4)
-    ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128)
+    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 4, addrspace 4)
+    ; CI-MESA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CI-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32)
+    ; CI-MESA: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+    ; CI-MESA: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64)
+    ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128)
     ; GFX9-MESA-LABEL: name: test_ext_load_constant_s128_from_4_align4
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY]](p4) :: (load 4, addrspace 4)
-    ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128)
+    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 4, addrspace 4)
+    ; GFX9-MESA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; GFX9-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32)
+    ; GFX9-MESA: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+    ; GFX9-MESA: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64)
+    ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(s128) = G_LOAD %0 :: (load 4, align 4, addrspace 4)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -12410,24 +12435,29 @@ body: |
 
     ; CI-LABEL: name: test_ext_load_constant_s64_from_2_align2
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load 2, align 4, addrspace 4)
-    ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, align 4, addrspace 4)
+    ; CI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; CI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; VI-LABEL: name: test_ext_load_constant_s64_from_2_align2
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load 2, align 4, addrspace 4)
-    ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, align 4, addrspace 4)
+    ; VI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; VI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; GFX9-LABEL: name: test_ext_load_constant_s64_from_2_align2
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load 2, align 4, addrspace 4)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, align 4, addrspace 4)
+    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; GFX9: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; CI-MESA-LABEL: name: test_ext_load_constant_s64_from_2_align2
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load 2, align 4, addrspace 4)
-    ; CI-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, align 4, addrspace 4)
+    ; CI-MESA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; CI-MESA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; GFX9-MESA-LABEL: name: test_ext_load_constant_s64_from_2_align2
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load 2, align 4, addrspace 4)
-    ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, align 4, addrspace 4)
+    ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(s64) = G_LOAD %0 :: (load 2, align 4, addrspace 4)
     $vgpr0_vgpr1 = COPY %1
@@ -12441,24 +12471,29 @@ body: |
 
     ; CI-LABEL: name: test_ext_load_constant_s64_from_1_align1
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load 1, align 4, addrspace 4)
-    ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, align 4, addrspace 4)
+    ; CI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; CI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; VI-LABEL: name: test_ext_load_constant_s64_from_1_align1
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load 1, align 4, addrspace 4)
-    ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, align 4, addrspace 4)
+    ; VI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; VI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; GFX9-LABEL: name: test_ext_load_constant_s64_from_1_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load 1, align 4, addrspace 4)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, align 4, addrspace 4)
+    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; GFX9: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; CI-MESA-LABEL: name: test_ext_load_constant_s64_from_1_align1
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load 1, align 4, addrspace 4)
-    ; CI-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, align 4, addrspace 4)
+    ; CI-MESA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; CI-MESA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; GFX9-MESA-LABEL: name: test_ext_load_constant_s64_from_1_align1
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load 1, align 4, addrspace 4)
-    ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, align 4, addrspace 4)
+    ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(s64) = G_LOAD %0 :: (load 1, align 4, addrspace 4)
     $vgpr0_vgpr1 = COPY %1

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir
index 034f3903c653..54def72ad9da 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir
@@ -634,34 +634,39 @@ body: |
 
     ; CI-LABEL: name: test_load_flat_s48_align8
     ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load 6, align 8)
-    ; CI: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[LOAD]](s64)
-    ; CI: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s48)
-    ; CI: $vgpr0_vgpr1 = COPY [[ZEXT]](s64)
+    ; CI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load 8)
+    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655
+    ; CI: [[COPY1:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64)
+    ; CI: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]]
+    ; CI: $vgpr0_vgpr1 = COPY [[AND]](s64)
     ; VI-LABEL: name: test_load_flat_s48_align8
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load 6, align 8)
-    ; VI: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[LOAD]](s64)
-    ; VI: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s48)
-    ; VI: $vgpr0_vgpr1 = COPY [[ZEXT]](s64)
+    ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load 8)
+    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655
+    ; VI: [[COPY1:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64)
+    ; VI: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]]
+    ; VI: $vgpr0_vgpr1 = COPY [[AND]](s64)
     ; GFX9-LABEL: name: test_load_flat_s48_align8
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load 6, align 8)
-    ; GFX9: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[LOAD]](s64)
-    ; GFX9: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s48)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[ZEXT]](s64)
+    ; GFX9: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load 8)
+    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655
+    ; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64)
+    ; GFX9: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]]
+    ; GFX9: $vgpr0_vgpr1 = COPY [[AND]](s64)
     ; CI-MESA-LABEL: name: test_load_flat_s48_align8
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load 6, align 8)
-    ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[LOAD]](s64)
-    ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s48)
-    ; CI-MESA: $vgpr0_vgpr1 = COPY [[ZEXT]](s64)
+    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load 8)
+    ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655
+    ; CI-MESA: [[COPY1:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64)
+    ; CI-MESA: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]]
+    ; CI-MESA: $vgpr0_vgpr1 = COPY [[AND]](s64)
     ; GFX9-MESA-LABEL: name: test_load_flat_s48_align8
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load 6, align 8)
-    ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[LOAD]](s64)
-    ; GFX9-MESA: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s48)
-    ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[ZEXT]](s64)
+    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load 8)
+    ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655
+    ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64)
+    ; GFX9-MESA: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]]
+    ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[AND]](s64)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(s48) = G_LOAD %0 :: (load 6, align 8, addrspace 0)
     %2:_(s64) = G_ZEXT %1
@@ -5923,7 +5928,7 @@ body: |
 
     ; CI-LABEL: name: test_load_flat_v3s16_align8
     ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load 6, align 8)
+    ; CI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load 8)
     ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>)
     ; CI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; CI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
@@ -5931,7 +5936,7 @@ body: |
     ; CI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
     ; VI-LABEL: name: test_load_flat_v3s16_align8
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load 6, align 8)
+    ; VI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load 8)
     ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>)
     ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
@@ -5939,7 +5944,7 @@ body: |
     ; VI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
     ; GFX9-LABEL: name: test_load_flat_v3s16_align8
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load 6, align 8)
+    ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load 8)
     ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>)
     ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
@@ -5947,7 +5952,7 @@ body: |
     ; GFX9: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
     ; CI-MESA-LABEL: name: test_load_flat_v3s16_align8
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load 6, align 8)
+    ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load 8)
     ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>)
     ; CI-MESA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; CI-MESA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
@@ -5955,7 +5960,7 @@ body: |
     ; CI-MESA: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
     ; GFX9-MESA-LABEL: name: test_load_flat_v3s16_align8
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load 6, align 8)
+    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load 8)
     ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>)
     ; GFX9-MESA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
@@ -11782,24 +11787,29 @@ body: |
 
     ; CI-LABEL: name: test_ext_load_flat_s64_from_1_align4
     ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load 1, align 4)
-    ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1, align 4)
+    ; CI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; CI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; VI-LABEL: name: test_ext_load_flat_s64_from_1_align4
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load 1, align 4)
-    ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1, align 4)
+    ; VI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; VI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; GFX9-LABEL: name: test_ext_load_flat_s64_from_1_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load 1, align 4)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1, align 4)
+    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; GFX9: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; CI-MESA-LABEL: name: test_ext_load_flat_s64_from_1_align4
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load 1, align 4)
-    ; CI-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1, align 4)
+    ; CI-MESA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; CI-MESA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; GFX9-MESA-LABEL: name: test_ext_load_flat_s64_from_1_align4
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load 1, align 4)
-    ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1, align 4)
+    ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(s64) = G_LOAD %0 :: (load 1, align 4, addrspace 0)
     $vgpr0_vgpr1 = COPY %1
@@ -11813,24 +11823,29 @@ body: |
 
     ; CI-LABEL: name: test_ext_load_flat_s64_from_2_align4
     ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load 2, align 4)
-    ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2, align 4)
+    ; CI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; CI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; VI-LABEL: name: test_ext_load_flat_s64_from_2_align4
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load 2, align 4)
-    ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2, align 4)
+    ; VI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; VI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; GFX9-LABEL: name: test_ext_load_flat_s64_from_2_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load 2, align 4)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2, align 4)
+    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; GFX9: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; CI-MESA-LABEL: name: test_ext_load_flat_s64_from_2_align4
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load 2, align 4)
-    ; CI-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2, align 4)
+    ; CI-MESA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; CI-MESA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; GFX9-MESA-LABEL: name: test_ext_load_flat_s64_from_2_align4
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load 2, align 4)
-    ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2, align 4)
+    ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(s64) = G_LOAD %0 :: (load 2, align 4, addrspace 0)
     $vgpr0_vgpr1 = COPY %1
@@ -11844,24 +11859,29 @@ body: |
 
     ; CI-LABEL: name: test_ext_load_flat_s64_from_4_align4
     ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load 4)
-    ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 4)
+    ; CI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; CI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; VI-LABEL: name: test_ext_load_flat_s64_from_4_align4
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load 4)
-    ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 4)
+    ; VI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; VI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; GFX9-LABEL: name: test_ext_load_flat_s64_from_4_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load 4)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 4)
+    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; GFX9: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; CI-MESA-LABEL: name: test_ext_load_flat_s64_from_4_align4
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load 4)
-    ; CI-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 4)
+    ; CI-MESA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; CI-MESA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; GFX9-MESA-LABEL: name: test_ext_load_flat_s64_from_4_align4
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load 4)
-    ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 4)
+    ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(s64) = G_LOAD %0 :: (load 4, align 4, addrspace 0)
     $vgpr0_vgpr1 = COPY %1
@@ -11875,24 +11895,44 @@ body: |
 
     ; CI-LABEL: name: test_ext_load_flat_s128_from_4_align4
     ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY]](p0) :: (load 4)
-    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128)
+    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 4)
+    ; CI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32)
+    ; CI: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+    ; CI: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64)
+    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128)
     ; VI-LABEL: name: test_ext_load_flat_s128_from_4_align4
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY]](p0) :: (load 4)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128)
+    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 4)
+    ; VI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32)
+    ; VI: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+    ; VI: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64)
+    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128)
     ; GFX9-LABEL: name: test_ext_load_flat_s128_from_4_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY]](p0) :: (load 4)
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128)
+    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 4)
+    ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32)
+    ; GFX9: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+    ; GFX9: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64)
+    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128)
     ; CI-MESA-LABEL: name: test_ext_load_flat_s128_from_4_align4
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY]](p0) :: (load 4)
-    ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128)
+    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 4)
+    ; CI-MESA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CI-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32)
+    ; CI-MESA: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+    ; CI-MESA: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64)
+    ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128)
     ; GFX9-MESA-LABEL: name: test_ext_load_flat_s128_from_4_align4
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY]](p0) :: (load 4)
-    ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128)
+    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 4)
+    ; GFX9-MESA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; GFX9-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32)
+    ; GFX9-MESA: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+    ; GFX9-MESA: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64)
+    ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(s128) = G_LOAD %0 :: (load 4, align 4, addrspace 0)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -11906,24 +11946,29 @@ body: |
 
     ; CI-LABEL: name: test_ext_load_flat_s64_from_2_align2
     ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load 2, align 4)
-    ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2, align 4)
+    ; CI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; CI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; VI-LABEL: name: test_ext_load_flat_s64_from_2_align2
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load 2, align 4)
-    ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2, align 4)
+    ; VI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; VI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; GFX9-LABEL: name: test_ext_load_flat_s64_from_2_align2
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load 2, align 4)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2, align 4)
+    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; GFX9: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; CI-MESA-LABEL: name: test_ext_load_flat_s64_from_2_align2
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load 2, align 4)
-    ; CI-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2, align 4)
+    ; CI-MESA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; CI-MESA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; GFX9-MESA-LABEL: name: test_ext_load_flat_s64_from_2_align2
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load 2, align 4)
-    ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2, align 4)
+    ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(s64) = G_LOAD %0 :: (load 2, align 4, addrspace 0)
     $vgpr0_vgpr1 = COPY %1
@@ -11937,24 +11982,29 @@ body: |
 
     ; CI-LABEL: name: test_ext_load_flat_s64_from_1_align1
     ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load 1, align 4)
-    ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1, align 4)
+    ; CI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; CI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; VI-LABEL: name: test_ext_load_flat_s64_from_1_align1
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load 1, align 4)
-    ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1, align 4)
+    ; VI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; VI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; GFX9-LABEL: name: test_ext_load_flat_s64_from_1_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load 1, align 4)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1, align 4)
+    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; GFX9: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; CI-MESA-LABEL: name: test_ext_load_flat_s64_from_1_align1
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load 1, align 4)
-    ; CI-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1, align 4)
+    ; CI-MESA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; CI-MESA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; GFX9-MESA-LABEL: name: test_ext_load_flat_s64_from_1_align1
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load 1, align 4)
-    ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1, align 4)
+    ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(s64) = G_LOAD %0 :: (load 1, align 4, addrspace 0)
     $vgpr0_vgpr1 = COPY %1

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir
index 8bb7b2a7c20b..0abd624724a1 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir
@@ -667,40 +667,34 @@ body: |
 
     ; SI-LABEL: name: test_load_global_s24_align8
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 3, align 8, addrspace 1)
-    ; SI: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[LOAD]](s32)
-    ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s24)
-    ; SI: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, align 8, addrspace 1)
+    ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; SI: $vgpr0 = COPY [[COPY1]](s32)
     ; CI-HSA-LABEL: name: test_load_global_s24_align8
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 3, align 8, addrspace 1)
-    ; CI-HSA: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[LOAD]](s32)
-    ; CI-HSA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s24)
-    ; CI-HSA: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, align 8, addrspace 1)
+    ; CI-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; CI-HSA: $vgpr0 = COPY [[COPY1]](s32)
     ; CI-MESA-LABEL: name: test_load_global_s24_align8
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 3, align 8, addrspace 1)
-    ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[LOAD]](s32)
-    ; CI-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s24)
-    ; CI-MESA: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, align 8, addrspace 1)
+    ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; CI-MESA: $vgpr0 = COPY [[COPY1]](s32)
     ; VI-LABEL: name: test_load_global_s24_align8
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 3, align 8, addrspace 1)
-    ; VI: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[LOAD]](s32)
-    ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s24)
-    ; VI: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, align 8, addrspace 1)
+    ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; VI: $vgpr0 = COPY [[COPY1]](s32)
     ; GFX9-HSA-LABEL: name: test_load_global_s24_align8
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 3, align 8, addrspace 1)
-    ; GFX9-HSA: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[LOAD]](s32)
-    ; GFX9-HSA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s24)
-    ; GFX9-HSA: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, align 8, addrspace 1)
+    ; GFX9-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; GFX9-HSA: $vgpr0 = COPY [[COPY1]](s32)
     ; GFX9-MESA-LABEL: name: test_load_global_s24_align8
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 3, align 8, addrspace 1)
-    ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[LOAD]](s32)
-    ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s24)
-    ; GFX9-MESA: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, align 8, addrspace 1)
+    ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; GFX9-MESA: $vgpr0 = COPY [[COPY1]](s32)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s24) = G_LOAD %0 :: (load 3, align 8, addrspace 1)
     %2:_(s32) = G_ANYEXT %1
@@ -715,40 +709,34 @@ body: |
 
     ; SI-LABEL: name: test_load_global_s24_align4
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 3, align 4, addrspace 1)
-    ; SI: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[LOAD]](s32)
-    ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s24)
-    ; SI: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1)
+    ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; SI: $vgpr0 = COPY [[COPY1]](s32)
     ; CI-HSA-LABEL: name: test_load_global_s24_align4
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 3, align 4, addrspace 1)
-    ; CI-HSA: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[LOAD]](s32)
-    ; CI-HSA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s24)
-    ; CI-HSA: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1)
+    ; CI-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; CI-HSA: $vgpr0 = COPY [[COPY1]](s32)
     ; CI-MESA-LABEL: name: test_load_global_s24_align4
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 3, align 4, addrspace 1)
-    ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[LOAD]](s32)
-    ; CI-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s24)
-    ; CI-MESA: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1)
+    ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; CI-MESA: $vgpr0 = COPY [[COPY1]](s32)
     ; VI-LABEL: name: test_load_global_s24_align4
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 3, align 4, addrspace 1)
-    ; VI: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[LOAD]](s32)
-    ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s24)
-    ; VI: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1)
+    ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; VI: $vgpr0 = COPY [[COPY1]](s32)
     ; GFX9-HSA-LABEL: name: test_load_global_s24_align4
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 3, align 4, addrspace 1)
-    ; GFX9-HSA: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[LOAD]](s32)
-    ; GFX9-HSA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s24)
-    ; GFX9-HSA: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1)
+    ; GFX9-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; GFX9-HSA: $vgpr0 = COPY [[COPY1]](s32)
     ; GFX9-MESA-LABEL: name: test_load_global_s24_align4
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 3, align 4, addrspace 1)
-    ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[LOAD]](s32)
-    ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s24)
-    ; GFX9-MESA: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1)
+    ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; GFX9-MESA: $vgpr0 = COPY [[COPY1]](s32)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s24) = G_LOAD %0 :: (load 3, align 4, addrspace 1)
     %2:_(s32) = G_ANYEXT %1
@@ -774,10 +762,15 @@ body: |
     ; SI: $vgpr0 = COPY [[ANYEXT]](s32)
     ; CI-HSA-LABEL: name: test_load_global_s24_align2
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 3, align 2, addrspace 1)
-    ; CI-HSA: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[LOAD]](s32)
-    ; CI-HSA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s24)
-    ; CI-HSA: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; CI-HSA: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load 2, addrspace 1)
+    ; CI-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 + 2, align 2, addrspace 1)
+    ; CI-HSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32)
+    ; CI-HSA: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
+    ; CI-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[OR]](s32)
+    ; CI-HSA: $vgpr0 = COPY [[COPY1]](s32)
     ; CI-MESA-LABEL: name: test_load_global_s24_align2
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; CI-MESA: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1)
@@ -802,10 +795,15 @@ body: |
     ; VI: $vgpr0 = COPY [[ANYEXT]](s32)
     ; GFX9-HSA-LABEL: name: test_load_global_s24_align2
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 3, align 2, addrspace 1)
-    ; GFX9-HSA: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[LOAD]](s32)
-    ; GFX9-HSA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s24)
-    ; GFX9-HSA: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; GFX9-HSA: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load 2, addrspace 1)
+    ; GFX9-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 + 2, align 2, addrspace 1)
+    ; GFX9-HSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32)
+    ; GFX9-HSA: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
+    ; GFX9-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[OR]](s32)
+    ; GFX9-HSA: $vgpr0 = COPY [[COPY1]](s32)
     ; GFX9-MESA-LABEL: name: test_load_global_s24_align2
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1)
@@ -842,10 +840,18 @@ body: |
     ; SI: $vgpr0 = COPY [[ANYEXT]](s32)
     ; CI-HSA-LABEL: name: test_load_global_s24_align1
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 3, align 1, addrspace 1)
-    ; CI-HSA: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[LOAD]](s32)
-    ; CI-HSA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s24)
-    ; CI-HSA: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, align 1, addrspace 1)
+    ; CI-HSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; CI-HSA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
+    ; CI-HSA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CI-HSA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 + 2, addrspace 1)
+    ; CI-HSA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C2]](s32)
+    ; CI-HSA: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[AND]]
+    ; CI-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[OR]](s32)
+    ; CI-HSA: $vgpr0 = COPY [[COPY2]](s32)
     ; CI-MESA-LABEL: name: test_load_global_s24_align1
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; CI-MESA: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[COPY]](p1) :: (load 2, align 1, addrspace 1)
@@ -870,10 +876,18 @@ body: |
     ; VI: $vgpr0 = COPY [[ANYEXT]](s32)
     ; GFX9-HSA-LABEL: name: test_load_global_s24_align1
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 3, align 1, addrspace 1)
-    ; GFX9-HSA: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[LOAD]](s32)
-    ; GFX9-HSA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s24)
-    ; GFX9-HSA: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, align 1, addrspace 1)
+    ; GFX9-HSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; GFX9-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; GFX9-HSA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
+    ; GFX9-HSA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; GFX9-HSA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 + 2, addrspace 1)
+    ; GFX9-HSA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C2]](s32)
+    ; GFX9-HSA: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[AND]]
+    ; GFX9-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[OR]](s32)
+    ; GFX9-HSA: $vgpr0 = COPY [[COPY2]](s32)
     ; GFX9-MESA-LABEL: name: test_load_global_s24_align1
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[COPY]](p1) :: (load 2, align 1, addrspace 1)
@@ -906,40 +920,46 @@ body: |
     ; CI: $vgpr0_vgpr1 = COPY [[AND]](s64)
     ; SI-LABEL: name: test_load_global_s48_align8
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 6, align 8, addrspace 1)
-    ; SI: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[LOAD]](s64)
-    ; SI: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s48)
-    ; SI: $vgpr0_vgpr1 = COPY [[ZEXT]](s64)
+    ; SI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1)
+    ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655
+    ; SI: [[COPY1:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64)
+    ; SI: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]]
+    ; SI: $vgpr0_vgpr1 = COPY [[AND]](s64)
     ; CI-HSA-LABEL: name: test_load_global_s48_align8
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 6, align 8, addrspace 1)
-    ; CI-HSA: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[LOAD]](s64)
-    ; CI-HSA: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s48)
-    ; CI-HSA: $vgpr0_vgpr1 = COPY [[ZEXT]](s64)
+    ; CI-HSA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1)
+    ; CI-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655
+    ; CI-HSA: [[COPY1:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64)
+    ; CI-HSA: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]]
+    ; CI-HSA: $vgpr0_vgpr1 = COPY [[AND]](s64)
     ; CI-MESA-LABEL: name: test_load_global_s48_align8
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 6, align 8, addrspace 1)
-    ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[LOAD]](s64)
-    ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s48)
-    ; CI-MESA: $vgpr0_vgpr1 = COPY [[ZEXT]](s64)
+    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1)
+    ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655
+    ; CI-MESA: [[COPY1:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64)
+    ; CI-MESA: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]]
+    ; CI-MESA: $vgpr0_vgpr1 = COPY [[AND]](s64)
     ; VI-LABEL: name: test_load_global_s48_align8
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 6, align 8, addrspace 1)
-    ; VI: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[LOAD]](s64)
-    ; VI: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s48)
-    ; VI: $vgpr0_vgpr1 = COPY [[ZEXT]](s64)
+    ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1)
+    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655
+    ; VI: [[COPY1:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64)
+    ; VI: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]]
+    ; VI: $vgpr0_vgpr1 = COPY [[AND]](s64)
     ; GFX9-HSA-LABEL: name: test_load_global_s48_align8
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 6, align 8, addrspace 1)
-    ; GFX9-HSA: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[LOAD]](s64)
-    ; GFX9-HSA: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s48)
-    ; GFX9-HSA: $vgpr0_vgpr1 = COPY [[ZEXT]](s64)
+    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1)
+    ; GFX9-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655
+    ; GFX9-HSA: [[COPY1:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64)
+    ; GFX9-HSA: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]]
+    ; GFX9-HSA: $vgpr0_vgpr1 = COPY [[AND]](s64)
     ; GFX9-MESA-LABEL: name: test_load_global_s48_align8
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 6, align 8, addrspace 1)
-    ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[LOAD]](s64)
-    ; GFX9-MESA: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s48)
-    ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[ZEXT]](s64)
+    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1)
+    ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655
+    ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64)
+    ; GFX9-MESA: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]]
+    ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[AND]](s64)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s48) = G_LOAD %0 :: (load 6, align 8, addrspace 1)
     %2:_(s64) = G_ZEXT %1
@@ -1434,11 +1454,9 @@ body: |
 
     ; SI-LABEL: name: test_load_global_s96_align16
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 12, align 16, addrspace 1)
-    ; SI: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
-    ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[DEF]](<4 x s32>), [[DEF]](<4 x s32>)
-    ; SI: [[UV:%[0-9]+]]:_(<3 x s32>), [[UV1:%[0-9]+]]:_(<3 x s32>), [[UV2:%[0-9]+]]:_(<3 x s32>), [[UV3:%[0-9]+]]:_(<3 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s32>)
-    ; SI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[UV]](<3 x s32>)
+    ; SI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, addrspace 1)
+    ; SI: [[EXTRACT:%[0-9]+]]:_(<3 x s32>) = G_EXTRACT [[LOAD]](<4 x s32>), 0
+    ; SI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[EXTRACT]](<3 x s32>)
     ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; CI-HSA-LABEL: name: test_load_global_s96_align16
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
@@ -6162,7 +6180,7 @@ body: |
 
     ; SI-LABEL: name: test_load_global_v3s16_align8
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load 6, align 8, addrspace 1)
+    ; SI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1)
     ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>)
     ; SI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
@@ -6170,7 +6188,7 @@ body: |
     ; SI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
     ; CI-HSA-LABEL: name: test_load_global_v3s16_align8
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load 6, align 8, addrspace 1)
+    ; CI-HSA: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1)
     ; CI-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>)
     ; CI-HSA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; CI-HSA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
@@ -6178,7 +6196,7 @@ body: |
     ; CI-HSA: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
     ; CI-MESA-LABEL: name: test_load_global_v3s16_align8
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load 6, align 8, addrspace 1)
+    ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1)
     ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>)
     ; CI-MESA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; CI-MESA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
@@ -6186,7 +6204,7 @@ body: |
     ; CI-MESA: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
     ; VI-LABEL: name: test_load_global_v3s16_align8
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load 6, align 8, addrspace 1)
+    ; VI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1)
     ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>)
     ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
@@ -6194,7 +6212,7 @@ body: |
     ; VI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
     ; GFX9-HSA-LABEL: name: test_load_global_v3s16_align8
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load 6, align 8, addrspace 1)
+    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1)
     ; GFX9-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>)
     ; GFX9-HSA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; GFX9-HSA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
@@ -6202,7 +6220,7 @@ body: |
     ; GFX9-HSA: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
     ; GFX9-MESA-LABEL: name: test_load_global_v3s16_align8
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load 6, align 8, addrspace 1)
+    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1)
     ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>)
     ; GFX9-MESA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
@@ -7152,94 +7170,272 @@ body: |
 
     ; SI-LABEL: name: test_load_global_v5s16_align16
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[COPY]](p1) :: (load 10, align 16, addrspace 1)
+    ; SI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, addrspace 1)
+    ; SI: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>)
     ; SI: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
-    ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<40 x s16>) = G_CONCAT_VECTORS [[LOAD]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>)
+    ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<40 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>)
     ; SI: [[UV:%[0-9]+]]:_(<5 x s16>), [[UV1:%[0-9]+]]:_(<5 x s16>), [[UV2:%[0-9]+]]:_(<5 x s16>), [[UV3:%[0-9]+]]:_(<5 x s16>), [[UV4:%[0-9]+]]:_(<5 x s16>), [[UV5:%[0-9]+]]:_(<5 x s16>), [[UV6:%[0-9]+]]:_(<5 x s16>), [[UV7:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<40 x s16>)
     ; SI: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
     ; SI: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
     ; SI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF1]](<6 x s16>), [[DEF2]](<6 x s16>), [[DEF2]](<6 x s16>), [[DEF2]](<6 x s16>), [[DEF2]](<6 x s16>)
     ; SI: [[UV8:%[0-9]+]]:_(<5 x s16>), [[UV9:%[0-9]+]]:_(<5 x s16>), [[UV10:%[0-9]+]]:_(<5 x s16>), [[UV11:%[0-9]+]]:_(<5 x s16>), [[UV12:%[0-9]+]]:_(<5 x s16>), [[UV13:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<30 x s16>)
-    ; SI: [[CONCAT_VECTORS2:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[UV]](<5 x s16>), [[UV8]](<5 x s16>)
-    ; SI: [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>), [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>), [[UV18:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<10 x s16>)
-    ; SI: $vgpr0 = COPY [[UV14]](<2 x s16>)
-    ; SI: $vgpr1 = COPY [[UV15]](<2 x s16>)
-    ; SI: $vgpr2 = COPY [[UV16]](<2 x s16>)
+    ; SI: [[INSERT:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF2]], [[UV]](<5 x s16>), 0
+    ; SI: [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>), [[UV16:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<6 x s16>)
+    ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV14]](<2 x s16>)
+    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+    ; SI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV15]](<2 x s16>)
+    ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+    ; SI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV16]](<2 x s16>)
+    ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+    ; SI: [[INSERT1:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF2]], [[UV8]](<5 x s16>), 0
+    ; SI: [[UV17:%[0-9]+]]:_(<2 x s16>), [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<6 x s16>)
+    ; SI: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV17]](<2 x s16>)
+    ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
+    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]]
+    ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]]
+    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; SI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+    ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]]
+    ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]]
+    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+    ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; SI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+    ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]]
+    ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32)
+    ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]]
+    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; SI: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; SI: $vgpr0 = COPY [[BITCAST5]](<2 x s16>)
+    ; SI: $vgpr1 = COPY [[BITCAST6]](<2 x s16>)
+    ; SI: $vgpr2 = COPY [[BITCAST7]](<2 x s16>)
     ; CI-HSA-LABEL: name: test_load_global_v5s16_align16
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[COPY]](p1) :: (load 10, align 16, addrspace 1)
+    ; CI-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, addrspace 1)
+    ; CI-HSA: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>)
     ; CI-HSA: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
-    ; CI-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<40 x s16>) = G_CONCAT_VECTORS [[LOAD]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>)
+    ; CI-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<40 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>)
     ; CI-HSA: [[UV:%[0-9]+]]:_(<5 x s16>), [[UV1:%[0-9]+]]:_(<5 x s16>), [[UV2:%[0-9]+]]:_(<5 x s16>), [[UV3:%[0-9]+]]:_(<5 x s16>), [[UV4:%[0-9]+]]:_(<5 x s16>), [[UV5:%[0-9]+]]:_(<5 x s16>), [[UV6:%[0-9]+]]:_(<5 x s16>), [[UV7:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<40 x s16>)
     ; CI-HSA: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
     ; CI-HSA: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
     ; CI-HSA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF1]](<6 x s16>), [[DEF2]](<6 x s16>), [[DEF2]](<6 x s16>), [[DEF2]](<6 x s16>), [[DEF2]](<6 x s16>)
     ; CI-HSA: [[UV8:%[0-9]+]]:_(<5 x s16>), [[UV9:%[0-9]+]]:_(<5 x s16>), [[UV10:%[0-9]+]]:_(<5 x s16>), [[UV11:%[0-9]+]]:_(<5 x s16>), [[UV12:%[0-9]+]]:_(<5 x s16>), [[UV13:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<30 x s16>)
-    ; CI-HSA: [[CONCAT_VECTORS2:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[UV]](<5 x s16>), [[UV8]](<5 x s16>)
-    ; CI-HSA: [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>), [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>), [[UV18:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<10 x s16>)
-    ; CI-HSA: $vgpr0 = COPY [[UV14]](<2 x s16>)
-    ; CI-HSA: $vgpr1 = COPY [[UV15]](<2 x s16>)
-    ; CI-HSA: $vgpr2 = COPY [[UV16]](<2 x s16>)
+    ; CI-HSA: [[INSERT:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF2]], [[UV]](<5 x s16>), 0
+    ; CI-HSA: [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>), [[UV16:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<6 x s16>)
+    ; CI-HSA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV14]](<2 x s16>)
+    ; CI-HSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+    ; CI-HSA: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV15]](<2 x s16>)
+    ; CI-HSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+    ; CI-HSA: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV16]](<2 x s16>)
+    ; CI-HSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+    ; CI-HSA: [[INSERT1:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF2]], [[UV8]](<5 x s16>), 0
+    ; CI-HSA: [[UV17:%[0-9]+]]:_(<2 x s16>), [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<6 x s16>)
+    ; CI-HSA: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV17]](<2 x s16>)
+    ; CI-HSA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
+    ; CI-HSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; CI-HSA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]]
+    ; CI-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; CI-HSA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]]
+    ; CI-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; CI-HSA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-HSA: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CI-HSA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+    ; CI-HSA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]]
+    ; CI-HSA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; CI-HSA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]]
+    ; CI-HSA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+    ; CI-HSA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CI-HSA: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; CI-HSA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+    ; CI-HSA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]]
+    ; CI-HSA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32)
+    ; CI-HSA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]]
+    ; CI-HSA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; CI-HSA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; CI-HSA: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; CI-HSA: $vgpr0 = COPY [[BITCAST5]](<2 x s16>)
+    ; CI-HSA: $vgpr1 = COPY [[BITCAST6]](<2 x s16>)
+    ; CI-HSA: $vgpr2 = COPY [[BITCAST7]](<2 x s16>)
     ; CI-MESA-LABEL: name: test_load_global_v5s16_align16
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[COPY]](p1) :: (load 10, align 16, addrspace 1)
+    ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, addrspace 1)
+    ; CI-MESA: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>)
     ; CI-MESA: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
-    ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<40 x s16>) = G_CONCAT_VECTORS [[LOAD]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>)
+    ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<40 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>)
     ; CI-MESA: [[UV:%[0-9]+]]:_(<5 x s16>), [[UV1:%[0-9]+]]:_(<5 x s16>), [[UV2:%[0-9]+]]:_(<5 x s16>), [[UV3:%[0-9]+]]:_(<5 x s16>), [[UV4:%[0-9]+]]:_(<5 x s16>), [[UV5:%[0-9]+]]:_(<5 x s16>), [[UV6:%[0-9]+]]:_(<5 x s16>), [[UV7:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<40 x s16>)
     ; CI-MESA: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
     ; CI-MESA: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
     ; CI-MESA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF1]](<6 x s16>), [[DEF2]](<6 x s16>), [[DEF2]](<6 x s16>), [[DEF2]](<6 x s16>), [[DEF2]](<6 x s16>)
     ; CI-MESA: [[UV8:%[0-9]+]]:_(<5 x s16>), [[UV9:%[0-9]+]]:_(<5 x s16>), [[UV10:%[0-9]+]]:_(<5 x s16>), [[UV11:%[0-9]+]]:_(<5 x s16>), [[UV12:%[0-9]+]]:_(<5 x s16>), [[UV13:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<30 x s16>)
-    ; CI-MESA: [[CONCAT_VECTORS2:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[UV]](<5 x s16>), [[UV8]](<5 x s16>)
-    ; CI-MESA: [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>), [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>), [[UV18:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<10 x s16>)
-    ; CI-MESA: $vgpr0 = COPY [[UV14]](<2 x s16>)
-    ; CI-MESA: $vgpr1 = COPY [[UV15]](<2 x s16>)
-    ; CI-MESA: $vgpr2 = COPY [[UV16]](<2 x s16>)
+    ; CI-MESA: [[INSERT:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF2]], [[UV]](<5 x s16>), 0
+    ; CI-MESA: [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>), [[UV16:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<6 x s16>)
+    ; CI-MESA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV14]](<2 x s16>)
+    ; CI-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+    ; CI-MESA: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV15]](<2 x s16>)
+    ; CI-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+    ; CI-MESA: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV16]](<2 x s16>)
+    ; CI-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+    ; CI-MESA: [[INSERT1:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF2]], [[UV8]](<5 x s16>), 0
+    ; CI-MESA: [[UV17:%[0-9]+]]:_(<2 x s16>), [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<6 x s16>)
+    ; CI-MESA: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV17]](<2 x s16>)
+    ; CI-MESA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
+    ; CI-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]]
+    ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]]
+    ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-MESA: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+    ; CI-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]]
+    ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]]
+    ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+    ; CI-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CI-MESA: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+    ; CI-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]]
+    ; CI-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32)
+    ; CI-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]]
+    ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; CI-MESA: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; CI-MESA: $vgpr0 = COPY [[BITCAST5]](<2 x s16>)
+    ; CI-MESA: $vgpr1 = COPY [[BITCAST6]](<2 x s16>)
+    ; CI-MESA: $vgpr2 = COPY [[BITCAST7]](<2 x s16>)
     ; VI-LABEL: name: test_load_global_v5s16_align16
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[COPY]](p1) :: (load 10, align 16, addrspace 1)
+    ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, addrspace 1)
+    ; VI: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>)
     ; VI: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<40 x s16>) = G_CONCAT_VECTORS [[LOAD]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>)
+    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<40 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>)
     ; VI: [[UV:%[0-9]+]]:_(<5 x s16>), [[UV1:%[0-9]+]]:_(<5 x s16>), [[UV2:%[0-9]+]]:_(<5 x s16>), [[UV3:%[0-9]+]]:_(<5 x s16>), [[UV4:%[0-9]+]]:_(<5 x s16>), [[UV5:%[0-9]+]]:_(<5 x s16>), [[UV6:%[0-9]+]]:_(<5 x s16>), [[UV7:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<40 x s16>)
     ; VI: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
     ; VI: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
     ; VI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF1]](<6 x s16>), [[DEF2]](<6 x s16>), [[DEF2]](<6 x s16>), [[DEF2]](<6 x s16>), [[DEF2]](<6 x s16>)
     ; VI: [[UV8:%[0-9]+]]:_(<5 x s16>), [[UV9:%[0-9]+]]:_(<5 x s16>), [[UV10:%[0-9]+]]:_(<5 x s16>), [[UV11:%[0-9]+]]:_(<5 x s16>), [[UV12:%[0-9]+]]:_(<5 x s16>), [[UV13:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<30 x s16>)
-    ; VI: [[CONCAT_VECTORS2:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[UV]](<5 x s16>), [[UV8]](<5 x s16>)
-    ; VI: [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>), [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>), [[UV18:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<10 x s16>)
-    ; VI: $vgpr0 = COPY [[UV14]](<2 x s16>)
-    ; VI: $vgpr1 = COPY [[UV15]](<2 x s16>)
-    ; VI: $vgpr2 = COPY [[UV16]](<2 x s16>)
+    ; VI: [[INSERT:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF2]], [[UV]](<5 x s16>), 0
+    ; VI: [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>), [[UV16:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<6 x s16>)
+    ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV14]](<2 x s16>)
+    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+    ; VI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV15]](<2 x s16>)
+    ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+    ; VI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV16]](<2 x s16>)
+    ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+    ; VI: [[INSERT1:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF2]], [[UV8]](<5 x s16>), 0
+    ; VI: [[UV17:%[0-9]+]]:_(<2 x s16>), [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<6 x s16>)
+    ; VI: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV17]](<2 x s16>)
+    ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
+    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]]
+    ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]]
+    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]]
+    ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]]
+    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; VI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+    ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]]
+    ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32)
+    ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]]
+    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; VI: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; VI: $vgpr0 = COPY [[BITCAST5]](<2 x s16>)
+    ; VI: $vgpr1 = COPY [[BITCAST6]](<2 x s16>)
+    ; VI: $vgpr2 = COPY [[BITCAST7]](<2 x s16>)
     ; GFX9-HSA-LABEL: name: test_load_global_v5s16_align16
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[COPY]](p1) :: (load 10, align 16, addrspace 1)
+    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, addrspace 1)
+    ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>)
     ; GFX9-HSA: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<40 x s16>) = G_CONCAT_VECTORS [[LOAD]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>)
+    ; GFX9-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<40 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>)
     ; GFX9-HSA: [[UV:%[0-9]+]]:_(<5 x s16>), [[UV1:%[0-9]+]]:_(<5 x s16>), [[UV2:%[0-9]+]]:_(<5 x s16>), [[UV3:%[0-9]+]]:_(<5 x s16>), [[UV4:%[0-9]+]]:_(<5 x s16>), [[UV5:%[0-9]+]]:_(<5 x s16>), [[UV6:%[0-9]+]]:_(<5 x s16>), [[UV7:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<40 x s16>)
     ; GFX9-HSA: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
     ; GFX9-HSA: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
     ; GFX9-HSA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF1]](<6 x s16>), [[DEF2]](<6 x s16>), [[DEF2]](<6 x s16>), [[DEF2]](<6 x s16>), [[DEF2]](<6 x s16>)
     ; GFX9-HSA: [[UV8:%[0-9]+]]:_(<5 x s16>), [[UV9:%[0-9]+]]:_(<5 x s16>), [[UV10:%[0-9]+]]:_(<5 x s16>), [[UV11:%[0-9]+]]:_(<5 x s16>), [[UV12:%[0-9]+]]:_(<5 x s16>), [[UV13:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<30 x s16>)
-    ; GFX9-HSA: [[CONCAT_VECTORS2:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[UV]](<5 x s16>), [[UV8]](<5 x s16>)
-    ; GFX9-HSA: [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>), [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>), [[UV18:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<10 x s16>)
-    ; GFX9-HSA: $vgpr0 = COPY [[UV14]](<2 x s16>)
-    ; GFX9-HSA: $vgpr1 = COPY [[UV15]](<2 x s16>)
-    ; GFX9-HSA: $vgpr2 = COPY [[UV16]](<2 x s16>)
+    ; GFX9-HSA: [[INSERT:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF2]], [[UV]](<5 x s16>), 0
+    ; GFX9-HSA: [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>), [[UV16:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<6 x s16>)
+    ; GFX9-HSA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV14]](<2 x s16>)
+    ; GFX9-HSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+    ; GFX9-HSA: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV15]](<2 x s16>)
+    ; GFX9-HSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+    ; GFX9-HSA: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV16]](<2 x s16>)
+    ; GFX9-HSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+    ; GFX9-HSA: [[INSERT1:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF2]], [[UV8]](<5 x s16>), 0
+    ; GFX9-HSA: [[UV17:%[0-9]+]]:_(<2 x s16>), [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<6 x s16>)
+    ; GFX9-HSA: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV17]](<2 x s16>)
+    ; GFX9-HSA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
+    ; GFX9-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; GFX9-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
+    ; GFX9-HSA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+    ; GFX9-HSA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32)
+    ; GFX9-HSA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+    ; GFX9-HSA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32)
+    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32)
+    ; GFX9-HSA: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
+    ; GFX9-HSA: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+    ; GFX9-HSA: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
     ; GFX9-MESA-LABEL: name: test_load_global_v5s16_align16
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[COPY]](p1) :: (load 10, align 16, addrspace 1)
+    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, addrspace 1)
+    ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>)
     ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<40 x s16>) = G_CONCAT_VECTORS [[LOAD]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>)
+    ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<40 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>)
     ; GFX9-MESA: [[UV:%[0-9]+]]:_(<5 x s16>), [[UV1:%[0-9]+]]:_(<5 x s16>), [[UV2:%[0-9]+]]:_(<5 x s16>), [[UV3:%[0-9]+]]:_(<5 x s16>), [[UV4:%[0-9]+]]:_(<5 x s16>), [[UV5:%[0-9]+]]:_(<5 x s16>), [[UV6:%[0-9]+]]:_(<5 x s16>), [[UV7:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<40 x s16>)
     ; GFX9-MESA: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
     ; GFX9-MESA: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
     ; GFX9-MESA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF1]](<6 x s16>), [[DEF2]](<6 x s16>), [[DEF2]](<6 x s16>), [[DEF2]](<6 x s16>), [[DEF2]](<6 x s16>)
     ; GFX9-MESA: [[UV8:%[0-9]+]]:_(<5 x s16>), [[UV9:%[0-9]+]]:_(<5 x s16>), [[UV10:%[0-9]+]]:_(<5 x s16>), [[UV11:%[0-9]+]]:_(<5 x s16>), [[UV12:%[0-9]+]]:_(<5 x s16>), [[UV13:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<30 x s16>)
-    ; GFX9-MESA: [[CONCAT_VECTORS2:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[UV]](<5 x s16>), [[UV8]](<5 x s16>)
-    ; GFX9-MESA: [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>), [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>), [[UV18:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<10 x s16>)
-    ; GFX9-MESA: $vgpr0 = COPY [[UV14]](<2 x s16>)
-    ; GFX9-MESA: $vgpr1 = COPY [[UV15]](<2 x s16>)
-    ; GFX9-MESA: $vgpr2 = COPY [[UV16]](<2 x s16>)
+    ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF2]], [[UV]](<5 x s16>), 0
+    ; GFX9-MESA: [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>), [[UV16:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<6 x s16>)
+    ; GFX9-MESA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV14]](<2 x s16>)
+    ; GFX9-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+    ; GFX9-MESA: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV15]](<2 x s16>)
+    ; GFX9-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+    ; GFX9-MESA: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV16]](<2 x s16>)
+    ; GFX9-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+    ; GFX9-MESA: [[INSERT1:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF2]], [[UV8]](<5 x s16>), 0
+    ; GFX9-MESA: [[UV17:%[0-9]+]]:_(<2 x s16>), [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<6 x s16>)
+    ; GFX9-MESA: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV17]](<2 x s16>)
+    ; GFX9-MESA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
+    ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
+    ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+    ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32)
+    ; GFX9-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+    ; GFX9-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32)
+    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32)
+    ; GFX9-MESA: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
+    ; GFX9-MESA: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+    ; GFX9-MESA: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<5 x s16>) = G_LOAD %0 :: (load 10, align 16, addrspace 1)
     %2:_(<5 x s16>) = G_IMPLICIT_DEF
@@ -8417,11 +8613,9 @@ body: |
 
     ; SI-LABEL: name: test_load_global_v6s16_align16
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 12, align 16, addrspace 1)
-    ; SI: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
-    ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[DEF]](<4 x s32>), [[DEF]](<4 x s32>)
-    ; SI: [[UV:%[0-9]+]]:_(<3 x s32>), [[UV1:%[0-9]+]]:_(<3 x s32>), [[UV2:%[0-9]+]]:_(<3 x s32>), [[UV3:%[0-9]+]]:_(<3 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s32>)
-    ; SI: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[UV]](<3 x s32>)
+    ; SI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, addrspace 1)
+    ; SI: [[EXTRACT:%[0-9]+]]:_(<3 x s32>) = G_EXTRACT [[LOAD]](<4 x s32>), 0
+    ; SI: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[EXTRACT]](<3 x s32>)
     ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>)
     ; CI-HSA-LABEL: name: test_load_global_v6s16_align16
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
@@ -9093,94 +9287,318 @@ body: |
 
     ; SI-LABEL: name: test_load_global_v7s16_align16
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[COPY]](p1) :: (load 14, align 16, addrspace 1)
+    ; SI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, addrspace 1)
+    ; SI: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>)
     ; SI: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
-    ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[LOAD]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>)
+    ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>)
     ; SI: [[UV:%[0-9]+]]:_(<7 x s16>), [[UV1:%[0-9]+]]:_(<7 x s16>), [[UV2:%[0-9]+]]:_(<7 x s16>), [[UV3:%[0-9]+]]:_(<7 x s16>), [[UV4:%[0-9]+]]:_(<7 x s16>), [[UV5:%[0-9]+]]:_(<7 x s16>), [[UV6:%[0-9]+]]:_(<7 x s16>), [[UV7:%[0-9]+]]:_(<7 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<56 x s16>)
     ; SI: [[DEF1:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
     ; SI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[DEF1]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>)
     ; SI: [[UV8:%[0-9]+]]:_(<7 x s16>), [[UV9:%[0-9]+]]:_(<7 x s16>), [[UV10:%[0-9]+]]:_(<7 x s16>), [[UV11:%[0-9]+]]:_(<7 x s16>), [[UV12:%[0-9]+]]:_(<7 x s16>), [[UV13:%[0-9]+]]:_(<7 x s16>), [[UV14:%[0-9]+]]:_(<7 x s16>), [[UV15:%[0-9]+]]:_(<7 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<56 x s16>)
-    ; SI: [[CONCAT_VECTORS2:%[0-9]+]]:_(<14 x s16>) = G_CONCAT_VECTORS [[UV]](<7 x s16>), [[UV8]](<7 x s16>)
-    ; SI: [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>), [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>), [[UV20:%[0-9]+]]:_(<2 x s16>), [[UV21:%[0-9]+]]:_(<2 x s16>), [[UV22:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<14 x s16>)
-    ; SI: $vgpr0 = COPY [[UV16]](<2 x s16>)
-    ; SI: $vgpr1 = COPY [[UV17]](<2 x s16>)
-    ; SI: $vgpr2 = COPY [[UV18]](<2 x s16>)
-    ; SI: $vgpr3 = COPY [[UV19]](<2 x s16>)
+    ; SI: [[INSERT:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[DEF]], [[UV]](<7 x s16>), 0
+    ; SI: [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>), [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<8 x s16>)
+    ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV16]](<2 x s16>)
+    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+    ; SI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV17]](<2 x s16>)
+    ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+    ; SI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV18]](<2 x s16>)
+    ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+    ; SI: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV19]](<2 x s16>)
+    ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
+    ; SI: [[INSERT1:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[DEF]], [[UV8]](<7 x s16>), 0
+    ; SI: [[UV20:%[0-9]+]]:_(<2 x s16>), [[UV21:%[0-9]+]]:_(<2 x s16>), [[UV22:%[0-9]+]]:_(<2 x s16>), [[UV23:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<8 x s16>)
+    ; SI: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV20]](<2 x s16>)
+    ; SI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32)
+    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]]
+    ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]]
+    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; SI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+    ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]]
+    ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]]
+    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+    ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; SI: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+    ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]]
+    ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
+    ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]]
+    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; SI: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32)
+    ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]]
+    ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32)
+    ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C1]]
+    ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32)
+    ; SI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
+    ; SI: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+    ; SI: $vgpr0 = COPY [[BITCAST6]](<2 x s16>)
+    ; SI: $vgpr1 = COPY [[BITCAST7]](<2 x s16>)
+    ; SI: $vgpr2 = COPY [[BITCAST8]](<2 x s16>)
+    ; SI: $vgpr3 = COPY [[BITCAST9]](<2 x s16>)
     ; CI-HSA-LABEL: name: test_load_global_v7s16_align16
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[COPY]](p1) :: (load 14, align 16, addrspace 1)
+    ; CI-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, addrspace 1)
+    ; CI-HSA: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>)
     ; CI-HSA: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
-    ; CI-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[LOAD]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>)
+    ; CI-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>)
     ; CI-HSA: [[UV:%[0-9]+]]:_(<7 x s16>), [[UV1:%[0-9]+]]:_(<7 x s16>), [[UV2:%[0-9]+]]:_(<7 x s16>), [[UV3:%[0-9]+]]:_(<7 x s16>), [[UV4:%[0-9]+]]:_(<7 x s16>), [[UV5:%[0-9]+]]:_(<7 x s16>), [[UV6:%[0-9]+]]:_(<7 x s16>), [[UV7:%[0-9]+]]:_(<7 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<56 x s16>)
     ; CI-HSA: [[DEF1:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
     ; CI-HSA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[DEF1]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>)
     ; CI-HSA: [[UV8:%[0-9]+]]:_(<7 x s16>), [[UV9:%[0-9]+]]:_(<7 x s16>), [[UV10:%[0-9]+]]:_(<7 x s16>), [[UV11:%[0-9]+]]:_(<7 x s16>), [[UV12:%[0-9]+]]:_(<7 x s16>), [[UV13:%[0-9]+]]:_(<7 x s16>), [[UV14:%[0-9]+]]:_(<7 x s16>), [[UV15:%[0-9]+]]:_(<7 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<56 x s16>)
-    ; CI-HSA: [[CONCAT_VECTORS2:%[0-9]+]]:_(<14 x s16>) = G_CONCAT_VECTORS [[UV]](<7 x s16>), [[UV8]](<7 x s16>)
-    ; CI-HSA: [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>), [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>), [[UV20:%[0-9]+]]:_(<2 x s16>), [[UV21:%[0-9]+]]:_(<2 x s16>), [[UV22:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<14 x s16>)
-    ; CI-HSA: $vgpr0 = COPY [[UV16]](<2 x s16>)
-    ; CI-HSA: $vgpr1 = COPY [[UV17]](<2 x s16>)
-    ; CI-HSA: $vgpr2 = COPY [[UV18]](<2 x s16>)
-    ; CI-HSA: $vgpr3 = COPY [[UV19]](<2 x s16>)
+    ; CI-HSA: [[INSERT:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[DEF]], [[UV]](<7 x s16>), 0
+    ; CI-HSA: [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>), [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<8 x s16>)
+    ; CI-HSA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV16]](<2 x s16>)
+    ; CI-HSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+    ; CI-HSA: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV17]](<2 x s16>)
+    ; CI-HSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+    ; CI-HSA: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV18]](<2 x s16>)
+    ; CI-HSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+    ; CI-HSA: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV19]](<2 x s16>)
+    ; CI-HSA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
+    ; CI-HSA: [[INSERT1:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[DEF]], [[UV8]](<7 x s16>), 0
+    ; CI-HSA: [[UV20:%[0-9]+]]:_(<2 x s16>), [[UV21:%[0-9]+]]:_(<2 x s16>), [[UV22:%[0-9]+]]:_(<2 x s16>), [[UV23:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<8 x s16>)
+    ; CI-HSA: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV20]](<2 x s16>)
+    ; CI-HSA: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32)
+    ; CI-HSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; CI-HSA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]]
+    ; CI-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; CI-HSA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]]
+    ; CI-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; CI-HSA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-HSA: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CI-HSA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+    ; CI-HSA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]]
+    ; CI-HSA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; CI-HSA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]]
+    ; CI-HSA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+    ; CI-HSA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CI-HSA: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; CI-HSA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+    ; CI-HSA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]]
+    ; CI-HSA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
+    ; CI-HSA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]]
+    ; CI-HSA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; CI-HSA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; CI-HSA: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; CI-HSA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32)
+    ; CI-HSA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]]
+    ; CI-HSA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32)
+    ; CI-HSA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C1]]
+    ; CI-HSA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32)
+    ; CI-HSA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
+    ; CI-HSA: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+    ; CI-HSA: $vgpr0 = COPY [[BITCAST6]](<2 x s16>)
+    ; CI-HSA: $vgpr1 = COPY [[BITCAST7]](<2 x s16>)
+    ; CI-HSA: $vgpr2 = COPY [[BITCAST8]](<2 x s16>)
+    ; CI-HSA: $vgpr3 = COPY [[BITCAST9]](<2 x s16>)
     ; CI-MESA-LABEL: name: test_load_global_v7s16_align16
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[COPY]](p1) :: (load 14, align 16, addrspace 1)
+    ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, addrspace 1)
+    ; CI-MESA: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>)
     ; CI-MESA: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
-    ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[LOAD]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>)
+    ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>)
     ; CI-MESA: [[UV:%[0-9]+]]:_(<7 x s16>), [[UV1:%[0-9]+]]:_(<7 x s16>), [[UV2:%[0-9]+]]:_(<7 x s16>), [[UV3:%[0-9]+]]:_(<7 x s16>), [[UV4:%[0-9]+]]:_(<7 x s16>), [[UV5:%[0-9]+]]:_(<7 x s16>), [[UV6:%[0-9]+]]:_(<7 x s16>), [[UV7:%[0-9]+]]:_(<7 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<56 x s16>)
     ; CI-MESA: [[DEF1:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
     ; CI-MESA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[DEF1]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>)
     ; CI-MESA: [[UV8:%[0-9]+]]:_(<7 x s16>), [[UV9:%[0-9]+]]:_(<7 x s16>), [[UV10:%[0-9]+]]:_(<7 x s16>), [[UV11:%[0-9]+]]:_(<7 x s16>), [[UV12:%[0-9]+]]:_(<7 x s16>), [[UV13:%[0-9]+]]:_(<7 x s16>), [[UV14:%[0-9]+]]:_(<7 x s16>), [[UV15:%[0-9]+]]:_(<7 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<56 x s16>)
-    ; CI-MESA: [[CONCAT_VECTORS2:%[0-9]+]]:_(<14 x s16>) = G_CONCAT_VECTORS [[UV]](<7 x s16>), [[UV8]](<7 x s16>)
-    ; CI-MESA: [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>), [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>), [[UV20:%[0-9]+]]:_(<2 x s16>), [[UV21:%[0-9]+]]:_(<2 x s16>), [[UV22:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<14 x s16>)
-    ; CI-MESA: $vgpr0 = COPY [[UV16]](<2 x s16>)
-    ; CI-MESA: $vgpr1 = COPY [[UV17]](<2 x s16>)
-    ; CI-MESA: $vgpr2 = COPY [[UV18]](<2 x s16>)
-    ; CI-MESA: $vgpr3 = COPY [[UV19]](<2 x s16>)
+    ; CI-MESA: [[INSERT:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[DEF]], [[UV]](<7 x s16>), 0
+    ; CI-MESA: [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>), [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<8 x s16>)
+    ; CI-MESA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV16]](<2 x s16>)
+    ; CI-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+    ; CI-MESA: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV17]](<2 x s16>)
+    ; CI-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+    ; CI-MESA: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV18]](<2 x s16>)
+    ; CI-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+    ; CI-MESA: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV19]](<2 x s16>)
+    ; CI-MESA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
+    ; CI-MESA: [[INSERT1:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[DEF]], [[UV8]](<7 x s16>), 0
+    ; CI-MESA: [[UV20:%[0-9]+]]:_(<2 x s16>), [[UV21:%[0-9]+]]:_(<2 x s16>), [[UV22:%[0-9]+]]:_(<2 x s16>), [[UV23:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<8 x s16>)
+    ; CI-MESA: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV20]](<2 x s16>)
+    ; CI-MESA: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32)
+    ; CI-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]]
+    ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]]
+    ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-MESA: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+    ; CI-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]]
+    ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]]
+    ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+    ; CI-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CI-MESA: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+    ; CI-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]]
+    ; CI-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
+    ; CI-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]]
+    ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; CI-MESA: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; CI-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32)
+    ; CI-MESA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]]
+    ; CI-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32)
+    ; CI-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C1]]
+    ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32)
+    ; CI-MESA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
+    ; CI-MESA: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+    ; CI-MESA: $vgpr0 = COPY [[BITCAST6]](<2 x s16>)
+    ; CI-MESA: $vgpr1 = COPY [[BITCAST7]](<2 x s16>)
+    ; CI-MESA: $vgpr2 = COPY [[BITCAST8]](<2 x s16>)
+    ; CI-MESA: $vgpr3 = COPY [[BITCAST9]](<2 x s16>)
     ; VI-LABEL: name: test_load_global_v7s16_align16
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[COPY]](p1) :: (load 14, align 16, addrspace 1)
+    ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, addrspace 1)
+    ; VI: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>)
     ; VI: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[LOAD]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>)
+    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>)
     ; VI: [[UV:%[0-9]+]]:_(<7 x s16>), [[UV1:%[0-9]+]]:_(<7 x s16>), [[UV2:%[0-9]+]]:_(<7 x s16>), [[UV3:%[0-9]+]]:_(<7 x s16>), [[UV4:%[0-9]+]]:_(<7 x s16>), [[UV5:%[0-9]+]]:_(<7 x s16>), [[UV6:%[0-9]+]]:_(<7 x s16>), [[UV7:%[0-9]+]]:_(<7 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<56 x s16>)
     ; VI: [[DEF1:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
     ; VI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[DEF1]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>)
     ; VI: [[UV8:%[0-9]+]]:_(<7 x s16>), [[UV9:%[0-9]+]]:_(<7 x s16>), [[UV10:%[0-9]+]]:_(<7 x s16>), [[UV11:%[0-9]+]]:_(<7 x s16>), [[UV12:%[0-9]+]]:_(<7 x s16>), [[UV13:%[0-9]+]]:_(<7 x s16>), [[UV14:%[0-9]+]]:_(<7 x s16>), [[UV15:%[0-9]+]]:_(<7 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<56 x s16>)
-    ; VI: [[CONCAT_VECTORS2:%[0-9]+]]:_(<14 x s16>) = G_CONCAT_VECTORS [[UV]](<7 x s16>), [[UV8]](<7 x s16>)
-    ; VI: [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>), [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>), [[UV20:%[0-9]+]]:_(<2 x s16>), [[UV21:%[0-9]+]]:_(<2 x s16>), [[UV22:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<14 x s16>)
-    ; VI: $vgpr0 = COPY [[UV16]](<2 x s16>)
-    ; VI: $vgpr1 = COPY [[UV17]](<2 x s16>)
-    ; VI: $vgpr2 = COPY [[UV18]](<2 x s16>)
-    ; VI: $vgpr3 = COPY [[UV19]](<2 x s16>)
+    ; VI: [[INSERT:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[DEF]], [[UV]](<7 x s16>), 0
+    ; VI: [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>), [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<8 x s16>)
+    ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV16]](<2 x s16>)
+    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+    ; VI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV17]](<2 x s16>)
+    ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+    ; VI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV18]](<2 x s16>)
+    ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+    ; VI: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV19]](<2 x s16>)
+    ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
+    ; VI: [[INSERT1:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[DEF]], [[UV8]](<7 x s16>), 0
+    ; VI: [[UV20:%[0-9]+]]:_(<2 x s16>), [[UV21:%[0-9]+]]:_(<2 x s16>), [[UV22:%[0-9]+]]:_(<2 x s16>), [[UV23:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<8 x s16>)
+    ; VI: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV20]](<2 x s16>)
+    ; VI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32)
+    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]]
+    ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]]
+    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]]
+    ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]]
+    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; VI: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+    ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]]
+    ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
+    ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]]
+    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; VI: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32)
+    ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]]
+    ; VI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32)
+    ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C1]]
+    ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32)
+    ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
+    ; VI: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+    ; VI: $vgpr0 = COPY [[BITCAST6]](<2 x s16>)
+    ; VI: $vgpr1 = COPY [[BITCAST7]](<2 x s16>)
+    ; VI: $vgpr2 = COPY [[BITCAST8]](<2 x s16>)
+    ; VI: $vgpr3 = COPY [[BITCAST9]](<2 x s16>)
     ; GFX9-HSA-LABEL: name: test_load_global_v7s16_align16
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[COPY]](p1) :: (load 14, align 16, addrspace 1)
+    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, addrspace 1)
+    ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>)
     ; GFX9-HSA: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[LOAD]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>)
+    ; GFX9-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>)
     ; GFX9-HSA: [[UV:%[0-9]+]]:_(<7 x s16>), [[UV1:%[0-9]+]]:_(<7 x s16>), [[UV2:%[0-9]+]]:_(<7 x s16>), [[UV3:%[0-9]+]]:_(<7 x s16>), [[UV4:%[0-9]+]]:_(<7 x s16>), [[UV5:%[0-9]+]]:_(<7 x s16>), [[UV6:%[0-9]+]]:_(<7 x s16>), [[UV7:%[0-9]+]]:_(<7 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<56 x s16>)
     ; GFX9-HSA: [[DEF1:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
     ; GFX9-HSA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[DEF1]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>)
     ; GFX9-HSA: [[UV8:%[0-9]+]]:_(<7 x s16>), [[UV9:%[0-9]+]]:_(<7 x s16>), [[UV10:%[0-9]+]]:_(<7 x s16>), [[UV11:%[0-9]+]]:_(<7 x s16>), [[UV12:%[0-9]+]]:_(<7 x s16>), [[UV13:%[0-9]+]]:_(<7 x s16>), [[UV14:%[0-9]+]]:_(<7 x s16>), [[UV15:%[0-9]+]]:_(<7 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<56 x s16>)
-    ; GFX9-HSA: [[CONCAT_VECTORS2:%[0-9]+]]:_(<14 x s16>) = G_CONCAT_VECTORS [[UV]](<7 x s16>), [[UV8]](<7 x s16>)
-    ; GFX9-HSA: [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>), [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>), [[UV20:%[0-9]+]]:_(<2 x s16>), [[UV21:%[0-9]+]]:_(<2 x s16>), [[UV22:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<14 x s16>)
-    ; GFX9-HSA: $vgpr0 = COPY [[UV16]](<2 x s16>)
-    ; GFX9-HSA: $vgpr1 = COPY [[UV17]](<2 x s16>)
-    ; GFX9-HSA: $vgpr2 = COPY [[UV18]](<2 x s16>)
-    ; GFX9-HSA: $vgpr3 = COPY [[UV19]](<2 x s16>)
+    ; GFX9-HSA: [[INSERT:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[DEF]], [[UV]](<7 x s16>), 0
+    ; GFX9-HSA: [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>), [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<8 x s16>)
+    ; GFX9-HSA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV16]](<2 x s16>)
+    ; GFX9-HSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+    ; GFX9-HSA: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV17]](<2 x s16>)
+    ; GFX9-HSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+    ; GFX9-HSA: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV18]](<2 x s16>)
+    ; GFX9-HSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+    ; GFX9-HSA: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV19]](<2 x s16>)
+    ; GFX9-HSA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
+    ; GFX9-HSA: [[INSERT1:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[DEF]], [[UV8]](<7 x s16>), 0
+    ; GFX9-HSA: [[UV20:%[0-9]+]]:_(<2 x s16>), [[UV21:%[0-9]+]]:_(<2 x s16>), [[UV22:%[0-9]+]]:_(<2 x s16>), [[UV23:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<8 x s16>)
+    ; GFX9-HSA: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV20]](<2 x s16>)
+    ; GFX9-HSA: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32)
+    ; GFX9-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; GFX9-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
+    ; GFX9-HSA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+    ; GFX9-HSA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32)
+    ; GFX9-HSA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+    ; GFX9-HSA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
+    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32)
+    ; GFX9-HSA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32)
+    ; GFX9-HSA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32)
+    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY7]](s32), [[COPY8]](s32)
+    ; GFX9-HSA: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
+    ; GFX9-HSA: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+    ; GFX9-HSA: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+    ; GFX9-HSA: $vgpr3 = COPY [[BUILD_VECTOR_TRUNC3]](<2 x s16>)
     ; GFX9-MESA-LABEL: name: test_load_global_v7s16_align16
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[COPY]](p1) :: (load 14, align 16, addrspace 1)
+    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, addrspace 1)
+    ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>)
     ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[LOAD]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>)
+    ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>)
     ; GFX9-MESA: [[UV:%[0-9]+]]:_(<7 x s16>), [[UV1:%[0-9]+]]:_(<7 x s16>), [[UV2:%[0-9]+]]:_(<7 x s16>), [[UV3:%[0-9]+]]:_(<7 x s16>), [[UV4:%[0-9]+]]:_(<7 x s16>), [[UV5:%[0-9]+]]:_(<7 x s16>), [[UV6:%[0-9]+]]:_(<7 x s16>), [[UV7:%[0-9]+]]:_(<7 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<56 x s16>)
     ; GFX9-MESA: [[DEF1:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
     ; GFX9-MESA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[DEF1]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>)
     ; GFX9-MESA: [[UV8:%[0-9]+]]:_(<7 x s16>), [[UV9:%[0-9]+]]:_(<7 x s16>), [[UV10:%[0-9]+]]:_(<7 x s16>), [[UV11:%[0-9]+]]:_(<7 x s16>), [[UV12:%[0-9]+]]:_(<7 x s16>), [[UV13:%[0-9]+]]:_(<7 x s16>), [[UV14:%[0-9]+]]:_(<7 x s16>), [[UV15:%[0-9]+]]:_(<7 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<56 x s16>)
-    ; GFX9-MESA: [[CONCAT_VECTORS2:%[0-9]+]]:_(<14 x s16>) = G_CONCAT_VECTORS [[UV]](<7 x s16>), [[UV8]](<7 x s16>)
-    ; GFX9-MESA: [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>), [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>), [[UV20:%[0-9]+]]:_(<2 x s16>), [[UV21:%[0-9]+]]:_(<2 x s16>), [[UV22:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<14 x s16>)
-    ; GFX9-MESA: $vgpr0 = COPY [[UV16]](<2 x s16>)
-    ; GFX9-MESA: $vgpr1 = COPY [[UV17]](<2 x s16>)
-    ; GFX9-MESA: $vgpr2 = COPY [[UV18]](<2 x s16>)
-    ; GFX9-MESA: $vgpr3 = COPY [[UV19]](<2 x s16>)
+    ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[DEF]], [[UV]](<7 x s16>), 0
+    ; GFX9-MESA: [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>), [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<8 x s16>)
+    ; GFX9-MESA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV16]](<2 x s16>)
+    ; GFX9-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+    ; GFX9-MESA: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV17]](<2 x s16>)
+    ; GFX9-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+    ; GFX9-MESA: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV18]](<2 x s16>)
+    ; GFX9-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+    ; GFX9-MESA: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV19]](<2 x s16>)
+    ; GFX9-MESA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
+    ; GFX9-MESA: [[INSERT1:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[DEF]], [[UV8]](<7 x s16>), 0
+    ; GFX9-MESA: [[UV20:%[0-9]+]]:_(<2 x s16>), [[UV21:%[0-9]+]]:_(<2 x s16>), [[UV22:%[0-9]+]]:_(<2 x s16>), [[UV23:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<8 x s16>)
+    ; GFX9-MESA: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV20]](<2 x s16>)
+    ; GFX9-MESA: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32)
+    ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
+    ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+    ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32)
+    ; GFX9-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+    ; GFX9-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
+    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32)
+    ; GFX9-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32)
+    ; GFX9-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32)
+    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY7]](s32), [[COPY8]](s32)
+    ; GFX9-MESA: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
+    ; GFX9-MESA: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+    ; GFX9-MESA: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+    ; GFX9-MESA: $vgpr3 = COPY [[BUILD_VECTOR_TRUNC3]](<2 x s16>)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<7 x s16>) = G_LOAD %0 :: (load 14, align 16, addrspace 1)
     %2:_(<7 x s16>) = G_IMPLICIT_DEF
@@ -11115,11 +11533,9 @@ body: |
 
     ; SI-LABEL: name: test_load_global_v3s32_align16
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 12, align 16, addrspace 1)
-    ; SI: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
-    ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[DEF]](<4 x s32>), [[DEF]](<4 x s32>)
-    ; SI: [[UV:%[0-9]+]]:_(<3 x s32>), [[UV1:%[0-9]+]]:_(<3 x s32>), [[UV2:%[0-9]+]]:_(<3 x s32>), [[UV3:%[0-9]+]]:_(<3 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s32>)
-    ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[UV]](<3 x s32>)
+    ; SI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, addrspace 1)
+    ; SI: [[EXTRACT:%[0-9]+]]:_(<3 x s32>) = G_EXTRACT [[LOAD]](<4 x s32>), 0
+    ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[EXTRACT]](<3 x s32>)
     ; CI-HSA-LABEL: name: test_load_global_v3s32_align16
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; CI-HSA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load 12, align 16, addrspace 1)
@@ -11145,8 +11561,6 @@ body: |
     $vgpr0_vgpr1_vgpr2 = COPY %1
 ...
 
-
-
 ---
 name: test_load_global_v3s32_align4
 body: |
@@ -12248,51 +12662,45 @@ body: |
 
     ; SI-LABEL: name: test_load_global_v3s64_align32
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load 24, align 32, addrspace 1)
-    ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s64>) = G_CONCAT_VECTORS [[LOAD]](<4 x s64>), %2(<4 x s64>), %2(<4 x s64>)
-    ; SI: [[UV:%[0-9]+]]:_(<3 x s64>), [[UV1:%[0-9]+]]:_(<3 x s64>), [[UV2:%[0-9]+]]:_(<3 x s64>), [[UV3:%[0-9]+]]:_(<3 x s64>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s64>)
+    ; SI: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load 32, addrspace 1)
+    ; SI: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[LOAD]](<4 x s64>), 0
     ; SI: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
-    ; SI: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[UV]](<3 x s64>), 0
+    ; SI: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s64>), 0
     ; SI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
     ; CI-HSA-LABEL: name: test_load_global_v3s64_align32
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load 24, align 32, addrspace 1)
-    ; CI-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s64>) = G_CONCAT_VECTORS [[LOAD]](<4 x s64>), %2(<4 x s64>), %2(<4 x s64>)
-    ; CI-HSA: [[UV:%[0-9]+]]:_(<3 x s64>), [[UV1:%[0-9]+]]:_(<3 x s64>), [[UV2:%[0-9]+]]:_(<3 x s64>), [[UV3:%[0-9]+]]:_(<3 x s64>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s64>)
+    ; CI-HSA: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load 32, addrspace 1)
+    ; CI-HSA: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[LOAD]](<4 x s64>), 0
     ; CI-HSA: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
-    ; CI-HSA: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[UV]](<3 x s64>), 0
+    ; CI-HSA: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s64>), 0
     ; CI-HSA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
     ; CI-MESA-LABEL: name: test_load_global_v3s64_align32
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load 24, align 32, addrspace 1)
-    ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s64>) = G_CONCAT_VECTORS [[LOAD]](<4 x s64>), %2(<4 x s64>), %2(<4 x s64>)
-    ; CI-MESA: [[UV:%[0-9]+]]:_(<3 x s64>), [[UV1:%[0-9]+]]:_(<3 x s64>), [[UV2:%[0-9]+]]:_(<3 x s64>), [[UV3:%[0-9]+]]:_(<3 x s64>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s64>)
+    ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load 32, addrspace 1)
+    ; CI-MESA: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[LOAD]](<4 x s64>), 0
     ; CI-MESA: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
-    ; CI-MESA: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[UV]](<3 x s64>), 0
+    ; CI-MESA: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s64>), 0
     ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
     ; VI-LABEL: name: test_load_global_v3s64_align32
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load 24, align 32, addrspace 1)
-    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s64>) = G_CONCAT_VECTORS [[LOAD]](<4 x s64>), %2(<4 x s64>), %2(<4 x s64>)
-    ; VI: [[UV:%[0-9]+]]:_(<3 x s64>), [[UV1:%[0-9]+]]:_(<3 x s64>), [[UV2:%[0-9]+]]:_(<3 x s64>), [[UV3:%[0-9]+]]:_(<3 x s64>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s64>)
+    ; VI: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load 32, addrspace 1)
+    ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[LOAD]](<4 x s64>), 0
     ; VI: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
-    ; VI: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[UV]](<3 x s64>), 0
+    ; VI: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s64>), 0
     ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
     ; GFX9-HSA-LABEL: name: test_load_global_v3s64_align32
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load 24, align 32, addrspace 1)
-    ; GFX9-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s64>) = G_CONCAT_VECTORS [[LOAD]](<4 x s64>), %2(<4 x s64>), %2(<4 x s64>)
-    ; GFX9-HSA: [[UV:%[0-9]+]]:_(<3 x s64>), [[UV1:%[0-9]+]]:_(<3 x s64>), [[UV2:%[0-9]+]]:_(<3 x s64>), [[UV3:%[0-9]+]]:_(<3 x s64>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s64>)
+    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load 32, addrspace 1)
+    ; GFX9-HSA: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[LOAD]](<4 x s64>), 0
     ; GFX9-HSA: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
-    ; GFX9-HSA: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[UV]](<3 x s64>), 0
+    ; GFX9-HSA: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s64>), 0
     ; GFX9-HSA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
     ; GFX9-MESA-LABEL: name: test_load_global_v3s64_align32
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load 24, align 32, addrspace 1)
-    ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s64>) = G_CONCAT_VECTORS [[LOAD]](<4 x s64>), %2(<4 x s64>), %2(<4 x s64>)
-    ; GFX9-MESA: [[UV:%[0-9]+]]:_(<3 x s64>), [[UV1:%[0-9]+]]:_(<3 x s64>), [[UV2:%[0-9]+]]:_(<3 x s64>), [[UV3:%[0-9]+]]:_(<3 x s64>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s64>)
+    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load 32, addrspace 1)
+    ; GFX9-MESA: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[LOAD]](<4 x s64>), 0
     ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
-    ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[UV]](<3 x s64>), 0
+    ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s64>), 0
     ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<3 x s64>) = G_LOAD %0 :: (load 24, align 32, addrspace 1)
@@ -15216,28 +15624,34 @@ body: |
 
     ; SI-LABEL: name: test_ext_load_global_s64_from_1_align4
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 1, align 4, addrspace 1)
-    ; SI: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, align 4, addrspace 1)
+    ; SI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; SI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; CI-HSA-LABEL: name: test_ext_load_global_s64_from_1_align4
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 1, align 4, addrspace 1)
-    ; CI-HSA: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, align 4, addrspace 1)
+    ; CI-HSA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; CI-HSA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; CI-MESA-LABEL: name: test_ext_load_global_s64_from_1_align4
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 1, align 4, addrspace 1)
-    ; CI-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, align 4, addrspace 1)
+    ; CI-MESA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; CI-MESA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; VI-LABEL: name: test_ext_load_global_s64_from_1_align4
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 1, align 4, addrspace 1)
-    ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, align 4, addrspace 1)
+    ; VI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; VI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; GFX9-HSA-LABEL: name: test_ext_load_global_s64_from_1_align4
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 1, align 4, addrspace 1)
-    ; GFX9-HSA: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, align 4, addrspace 1)
+    ; GFX9-HSA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; GFX9-HSA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; GFX9-MESA-LABEL: name: test_ext_load_global_s64_from_1_align4
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 1, align 4, addrspace 1)
-    ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, align 4, addrspace 1)
+    ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s64) = G_LOAD %0 :: (load 1, align 4, addrspace 1)
     $vgpr0_vgpr1 = COPY %1
@@ -15251,28 +15665,34 @@ body: |
 
     ; SI-LABEL: name: test_ext_load_global_s64_from_2_align4
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1)
-    ; SI: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1)
+    ; SI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; SI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; CI-HSA-LABEL: name: test_ext_load_global_s64_from_2_align4
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1)
-    ; CI-HSA: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1)
+    ; CI-HSA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; CI-HSA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; CI-MESA-LABEL: name: test_ext_load_global_s64_from_2_align4
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1)
-    ; CI-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1)
+    ; CI-MESA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; CI-MESA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; VI-LABEL: name: test_ext_load_global_s64_from_2_align4
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1)
-    ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1)
+    ; VI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; VI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; GFX9-HSA-LABEL: name: test_ext_load_global_s64_from_2_align4
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1)
-    ; GFX9-HSA: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1)
+    ; GFX9-HSA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; GFX9-HSA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; GFX9-MESA-LABEL: name: test_ext_load_global_s64_from_2_align4
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1)
-    ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1)
+    ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s64) = G_LOAD %0 :: (load 2, align 4, addrspace 1)
     $vgpr0_vgpr1 = COPY %1
@@ -15290,28 +15710,34 @@ body: |
     ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
     ; SI-LABEL: name: test_ext_load_global_s64_from_4_align4
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1)
-    ; SI: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1)
+    ; SI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; SI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; CI-HSA-LABEL: name: test_ext_load_global_s64_from_4_align4
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1)
-    ; CI-HSA: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1)
+    ; CI-HSA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; CI-HSA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; CI-MESA-LABEL: name: test_ext_load_global_s64_from_4_align4
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1)
-    ; CI-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1)
+    ; CI-MESA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; CI-MESA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; VI-LABEL: name: test_ext_load_global_s64_from_4_align4
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1)
-    ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1)
+    ; VI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; VI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; GFX9-HSA-LABEL: name: test_ext_load_global_s64_from_4_align4
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1)
-    ; GFX9-HSA: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1)
+    ; GFX9-HSA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; GFX9-HSA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; GFX9-MESA-LABEL: name: test_ext_load_global_s64_from_4_align4
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1)
-    ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1)
+    ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s64) = G_LOAD %0 :: (load 4, align 4, addrspace 1)
     $vgpr0_vgpr1 = COPY %1
@@ -15325,28 +15751,52 @@ body: |
 
     ; SI-LABEL: name: test_ext_load_global_s128_from_4_align4
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1)
-    ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128)
+    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1)
+    ; SI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; SI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32)
+    ; SI: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+    ; SI: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64)
+    ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128)
     ; CI-HSA-LABEL: name: test_ext_load_global_s128_from_4_align4
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1)
-    ; CI-HSA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128)
+    ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1)
+    ; CI-HSA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CI-HSA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32)
+    ; CI-HSA: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+    ; CI-HSA: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64)
+    ; CI-HSA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128)
     ; CI-MESA-LABEL: name: test_ext_load_global_s128_from_4_align4
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1)
-    ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128)
+    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1)
+    ; CI-MESA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CI-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32)
+    ; CI-MESA: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+    ; CI-MESA: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64)
+    ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128)
     ; VI-LABEL: name: test_ext_load_global_s128_from_4_align4
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128)
+    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1)
+    ; VI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32)
+    ; VI: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+    ; VI: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64)
+    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128)
     ; GFX9-HSA-LABEL: name: test_ext_load_global_s128_from_4_align4
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1)
-    ; GFX9-HSA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128)
+    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1)
+    ; GFX9-HSA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; GFX9-HSA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32)
+    ; GFX9-HSA: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+    ; GFX9-HSA: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64)
+    ; GFX9-HSA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128)
     ; GFX9-MESA-LABEL: name: test_ext_load_global_s128_from_4_align4
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1)
-    ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128)
+    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1)
+    ; GFX9-MESA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; GFX9-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32)
+    ; GFX9-MESA: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+    ; GFX9-MESA: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64)
+    ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s128) = G_LOAD %0 :: (load 4, align 4, addrspace 1)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -15360,28 +15810,34 @@ body: |
 
     ; SI-LABEL: name: test_ext_load_global_s64_from_2_align2
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1)
-    ; SI: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1)
+    ; SI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; SI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; CI-HSA-LABEL: name: test_ext_load_global_s64_from_2_align2
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1)
-    ; CI-HSA: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1)
+    ; CI-HSA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; CI-HSA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; CI-MESA-LABEL: name: test_ext_load_global_s64_from_2_align2
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1)
-    ; CI-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1)
+    ; CI-MESA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; CI-MESA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; VI-LABEL: name: test_ext_load_global_s64_from_2_align2
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1)
-    ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1)
+    ; VI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; VI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; GFX9-HSA-LABEL: name: test_ext_load_global_s64_from_2_align2
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1)
-    ; GFX9-HSA: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1)
+    ; GFX9-HSA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; GFX9-HSA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; GFX9-MESA-LABEL: name: test_ext_load_global_s64_from_2_align2
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1)
-    ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1)
+    ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s64) = G_LOAD %0 :: (load 2, align 4, addrspace 1)
     $vgpr0_vgpr1 = COPY %1
@@ -15395,28 +15851,34 @@ body: |
 
     ; SI-LABEL: name: test_ext_load_global_s64_from_1_align1
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 1, align 4, addrspace 1)
-    ; SI: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, align 4, addrspace 1)
+    ; SI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; SI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; CI-HSA-LABEL: name: test_ext_load_global_s64_from_1_align1
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 1, align 4, addrspace 1)
-    ; CI-HSA: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, align 4, addrspace 1)
+    ; CI-HSA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; CI-HSA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; CI-MESA-LABEL: name: test_ext_load_global_s64_from_1_align1
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 1, align 4, addrspace 1)
-    ; CI-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, align 4, addrspace 1)
+    ; CI-MESA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; CI-MESA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; VI-LABEL: name: test_ext_load_global_s64_from_1_align1
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 1, align 4, addrspace 1)
-    ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, align 4, addrspace 1)
+    ; VI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; VI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; GFX9-HSA-LABEL: name: test_ext_load_global_s64_from_1_align1
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 1, align 4, addrspace 1)
-    ; GFX9-HSA: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, align 4, addrspace 1)
+    ; GFX9-HSA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; GFX9-HSA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; GFX9-MESA-LABEL: name: test_ext_load_global_s64_from_1_align1
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 1, align 4, addrspace 1)
-    ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, align 4, addrspace 1)
+    ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s64) = G_LOAD %0 :: (load 1, align 4, addrspace 1)
     $vgpr0_vgpr1 = COPY %1
@@ -16730,26 +17192,23 @@ body: |
 
     ; SI-LABEL: name: test_extload_global_v2s96_from_24_align16
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 12, align 16, addrspace 1)
-    ; SI: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
-    ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[DEF]](<4 x s32>), [[DEF]](<4 x s32>)
-    ; SI: [[UV:%[0-9]+]]:_(<3 x s32>), [[UV1:%[0-9]+]]:_(<3 x s32>), [[UV2:%[0-9]+]]:_(<3 x s32>), [[UV3:%[0-9]+]]:_(<3 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s32>)
-    ; SI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[UV]](<3 x s32>)
+    ; SI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, addrspace 1)
+    ; SI: [[EXTRACT:%[0-9]+]]:_(<3 x s32>) = G_EXTRACT [[LOAD]](<4 x s32>), 0
+    ; SI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[EXTRACT]](<3 x s32>)
     ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
     ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
     ; SI: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load 8 + 12, align 4, addrspace 1)
     ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
     ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C1]](s64)
     ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 4 + 20, addrspace 1)
-    ; SI: [[DEF1:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; SI: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF1]], [[LOAD1]](<2 x s32>), 0
+    ; SI: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
+    ; SI: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[LOAD1]](<2 x s32>), 0
     ; SI: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[LOAD2]](s32), 64
     ; SI: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT1]](<3 x s32>)
-    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s96>) = G_BUILD_VECTOR [[BITCAST]](s96), [[BITCAST1]](s96)
-    ; SI: [[EXTRACT:%[0-9]+]]:_(s96) = G_EXTRACT [[BUILD_VECTOR]](<2 x s96>), 0
-    ; SI: [[EXTRACT1:%[0-9]+]]:_(s96) = G_EXTRACT [[BUILD_VECTOR]](<2 x s96>), 96
-    ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[EXTRACT]](s96)
-    ; SI: $vgpr3_vgpr4_vgpr5 = COPY [[EXTRACT1]](s96)
+    ; SI: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; SI: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
+    ; SI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
     ; CI-HSA-LABEL: name: test_extload_global_v2s96_from_24_align16
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; CI-HSA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load 12, align 16, addrspace 1)

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir
index b482998727a4..5b54ac6c69b7 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir
@@ -636,34 +636,29 @@ body: |
 
     ; SI-LABEL: name: test_load_local_s24_align8
     ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 3, align 8, addrspace 3)
-    ; SI: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[LOAD]](s32)
-    ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s24)
-    ; SI: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, align 8, addrspace 3)
+    ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; SI: $vgpr0 = COPY [[COPY1]](s32)
     ; CI-LABEL: name: test_load_local_s24_align8
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 3, align 8, addrspace 3)
-    ; CI: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[LOAD]](s32)
-    ; CI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s24)
-    ; CI: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, align 8, addrspace 3)
+    ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; CI: $vgpr0 = COPY [[COPY1]](s32)
     ; CI-DS128-LABEL: name: test_load_local_s24_align8
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 3, align 8, addrspace 3)
-    ; CI-DS128: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[LOAD]](s32)
-    ; CI-DS128: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s24)
-    ; CI-DS128: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, align 8, addrspace 3)
+    ; CI-DS128: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; CI-DS128: $vgpr0 = COPY [[COPY1]](s32)
     ; VI-LABEL: name: test_load_local_s24_align8
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 3, align 8, addrspace 3)
-    ; VI: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[LOAD]](s32)
-    ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s24)
-    ; VI: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, align 8, addrspace 3)
+    ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; VI: $vgpr0 = COPY [[COPY1]](s32)
     ; GFX9-LABEL: name: test_load_local_s24_align8
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 3, align 8, addrspace 3)
-    ; GFX9: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[LOAD]](s32)
-    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s24)
-    ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, align 8, addrspace 3)
+    ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; GFX9: $vgpr0 = COPY [[COPY1]](s32)
     %0:_(p3) = COPY $vgpr0
     %1:_(s24) = G_LOAD %0 :: (load 3, align 8, addrspace 3)
     %2:_(s32) = G_ANYEXT %1
@@ -678,34 +673,29 @@ body: |
 
     ; SI-LABEL: name: test_load_local_s24_align4
     ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 3, align 4, addrspace 3)
-    ; SI: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[LOAD]](s32)
-    ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s24)
-    ; SI: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3)
+    ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; SI: $vgpr0 = COPY [[COPY1]](s32)
     ; CI-LABEL: name: test_load_local_s24_align4
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 3, align 4, addrspace 3)
-    ; CI: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[LOAD]](s32)
-    ; CI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s24)
-    ; CI: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3)
+    ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; CI: $vgpr0 = COPY [[COPY1]](s32)
     ; CI-DS128-LABEL: name: test_load_local_s24_align4
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 3, align 4, addrspace 3)
-    ; CI-DS128: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[LOAD]](s32)
-    ; CI-DS128: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s24)
-    ; CI-DS128: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3)
+    ; CI-DS128: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; CI-DS128: $vgpr0 = COPY [[COPY1]](s32)
     ; VI-LABEL: name: test_load_local_s24_align4
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 3, align 4, addrspace 3)
-    ; VI: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[LOAD]](s32)
-    ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s24)
-    ; VI: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3)
+    ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; VI: $vgpr0 = COPY [[COPY1]](s32)
     ; GFX9-LABEL: name: test_load_local_s24_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 3, align 4, addrspace 3)
-    ; GFX9: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[LOAD]](s32)
-    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s24)
-    ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3)
+    ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; GFX9: $vgpr0 = COPY [[COPY1]](s32)
     %0:_(p3) = COPY $vgpr0
     %1:_(s24) = G_LOAD %0 :: (load 3, align 4, addrspace 3)
     %2:_(s32) = G_ANYEXT %1
@@ -854,34 +844,29 @@ body: |
 
     ; SI-LABEL: name: test_load_local_s48_align8
     ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 6, align 8, addrspace 3)
-    ; SI: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[LOAD]](s64)
-    ; SI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[TRUNC]](s48)
-    ; SI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; SI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3)
+    ; SI: [[COPY1:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64)
+    ; SI: $vgpr0_vgpr1 = COPY [[COPY1]](s64)
     ; CI-LABEL: name: test_load_local_s48_align8
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 6, align 8, addrspace 3)
-    ; CI: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[LOAD]](s64)
-    ; CI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[TRUNC]](s48)
-    ; CI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; CI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3)
+    ; CI: [[COPY1:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64)
+    ; CI: $vgpr0_vgpr1 = COPY [[COPY1]](s64)
     ; CI-DS128-LABEL: name: test_load_local_s48_align8
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI-DS128: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 6, align 8, addrspace 3)
-    ; CI-DS128: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[LOAD]](s64)
-    ; CI-DS128: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[TRUNC]](s48)
-    ; CI-DS128: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; CI-DS128: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3)
+    ; CI-DS128: [[COPY1:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64)
+    ; CI-DS128: $vgpr0_vgpr1 = COPY [[COPY1]](s64)
     ; VI-LABEL: name: test_load_local_s48_align8
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 6, align 8, addrspace 3)
-    ; VI: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[LOAD]](s64)
-    ; VI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[TRUNC]](s48)
-    ; VI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3)
+    ; VI: [[COPY1:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64)
+    ; VI: $vgpr0_vgpr1 = COPY [[COPY1]](s64)
     ; GFX9-LABEL: name: test_load_local_s48_align8
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 6, align 8, addrspace 3)
-    ; GFX9: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[LOAD]](s64)
-    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[TRUNC]](s48)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; GFX9: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3)
+    ; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64)
+    ; GFX9: $vgpr0_vgpr1 = COPY [[COPY1]](s64)
     %0:_(p3) = COPY $vgpr0
     %1:_(s48) = G_LOAD %0 :: (load 6, align 8, addrspace 3)
     %2:_(s64) = G_ANYEXT %1
@@ -6278,7 +6263,7 @@ body: |
 
     ; SI-LABEL: name: test_load_local_v3s16_align8
     ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load 6, align 8, addrspace 3)
+    ; SI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3)
     ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>)
     ; SI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
@@ -6286,7 +6271,7 @@ body: |
     ; SI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
     ; CI-LABEL: name: test_load_local_v3s16_align8
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load 6, align 8, addrspace 3)
+    ; CI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3)
     ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>)
     ; CI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; CI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
@@ -6294,7 +6279,7 @@ body: |
     ; CI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
     ; CI-DS128-LABEL: name: test_load_local_v3s16_align8
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI-DS128: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load 6, align 8, addrspace 3)
+    ; CI-DS128: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3)
     ; CI-DS128: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>)
     ; CI-DS128: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; CI-DS128: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
@@ -6302,7 +6287,7 @@ body: |
     ; CI-DS128: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
     ; VI-LABEL: name: test_load_local_v3s16_align8
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load 6, align 8, addrspace 3)
+    ; VI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3)
     ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>)
     ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
@@ -6310,7 +6295,7 @@ body: |
     ; VI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
     ; GFX9-LABEL: name: test_load_local_v3s16_align8
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load 6, align 8, addrspace 3)
+    ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3)
     ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>)
     ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
@@ -10209,24 +10194,29 @@ body: |
 
     ; SI-LABEL: name: test_extload_local_s64_from_1_align4
     ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 1, align 4, addrspace 3)
-    ; SI: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, align 4, addrspace 3)
+    ; SI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; SI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; CI-LABEL: name: test_extload_local_s64_from_1_align4
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 1, align 4, addrspace 3)
-    ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, align 4, addrspace 3)
+    ; CI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; CI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; CI-DS128-LABEL: name: test_extload_local_s64_from_1_align4
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI-DS128: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 1, align 4, addrspace 3)
-    ; CI-DS128: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, align 4, addrspace 3)
+    ; CI-DS128: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; CI-DS128: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; VI-LABEL: name: test_extload_local_s64_from_1_align4
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 1, align 4, addrspace 3)
-    ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, align 4, addrspace 3)
+    ; VI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; VI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; GFX9-LABEL: name: test_extload_local_s64_from_1_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 1, align 4, addrspace 3)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, align 4, addrspace 3)
+    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; GFX9: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     %0:_(p3) = COPY $vgpr0
     %1:_(s64) = G_LOAD %0 :: (load 1, align 4, addrspace 3)
     $vgpr0_vgpr1 = COPY %1
@@ -10240,24 +10230,29 @@ body: |
 
     ; SI-LABEL: name: test_extload_local_s64_from_2_align4
     ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 2, align 4, addrspace 3)
-    ; SI: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, align 4, addrspace 3)
+    ; SI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; SI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; CI-LABEL: name: test_extload_local_s64_from_2_align4
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 2, align 4, addrspace 3)
-    ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, align 4, addrspace 3)
+    ; CI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; CI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; CI-DS128-LABEL: name: test_extload_local_s64_from_2_align4
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI-DS128: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 2, align 4, addrspace 3)
-    ; CI-DS128: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, align 4, addrspace 3)
+    ; CI-DS128: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; CI-DS128: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; VI-LABEL: name: test_extload_local_s64_from_2_align4
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 2, align 4, addrspace 3)
-    ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, align 4, addrspace 3)
+    ; VI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; VI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; GFX9-LABEL: name: test_extload_local_s64_from_2_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 2, align 4, addrspace 3)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, align 4, addrspace 3)
+    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; GFX9: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     %0:_(p3) = COPY $vgpr0
     %1:_(s64) = G_LOAD %0 :: (load 2, align 4, addrspace 3)
     $vgpr0_vgpr1 = COPY %1
@@ -10271,24 +10266,29 @@ body: |
 
     ; SI-LABEL: name: test_extload_local_s64_from_4_align4
     ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3)
-    ; SI: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3)
+    ; SI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; SI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; CI-LABEL: name: test_extload_local_s64_from_4_align4
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3)
-    ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3)
+    ; CI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; CI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; CI-DS128-LABEL: name: test_extload_local_s64_from_4_align4
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI-DS128: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3)
-    ; CI-DS128: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3)
+    ; CI-DS128: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; CI-DS128: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; VI-LABEL: name: test_extload_local_s64_from_4_align4
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3)
-    ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3)
+    ; VI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; VI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; GFX9-LABEL: name: test_extload_local_s64_from_4_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3)
+    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; GFX9: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     %0:_(p3) = COPY $vgpr0
     %1:_(s64) = G_LOAD %0 :: (load 4, align 4, addrspace 3)
     $vgpr0_vgpr1 = COPY %1
@@ -10302,24 +10302,44 @@ body: |
 
     ; SI-LABEL: name: test_extload_local_s128_from_4_align4
     ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3)
-    ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128)
+    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3)
+    ; SI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; SI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32)
+    ; SI: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+    ; SI: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64)
+    ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128)
     ; CI-LABEL: name: test_extload_local_s128_from_4_align4
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3)
-    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128)
+    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3)
+    ; CI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32)
+    ; CI: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+    ; CI: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64)
+    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128)
     ; CI-DS128-LABEL: name: test_extload_local_s128_from_4_align4
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI-DS128: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3)
-    ; CI-DS128: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128)
+    ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3)
+    ; CI-DS128: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CI-DS128: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32)
+    ; CI-DS128: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+    ; CI-DS128: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64)
+    ; CI-DS128: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128)
     ; VI-LABEL: name: test_extload_local_s128_from_4_align4
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128)
+    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3)
+    ; VI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32)
+    ; VI: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+    ; VI: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64)
+    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128)
     ; GFX9-LABEL: name: test_extload_local_s128_from_4_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3)
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128)
+    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3)
+    ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32)
+    ; GFX9: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+    ; GFX9: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64)
+    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128)
     %0:_(p3) = COPY $vgpr0
     %1:_(s128) = G_LOAD %0 :: (load 4, align 4, addrspace 3)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -10333,24 +10353,29 @@ body: |
 
     ; SI-LABEL: name: test_extload_local_s64_from_2_align2
     ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 2, align 4, addrspace 3)
-    ; SI: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, align 4, addrspace 3)
+    ; SI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; SI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; CI-LABEL: name: test_extload_local_s64_from_2_align2
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 2, align 4, addrspace 3)
-    ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, align 4, addrspace 3)
+    ; CI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; CI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; CI-DS128-LABEL: name: test_extload_local_s64_from_2_align2
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI-DS128: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 2, align 4, addrspace 3)
-    ; CI-DS128: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, align 4, addrspace 3)
+    ; CI-DS128: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; CI-DS128: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; VI-LABEL: name: test_extload_local_s64_from_2_align2
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 2, align 4, addrspace 3)
-    ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, align 4, addrspace 3)
+    ; VI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; VI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; GFX9-LABEL: name: test_extload_local_s64_from_2_align2
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 2, align 4, addrspace 3)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, align 4, addrspace 3)
+    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; GFX9: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     %0:_(p3) = COPY $vgpr0
     %1:_(s64) = G_LOAD %0 :: (load 2, align 4, addrspace 3)
     $vgpr0_vgpr1 = COPY %1
@@ -10364,24 +10389,29 @@ body: |
 
     ; SI-LABEL: name: test_extload_local_s64_from_1_align1
     ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 1, align 4, addrspace 3)
-    ; SI: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, align 4, addrspace 3)
+    ; SI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; SI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; CI-LABEL: name: test_extload_local_s64_from_1_align1
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 1, align 4, addrspace 3)
-    ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, align 4, addrspace 3)
+    ; CI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; CI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; CI-DS128-LABEL: name: test_extload_local_s64_from_1_align1
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI-DS128: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 1, align 4, addrspace 3)
-    ; CI-DS128: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, align 4, addrspace 3)
+    ; CI-DS128: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; CI-DS128: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; VI-LABEL: name: test_extload_local_s64_from_1_align1
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 1, align 4, addrspace 3)
-    ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, align 4, addrspace 3)
+    ; VI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; VI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; GFX9-LABEL: name: test_extload_local_s64_from_1_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 1, align 4, addrspace 3)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, align 4, addrspace 3)
+    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; GFX9: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     %0:_(p3) = COPY $vgpr0
     %1:_(s64) = G_LOAD %0 :: (load 1, align 4, addrspace 3)
     $vgpr0_vgpr1 = COPY %1

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-memory-metadata.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-memory-metadata.mir
new file mode 100644
index 000000000000..4cf01f99e1c7
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-memory-metadata.mir
@@ -0,0 +1,104 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer %s -o - | FileCheck -check-prefix=SI %s
+
+--- |
+
+  define i32 @widen_load_range0_tbaa(i24 addrspace(1)* %ptr) {
+    %load = load i24, i24 addrspace(1)* %ptr, !range !0, !tbaa !1
+    %zext = zext i24 %load to i32
+    ret i32 %zext
+  }
+
+  define i32 @widen_load_range1_tbaa(i24 addrspace(1)* %ptr) {
+    %load = load i24, i24 addrspace(1)* %ptr, !range !0, !tbaa !1
+    %zext = zext i24 %load to i32
+    ret i32 %zext
+  }
+
+  define i32 @widen_load_tbaa0(i24 addrspace(1)* %ptr) {
+    %load = load i24, i24 addrspace(1)* %ptr, !tbaa !1
+    %zext = zext i24 %load to i32
+    ret i32 %zext
+  }
+
+  define i32 @widen_load_tbaa1(i24 addrspace(1)* %ptr) {
+    %load = load i24, i24 addrspace(1)* %ptr, !tbaa !1
+    %zext = zext i24 %load to i32
+    ret i32 %zext
+  }
+
+  !0 = !{i24 0, i24 1048575}
+  !1 = !{!"omnipotent char", !2}
+  !2 = !{!"Simple C/C++ TBAA"}
+...
+
+# Make sure range metadata is not preserved when widening loads, but
+# tbaa is.
+---
+name: widen_load_range0_tbaa
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1
+    ; SI-LABEL: name: widen_load_range0_tbaa
+    ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1)
+    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215
+    ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
+    ; SI: $vgpr0 = COPY [[AND]](s32)
+    %0:_(p1) = COPY $vgpr0_vgpr1
+    %1:_(s24) = G_LOAD %0 :: (load 3, align 4, addrspace 1, !range !0, !tbaa !1)
+    %2:_(s32) = G_ZEXT %1
+    $vgpr0 = COPY %2
+
+...
+
+# Result register type already matches the widened memory type.
+---
+name: widen_load_range1_tbaa
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1
+    ; SI-LABEL: name: widen_load_range1_tbaa
+    ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1)
+    ; SI: $vgpr0 = COPY [[LOAD]](s32)
+    %0:_(p1) = COPY $vgpr0_vgpr1
+    %1:_(s32) = G_LOAD %0 :: (load 3, align 4, addrspace 1, !range !0, !tbaa !1)
+    $vgpr0 = COPY %1
+
+...
+---
+name: widen_load_tbaa0
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1
+    ; SI-LABEL: name: widen_load_tbaa0
+    ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1)
+    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215
+    ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
+    ; SI: $vgpr0 = COPY [[AND]](s32)
+    %0:_(p1) = COPY $vgpr0_vgpr1
+    %1:_(s24) = G_LOAD %0 :: (load 3, align 4, addrspace 1, !tbaa !1)
+    %2:_(s32) = G_ZEXT %1
+    $vgpr0 = COPY %2
+
+...
+
+# Result register type already matches the widened memory type.
+---
+name: widen_load_tbaa1
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1
+    ; SI-LABEL: name: widen_load_tbaa1
+    ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1)
+    ; SI: $vgpr0 = COPY [[LOAD]](s32)
+    %0:_(p1) = COPY $vgpr0_vgpr1
+    %1:_(s32) = G_LOAD %0 :: (load 3, align 4, addrspace 1, !tbaa !1)
+    $vgpr0 = COPY %1
+
+...

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir
index 7521d3ca96b3..1659ce532ade 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir
@@ -525,6 +525,38 @@ body: |
     $vgpr0 = COPY %1
 ...
 
+---
+name: test_load_private_s24_align8
+body: |
+  bb.0:
+    liveins: $vgpr0
+
+    ; SI-LABEL: name: test_load_private_s24_align8
+    ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
+    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, align 8, addrspace 5)
+    ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; SI: $vgpr0 = COPY [[COPY1]](s32)
+    ; CI-LABEL: name: test_load_private_s24_align8
+    ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
+    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, align 8, addrspace 5)
+    ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; CI: $vgpr0 = COPY [[COPY1]](s32)
+    ; VI-LABEL: name: test_load_private_s24_align8
+    ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
+    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, align 8, addrspace 5)
+    ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; VI: $vgpr0 = COPY [[COPY1]](s32)
+    ; GFX9-LABEL: name: test_load_private_s24_align8
+    ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
+    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, align 8, addrspace 5)
+    ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; GFX9: $vgpr0 = COPY [[COPY1]](s32)
+    %0:_(p5) = COPY $vgpr0
+    %1:_(s24) = G_LOAD %0 :: (load 3, align 8, addrspace 5)
+    %2:_(s32) = G_ANYEXT %1
+    $vgpr0 = COPY %2
+...
+
 ---
 name: test_load_private_s24_align4
 body: |
@@ -533,28 +565,24 @@ body: |
 
     ; SI-LABEL: name: test_load_private_s24_align4
     ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 3, align 4, addrspace 5)
-    ; SI: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[LOAD]](s32)
-    ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s24)
-    ; SI: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5)
+    ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; SI: $vgpr0 = COPY [[COPY1]](s32)
     ; CI-LABEL: name: test_load_private_s24_align4
     ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 3, align 4, addrspace 5)
-    ; CI: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[LOAD]](s32)
-    ; CI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s24)
-    ; CI: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5)
+    ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; CI: $vgpr0 = COPY [[COPY1]](s32)
     ; VI-LABEL: name: test_load_private_s24_align4
     ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 3, align 4, addrspace 5)
-    ; VI: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[LOAD]](s32)
-    ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s24)
-    ; VI: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5)
+    ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; VI: $vgpr0 = COPY [[COPY1]](s32)
     ; GFX9-LABEL: name: test_load_private_s24_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 3, align 4, addrspace 5)
-    ; GFX9: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[LOAD]](s32)
-    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s24)
-    ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5)
+    ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; GFX9: $vgpr0 = COPY [[COPY1]](s32)
     %0:_(p5) = COPY $vgpr0
     %1:_(s24) = G_LOAD %0 :: (load 3, align 4, addrspace 5)
     %2:_(s32) = G_ANYEXT %1
@@ -9135,20 +9163,24 @@ body: |
 
     ; SI-LABEL: name: test_ext_load_private_s64_from_1_align4
     ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p5) :: (load 1, align 4, addrspace 5)
-    ; SI: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, align 4, addrspace 5)
+    ; SI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; SI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; CI-LABEL: name: test_ext_load_private_s64_from_1_align4
     ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p5) :: (load 1, align 4, addrspace 5)
-    ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, align 4, addrspace 5)
+    ; CI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; CI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; VI-LABEL: name: test_ext_load_private_s64_from_1_align4
     ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p5) :: (load 1, align 4, addrspace 5)
-    ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, align 4, addrspace 5)
+    ; VI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; VI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; GFX9-LABEL: name: test_ext_load_private_s64_from_1_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p5) :: (load 1, align 4, addrspace 5)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, align 4, addrspace 5)
+    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; GFX9: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     %0:_(p5) = COPY $vgpr0
     %1:_(s64) = G_LOAD %0 :: (load 1, align 4, addrspace 5)
     $vgpr0_vgpr1 = COPY %1
@@ -9162,20 +9194,24 @@ body: |
 
     ; SI-LABEL: name: test_ext_load_private_s64_from_2_align4
     ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p5) :: (load 2, align 4, addrspace 5)
-    ; SI: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, align 4, addrspace 5)
+    ; SI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; SI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; CI-LABEL: name: test_ext_load_private_s64_from_2_align4
     ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p5) :: (load 2, align 4, addrspace 5)
-    ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, align 4, addrspace 5)
+    ; CI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; CI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; VI-LABEL: name: test_ext_load_private_s64_from_2_align4
     ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p5) :: (load 2, align 4, addrspace 5)
-    ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, align 4, addrspace 5)
+    ; VI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; VI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; GFX9-LABEL: name: test_ext_load_private_s64_from_2_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p5) :: (load 2, align 4, addrspace 5)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, align 4, addrspace 5)
+    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; GFX9: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     %0:_(p5) = COPY $vgpr0
     %1:_(s64) = G_LOAD %0 :: (load 2, align 4, addrspace 5)
     $vgpr0_vgpr1 = COPY %1
@@ -9189,20 +9225,24 @@ body: |
 
     ; SI-LABEL: name: test_ext_load_private_s64_from_4_align4
     ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5)
-    ; SI: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5)
+    ; SI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; SI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; CI-LABEL: name: test_ext_load_private_s64_from_4_align4
     ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5)
-    ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5)
+    ; CI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; CI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; VI-LABEL: name: test_ext_load_private_s64_from_4_align4
     ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5)
-    ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5)
+    ; VI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; VI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; GFX9-LABEL: name: test_ext_load_private_s64_from_4_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5)
+    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; GFX9: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     %0:_(p5) = COPY $vgpr0
     %1:_(s64) = G_LOAD %0 :: (load 4, align 4, addrspace 5)
     $vgpr0_vgpr1 = COPY %1
@@ -9216,20 +9256,36 @@ body: |
 
     ; SI-LABEL: name: test_ext_load_private_s128_from_4_align4
     ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5)
-    ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128)
+    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5)
+    ; SI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; SI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32)
+    ; SI: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+    ; SI: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64)
+    ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128)
     ; CI-LABEL: name: test_ext_load_private_s128_from_4_align4
     ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5)
-    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128)
+    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5)
+    ; CI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32)
+    ; CI: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+    ; CI: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64)
+    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128)
     ; VI-LABEL: name: test_ext_load_private_s128_from_4_align4
     ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128)
+    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5)
+    ; VI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32)
+    ; VI: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+    ; VI: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64)
+    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128)
     ; GFX9-LABEL: name: test_ext_load_private_s128_from_4_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5)
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128)
+    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5)
+    ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32)
+    ; GFX9: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+    ; GFX9: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64)
+    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128)
     %0:_(p5) = COPY $vgpr0
     %1:_(s128) = G_LOAD %0 :: (load 4, align 4, addrspace 5)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -9243,20 +9299,24 @@ body: |
 
     ; SI-LABEL: name: test_ext_load_private_s64_from_2_align2
     ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p5) :: (load 2, align 4, addrspace 5)
-    ; SI: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, align 4, addrspace 5)
+    ; SI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; SI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; CI-LABEL: name: test_ext_load_private_s64_from_2_align2
     ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p5) :: (load 2, align 4, addrspace 5)
-    ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, align 4, addrspace 5)
+    ; CI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; CI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; VI-LABEL: name: test_ext_load_private_s64_from_2_align2
     ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p5) :: (load 2, align 4, addrspace 5)
-    ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, align 4, addrspace 5)
+    ; VI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; VI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; GFX9-LABEL: name: test_ext_load_private_s64_from_2_align2
     ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p5) :: (load 2, align 4, addrspace 5)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, align 4, addrspace 5)
+    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; GFX9: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     %0:_(p5) = COPY $vgpr0
     %1:_(s64) = G_LOAD %0 :: (load 2, align 4, addrspace 5)
     $vgpr0_vgpr1 = COPY %1
@@ -9270,20 +9330,24 @@ body: |
 
     ; SI-LABEL: name: test_ext_load_private_s64_from_1_align1
     ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p5) :: (load 1, align 4, addrspace 5)
-    ; SI: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, align 4, addrspace 5)
+    ; SI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; SI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; CI-LABEL: name: test_ext_load_private_s64_from_1_align1
     ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p5) :: (load 1, align 4, addrspace 5)
-    ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, align 4, addrspace 5)
+    ; CI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; CI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; VI-LABEL: name: test_ext_load_private_s64_from_1_align1
     ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p5) :: (load 1, align 4, addrspace 5)
-    ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, align 4, addrspace 5)
+    ; VI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; VI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     ; GFX9-LABEL: name: test_ext_load_private_s64_from_1_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p5) :: (load 1, align 4, addrspace 5)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, align 4, addrspace 5)
+    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; GFX9: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     %0:_(p5) = COPY $vgpr0
     %1:_(s64) = G_LOAD %0 :: (load 1, align 4, addrspace 5)
     $vgpr0_vgpr1 = COPY %1

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir
index 5ebbd48f06ed..1e46b8f8acf0 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir
@@ -508,8 +508,15 @@ body: |
     ; CI-LABEL: name: test_store_global_s48_align1
     ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; CI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
-    ; CI: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[COPY1]](s64)
-    ; CI: G_STORE [[TRUNC]](s48), [[COPY]](p1) :: (store 6, align 1, addrspace 1)
+    ; CI: [[COPY2:%[0-9]+]]:_(s64) = COPY [[COPY1]](s64)
+    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
+    ; CI: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY2]], [[C]](s32)
+    ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CI: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64)
+    ; CI: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store 4, align 1, addrspace 1)
+    ; CI: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64)
+    ; CI: G_STORE [[TRUNC1]](s32), [[PTR_ADD]](p1) :: (store 2 + 4, align 1, addrspace 1)
     ; VI-LABEL: name: test_store_global_s48_align1
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; VI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
@@ -549,8 +556,15 @@ body: |
     ; GFX9-LABEL: name: test_store_global_s48_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
-    ; GFX9: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[COPY1]](s64)
-    ; GFX9: G_STORE [[TRUNC]](s48), [[COPY]](p1) :: (store 6, align 1, addrspace 1)
+    ; GFX9: [[COPY2:%[0-9]+]]:_(s64) = COPY [[COPY1]](s64)
+    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
+    ; GFX9: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY2]], [[C]](s32)
+    ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; GFX9: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64)
+    ; GFX9: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store 4, align 1, addrspace 1)
+    ; GFX9: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64)
+    ; GFX9: G_STORE [[TRUNC1]](s32), [[PTR_ADD]](p1) :: (store 2 + 4, align 1, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s64) = COPY $vgpr2_vgpr3
     %2:_(s48) = G_TRUNC %1
@@ -585,8 +599,15 @@ body: |
     ; CI-LABEL: name: test_store_global_s48_align2
     ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; CI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
-    ; CI: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[COPY1]](s64)
-    ; CI: G_STORE [[TRUNC]](s48), [[COPY]](p1) :: (store 6, align 2, addrspace 1)
+    ; CI: [[COPY2:%[0-9]+]]:_(s64) = COPY [[COPY1]](s64)
+    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
+    ; CI: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY2]], [[C]](s32)
+    ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CI: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64)
+    ; CI: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store 4, align 2, addrspace 1)
+    ; CI: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64)
+    ; CI: G_STORE [[TRUNC1]](s32), [[PTR_ADD]](p1) :: (store 2 + 4, addrspace 1)
     ; VI-LABEL: name: test_store_global_s48_align2
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; VI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
@@ -609,8 +630,15 @@ body: |
     ; GFX9-LABEL: name: test_store_global_s48_align2
     ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
-    ; GFX9: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[COPY1]](s64)
-    ; GFX9: G_STORE [[TRUNC]](s48), [[COPY]](p1) :: (store 6, align 2, addrspace 1)
+    ; GFX9: [[COPY2:%[0-9]+]]:_(s64) = COPY [[COPY1]](s64)
+    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
+    ; GFX9: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY2]], [[C]](s32)
+    ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; GFX9: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64)
+    ; GFX9: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store 4, align 2, addrspace 1)
+    ; GFX9: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64)
+    ; GFX9: G_STORE [[TRUNC1]](s32), [[PTR_ADD]](p1) :: (store 2 + 4, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s64) = COPY $vgpr2_vgpr3
     %2:_(s48) = G_TRUNC %1

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir
index 55641ca31f75..758d5b01c978 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir
@@ -637,15 +637,67 @@ body: |
     ; SI-LABEL: name: test_store_global_v3s8_align4
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; SI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
-    ; SI: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[COPY1]](<3 x s32>)
-    ; SI: [[BITCAST:%[0-9]+]]:_(s24) = G_BITCAST [[TRUNC]](<3 x s8>)
-    ; SI: G_STORE [[BITCAST]](s24), [[COPY]](p1) :: (store 3, align 4, addrspace 1)
+    ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
+    ; SI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; SI: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32)
+    ; SI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]]
+    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
+    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32)
+    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]]
+    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY2]](s32)
+    ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; SI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32)
+    ; SI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C]]
+    ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[DEF]](s32)
+    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C2]]
+    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32)
+    ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; SI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32)
+    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[OR2]](s32)
+    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C3]](s32)
+    ; SI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; SI: G_STORE [[COPY5]](s32), [[COPY]](p1) :: (store 2, align 4, addrspace 1)
+    ; SI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store 1 + 2, align 2, addrspace 1)
     ; VI-LABEL: name: test_store_global_v3s8_align4
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; VI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
-    ; VI: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[COPY1]](<3 x s32>)
-    ; VI: [[BITCAST:%[0-9]+]]:_(s24) = G_BITCAST [[TRUNC]](<3 x s8>)
-    ; VI: G_STORE [[BITCAST]](s24), [[COPY]](p1) :: (store 3, align 4, addrspace 1)
+    ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
+    ; VI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; VI: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32)
+    ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]]
+    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32)
+    ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]]
+    ; VI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C1]](s16)
+    ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32)
+    ; VI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C]]
+    ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
+    ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C]]
+    ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C1]](s16)
+    ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C2]](s32)
+    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[OR2]](s32)
+    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C2]](s32)
+    ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 2, align 4, addrspace 1)
+    ; VI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store 1 + 2, align 2, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
     %2:_(<3 x s8>) = G_TRUNC %1


        


More information about the llvm-commits mailing list