[llvm] 31adc28 - GlobalISel: Implement fewerElementsVector for G_CONCAT_VECTORS sources

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Wed Aug 19 15:53:32 PDT 2020


Author: Matt Arsenault
Date: 2020-08-19T18:53:24-04:00
New Revision: 31adc28d24b1a95bb47df23068b6f61dfb5cd012

URL: https://github.com/llvm/llvm-project/commit/31adc28d24b1a95bb47df23068b6f61dfb5cd012
DIFF: https://github.com/llvm/llvm-project/commit/31adc28d24b1a95bb47df23068b6f61dfb5cd012.diff

LOG: GlobalISel: Implement fewerElementsVector for G_CONCAT_VECTORS sources

This fixes the handling of <6 x s16> = G_CONCAT_VECTORS built from <3 x s16> sources.
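
For context: with NarrowTy = <2 x s16>, a <6 x s16> G_CONCAT_VECTORS of
<3 x s16> sources cannot be split directly, since 3 is not divisible by 2. The
new path instead unmerges each source to the greatest common divisor type
(plain s16 here) and re-merges toward the destination, as implemented below. A
minimal standalone sketch of the type arithmetic, where VecTy and gcdType are
stand-ins for LLVM's LLT and getGCDType, not the real API:

  // Model the common-unmerge-type computation for DstTy = <6 x s16>,
  // NarrowTy = <2 x s16>, SrcTy = <3 x s16> (same scalar size throughout).
  #include <cstdio>
  #include <numeric>

  struct VecTy { unsigned NumElts; unsigned ScalarBits; };

  // GCD of two types with the same scalar size: gcd of the element counts.
  static VecTy gcdType(VecTy A, VecTy B) {
    return {std::gcd(A.NumElts, B.NumElts), A.ScalarBits};
  }

  int main() {
    VecTy Dst{6, 16}, Narrow{2, 16}, Src{3, 16};
    VecTy GCD = gcdType(gcdType(Src, Narrow), Dst); // <1 x s16>, i.e. s16
    unsigned PiecesPerSrc = Src.NumElts / GCD.NumElts; // 3 pieces per source
    unsigned NumMerges = Dst.NumElts / Narrow.NumElts; // 3 <2 x s16> merges
    std::printf("GCD <%u x s%u>, %u pieces/source, %u merges\n",
                GCD.NumElts, GCD.ScalarBits, PiecesPerSrc, NumMerges);
  }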

Added: 
    

Modified: 
    llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
    llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
    llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-concat-vectors.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.d16.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-or.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddsat.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sshlsat.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubsat.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddsat.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ushlsat.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubsat.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-xor.mir

Removed: 
    


################################################################################
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
index f857d1dda2f7..13d966eb61b7 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
@@ -198,11 +198,19 @@ class LegalizerHelper {
                    LLT PartTy, ArrayRef<Register> PartRegs,
                    LLT LeftoverTy = LLT(), ArrayRef<Register> LeftoverRegs = {});
 
-  /// Unmerge \p SrcReg into \p Parts with the greatest common divisor type with
-  /// \p DstTy and \p NarrowTy. Returns the GCD type.
+  /// Unmerge \p SrcReg into smaller-sized values, and append them to \p
+  /// Parts. The elements of \p Parts will be the greatest common divisor type
+  /// of \p DstTy, \p NarrowTy, and the type of \p SrcReg. This computes and
+  /// returns the GCD type.
   LLT extractGCDType(SmallVectorImpl<Register> &Parts, LLT DstTy,
                      LLT NarrowTy, Register SrcReg);
 
+  /// Unmerge \p SrcReg into \p GCDTy typed registers. This will append all of
+  /// the unpacked registers to \p Parts. Use this version if the common
+  /// unmerge type is already known.
+  void extractGCDType(SmallVectorImpl<Register> &Parts, LLT GCDTy,
+                      Register SrcReg);
+
   /// Produce a merge of values in \p VRegs to define \p DstReg. Perform a merge
   /// from the least common multiple type, and convert as appropriate to \p
   /// DstReg.
@@ -282,10 +290,12 @@ class LegalizerHelper {
   LegalizeResult fewerElementsVectorBuildVector(MachineInstr &MI,
                                                 unsigned TypeIdx,
                                                 LLT NarrowTy);
+  LegalizeResult fewerElementsVectorConcatVectors(MachineInstr &MI,
+                                                  unsigned TypeIdx,
+                                                  LLT NarrowTy);
   LegalizeResult fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI,
                                                            unsigned TypeIdx,
                                                            LLT NarrowTy);
-
   LegalizeResult
   reduceLoadStoreWidth(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy);
 

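The key behavioral point in the declarations above: the new extractGCDType
overload appends the unmerged pieces to \p Parts rather than replacing them,
which is what lets multiple G_CONCAT_VECTORS sources accumulate into a single
part list. The getUnmergeResults change in the .cpp diff below does the same.
A minimal sketch of that append behavior, with Register as a stand-in for
llvm::Register:

  #include <cassert>
  #include <cstddef>
  #include <vector>

  using Register = unsigned;

  static void appendResults(std::vector<Register> &Regs,
                            const std::vector<Register> &UnmergeDefs) {
    const std::size_t StartIdx = Regs.size();
    // Grow while keeping old entries; resizing to just UnmergeDefs.size()
    // would clobber pieces collected from earlier sources.
    Regs.resize(Regs.size() + UnmergeDefs.size());
    for (std::size_t I = 0; I != UnmergeDefs.size(); ++I)
      Regs[StartIdx + I] = UnmergeDefs[I];
  }

  int main() {
    std::vector<Register> Parts;
    appendResults(Parts, {1, 2, 3}); // pieces of the first source
    appendResults(Parts, {4, 5, 6}); // pieces of the second source
    assert(Parts.size() == 6 && Parts[3] == 4);
  }
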
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 9999d26b362c..ee9fd3cde4a4 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -243,22 +243,21 @@ void LegalizerHelper::insertParts(Register DstReg,
   }
 }
 
-/// Return the result registers of G_UNMERGE_VALUES \p MI in \p Regs
+/// Append the result registers of G_UNMERGE_VALUES \p MI to \p Regs.
 static void getUnmergeResults(SmallVectorImpl<Register> &Regs,
                               const MachineInstr &MI) {
   assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES);
 
+  const int StartIdx = Regs.size();
   const int NumResults = MI.getNumOperands() - 1;
-  Regs.resize(NumResults);
+  Regs.resize(Regs.size() + NumResults);
   for (int I = 0; I != NumResults; ++I)
-    Regs[I] = MI.getOperand(I).getReg();
+    Regs[StartIdx + I] = MI.getOperand(I).getReg();
 }
 
-LLT LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts, LLT DstTy,
-                                    LLT NarrowTy, Register SrcReg) {
+void LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts,
+                                     LLT GCDTy, Register SrcReg) {
   LLT SrcTy = MRI.getType(SrcReg);
-
-  LLT GCDTy = getGCDType(getGCDType(SrcTy, NarrowTy), DstTy);
   if (SrcTy == GCDTy) {
     // If the source already evenly divides the result type, we don't need to do
     // anything.
@@ -268,7 +267,13 @@ LLT LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts, LLT DstTy,
     auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
     getUnmergeResults(Parts, *Unmerge);
   }
+}
 
+LLT LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts, LLT DstTy,
+                                    LLT NarrowTy, Register SrcReg) {
+  LLT SrcTy = MRI.getType(SrcReg);
+  LLT GCDTy = getGCDType(getGCDType(SrcTy, NarrowTy), DstTy);
+  extractGCDType(Parts, GCDTy, SrcReg);
   return GCDTy;
 }
 
@@ -3609,6 +3614,34 @@ LegalizerHelper::fewerElementsVectorBuildVector(MachineInstr &MI,
   return Legalized;
 }
 
+LegalizerHelper::LegalizeResult
+LegalizerHelper::fewerElementsVectorConcatVectors(MachineInstr &MI,
+                                                  unsigned TypeIdx,
+                                                  LLT NarrowTy) {
+  if (TypeIdx != 1)
+    return UnableToLegalize;
+
+  Register DstReg = MI.getOperand(0).getReg();
+  LLT DstTy = MRI.getType(DstReg);
+  LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
+  LLT GCDTy = getGCDType(getGCDType(SrcTy, NarrowTy), DstTy);
+
+  // Break into a common type
+  SmallVector<Register, 16> Parts;
+  for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
+    extractGCDType(Parts, GCDTy, MI.getOperand(I).getReg());
+
+  // Build the requested new merge, padding with undef.
+  LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts,
+                                  TargetOpcode::G_ANYEXT);
+
+  // Pack into the original result register.
+  buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
+
+  MI.eraseFromParent();
+  return Legalized;
+}
+
 LegalizerHelper::LegalizeResult
 LegalizerHelper::fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI,
                                                            unsigned TypeIdx,
@@ -4013,6 +4046,8 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
     return fewerElementsVectorUnmergeValues(MI, TypeIdx, NarrowTy);
   case G_BUILD_VECTOR:
     return fewerElementsVectorBuildVector(MI, TypeIdx, NarrowTy);
+  case G_CONCAT_VECTORS:
+    return fewerElementsVectorConcatVectors(MI, TypeIdx, NarrowTy);
   case G_EXTRACT_VECTOR_ELT:
   case G_INSERT_VECTOR_ELT:
     return fewerElementsVectorExtractInsertVectorElt(MI, TypeIdx, NarrowTy);

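Putting it together, fewerElementsVectorConcatVectors unmerges every source to
the GCD type, pads the part list up to the LCM type with undef (via G_ANYEXT
of implicit-def pieces), and re-merges into the original destination register.
A standalone model of that bookkeeping for <6 x s16> from two <3 x s16>
sources, using string labels in place of registers (not the LegalizerHelper
API):

  #include <cstdio>
  #include <numeric>
  #include <string>
  #include <vector>

  int main() {
    const unsigned DstElts = 6, NarrowElts = 2, SrcElts = 3, GCDElts = 1;
    const unsigned LCMElts = std::lcm(DstElts, NarrowElts); // 6 here

    // Step 1: unmerge each source into GCD-typed pieces (3 x s16 each).
    std::vector<std::string> Parts;
    for (const char *Src : {"%src0", "%src1"})
      for (unsigned I = 0; I != SrcElts / GCDElts; ++I)
        Parts.push_back(std::string(Src) + "[" + std::to_string(I) + "]");

    // Step 2: pad with undef pieces up to the LCM type (a no-op here,
    // since 2 x 3 pieces already cover <6 x s16>).
    while (Parts.size() < LCMElts / GCDElts)
      Parts.push_back("undef");

    // Step 3: re-merge in NarrowTy-sized groups, then concat into DstReg.
    for (unsigned I = 0; I != Parts.size(); I += NarrowElts / GCDElts) {
      std::printf("<2 x s16> merge:");
      for (unsigned J = 0; J != NarrowElts / GCDElts; ++J)
        std::printf(" %s", Parts[I + J].c_str());
      std::printf("\n");
    }
  }
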
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 4c3adb108031..62cd45817c44 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -1434,7 +1434,10 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
 
   // FIXME: Clamp maximum size
   getActionDefinitionsBuilder(G_CONCAT_VECTORS)
-    .legalIf(isRegisterType(0));
+    .legalIf(all(isRegisterType(0), isRegisterType(1)))
+    .clampMaxNumElements(0, S32, 32)
+    .clampMaxNumElements(1, S16, 2) // TODO: Make 4?
+    .clampMaxNumElements(0, S16, 64);
 
  // TODO: Don't fully scalarize v2s16 pieces? Or combine out those
   // pre-legalize.

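The new clamp rules bound how large a G_CONCAT_VECTORS result (type index 0)
or source (type index 1) may be before the fewerElementsVector path above
fires. A rough model of the assumed clampMaxNumElements semantics, where the
clamp only applies when the element type matches (type indexes are elided
here, and the <128 x s16> input is hypothetical):

  #include <algorithm>
  #include <cstdio>

  struct LLTModel { unsigned EltBits; unsigned NumElts; };

  // Clamp the element count only if the element type matches.
  static void clampMaxNumElements(LLTModel &Ty, unsigned EltBits,
                                  unsigned Max) {
    if (Ty.EltBits == EltBits)
      Ty.NumElts = std::min(Ty.NumElts, Max);
  }

  int main() {
    LLTModel Dst{16, 128};            // hypothetical oversized result type
    clampMaxNumElements(Dst, 32, 32); // s32 rule: element type mismatch
    clampMaxNumElements(Dst, 16, 64); // s16 rule: clamp 128 -> 64
    std::printf("<%u x s%u>\n", Dst.NumElts, Dst.EltBits); // <64 x s16>
  }
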
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir
index 2d29b1352b51..c5a357732fc3 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir
@@ -504,12 +504,47 @@ body: |
     ; CHECK: [[AND1:%[0-9]+]]:_(<4 x s16>) = G_AND [[INSERT4]], [[INSERT5]]
     ; CHECK: [[CONCAT_VECTORS3:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[AND1]](<4 x s16>), [[DEF3]](<4 x s16>), [[DEF3]](<4 x s16>)
     ; CHECK: [[UV20:%[0-9]+]]:_(<3 x s16>), [[UV21:%[0-9]+]]:_(<3 x s16>), [[UV22:%[0-9]+]]:_(<3 x s16>), [[UV23:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<12 x s16>)
-    ; CHECK: [[CONCAT_VECTORS4:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV16]](<3 x s16>), [[UV20]](<3 x s16>)
+    ; CHECK: [[INSERT6:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV16]](<3 x s16>), 0
+    ; CHECK: [[UV24:%[0-9]+]]:_(<2 x s16>), [[UV25:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT6]](<4 x s16>)
+    ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV24]](<2 x s16>)
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+    ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV25]](<2 x s16>)
+    ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+    ; CHECK: [[INSERT7:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV20]](<3 x s16>), 0
+    ; CHECK: [[UV26:%[0-9]+]]:_(<2 x s16>), [[UV27:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT7]](<4 x s16>)
+    ; CHECK: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV26]](<2 x s16>)
+    ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+    ; CHECK: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV27]](<2 x s16>)
+    ; CHECK: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]]
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]]
+    ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+    ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL]]
+    ; CHECK: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; CHECK: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]]
+    ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+    ; CHECK: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]]
+    ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL1]]
+    ; CHECK: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
+    ; CHECK: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]]
+    ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+    ; CHECK: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]]
+    ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32)
+    ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL2]]
+    ; CHECK: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; CHECK: [[CONCAT_VECTORS4:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
     ; CHECK: [[CONCAT_VECTORS5:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[CONCAT_VECTORS4]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>)
-    ; CHECK: [[UV24:%[0-9]+]]:_(<5 x s16>), [[UV25:%[0-9]+]]:_(<5 x s16>), [[UV26:%[0-9]+]]:_(<5 x s16>), [[UV27:%[0-9]+]]:_(<5 x s16>), [[UV28:%[0-9]+]]:_(<5 x s16>), [[UV29:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS5]](<30 x s16>)
+    ; CHECK: [[UV28:%[0-9]+]]:_(<5 x s16>), [[UV29:%[0-9]+]]:_(<5 x s16>), [[UV30:%[0-9]+]]:_(<5 x s16>), [[UV31:%[0-9]+]]:_(<5 x s16>), [[UV32:%[0-9]+]]:_(<5 x s16>), [[UV33:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS5]](<30 x s16>)
     ; CHECK: [[DEF4:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
-    ; CHECK: [[INSERT6:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[DEF4]], [[UV24]](<5 x s16>), 0
-    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT6]](<8 x s16>)
+    ; CHECK: [[INSERT8:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[DEF4]], [[UV28]](<5 x s16>), 0
+    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT8]](<8 x s16>)
     %0:_(<5 x s16>) = G_IMPLICIT_DEF
     %1:_(<5 x s16>) = G_IMPLICIT_DEF
     %2:_(<5 x s16>) = G_AND %0, %1

diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-concat-vectors.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-concat-vectors.mir
index f5c924bb6a96..e2724ffeb9c9 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-concat-vectors.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-concat-vectors.mir
@@ -212,3 +212,56 @@ body: |
     %2:_(<4 x p999>) = G_CONCAT_VECTORS %0, %1
     $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %2
 ...
+
+---
+name: concat_vectors_v6s16_v3s16
+
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1_vgpr2
+    ; CHECK-LABEL: name: concat_vectors_v6s16_v3s16
+    ; CHECK: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
+    ; CHECK: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>)
+    ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0
+    ; CHECK: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>)
+    ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+    ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+    ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+    ; CHECK: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV1]](<3 x s16>), 0
+    ; CHECK: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>)
+    ; CHECK: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
+    ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+    ; CHECK: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>)
+    ; CHECK: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]]
+    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]]
+    ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CHECK: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]]
+    ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+    ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]]
+    ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+    ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CHECK: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
+    ; CHECK: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]]
+    ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+    ; CHECK: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]]
+    ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; CHECK: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
+    ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+    %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
+    %1:_(<3 x s16>), %2:_(<3 x s16>) = G_UNMERGE_VALUES %0
+    %3:_(<6 x s16>) = G_CONCAT_VECTORS %1, %2
+    $vgpr0_vgpr1_vgpr2 = COPY %3
+...

diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.d16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.d16.ll
index ff614a54c44d..50cbdf92fd7d 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.d16.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.d16.ll
@@ -135,10 +135,32 @@ define amdgpu_ps <3 x half> @image_load_v3f16(<8 x i32> inreg %rsrc, i32 %s, i32
   ; UNPACKED:   [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
   ; UNPACKED:   [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
   ; UNPACKED:   [[UV3:%[0-9]+]]:_(<3 x s16>), [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
-  ; UNPACKED:   [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[EXTRACT]](<3 x s16>), [[UV3]](<3 x s16>)
-  ; UNPACKED:   [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x s16>)
-  ; UNPACKED:   $vgpr0 = COPY [[UV7]](<2 x s16>)
-  ; UNPACKED:   $vgpr1 = COPY [[UV8]](<2 x s16>)
+  ; UNPACKED:   [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0
+  ; UNPACKED:   [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>)
+  ; UNPACKED:   [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
+  ; UNPACKED:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C1]](s32)
+  ; UNPACKED:   [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>)
+  ; UNPACKED:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32)
+  ; UNPACKED:   [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV3]](<3 x s16>), 0
+  ; UNPACKED:   [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>)
+  ; UNPACKED:   [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>)
+  ; UNPACKED:   [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C1]](s32)
+  ; UNPACKED:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+  ; UNPACKED:   [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C]]
+  ; UNPACKED:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+  ; UNPACKED:   [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C]]
+  ; UNPACKED:   [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C1]](s32)
+  ; UNPACKED:   [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL2]]
+  ; UNPACKED:   [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+  ; UNPACKED:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+  ; UNPACKED:   [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C]]
+  ; UNPACKED:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32)
+  ; UNPACKED:   [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C]]
+  ; UNPACKED:   [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C1]](s32)
+  ; UNPACKED:   [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL3]]
+  ; UNPACKED:   [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+  ; UNPACKED:   $vgpr0 = COPY [[BITCAST5]](<2 x s16>)
+  ; UNPACKED:   $vgpr1 = COPY [[BITCAST6]](<2 x s16>)
   ; UNPACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
   ; PACKED-LABEL: name: image_load_v3f16
   ; PACKED: bb.1 (%ir-block.0):
@@ -164,10 +186,34 @@ define amdgpu_ps <3 x half> @image_load_v3f16(<8 x i32> inreg %rsrc, i32 %s, i32
   ; PACKED:   [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
   ; PACKED:   [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF1]](<4 x s16>), [[DEF2]](<4 x s16>), [[DEF2]](<4 x s16>)
   ; PACKED:   [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
-  ; PACKED:   [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV2]](<3 x s16>), [[UV4]](<3 x s16>)
-  ; PACKED:   [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x s16>)
-  ; PACKED:   $vgpr0 = COPY [[UV8]](<2 x s16>)
-  ; PACKED:   $vgpr1 = COPY [[UV9]](<2 x s16>)
+  ; PACKED:   [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[UV2]](<3 x s16>), 0
+  ; PACKED:   [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>)
+  ; PACKED:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>)
+  ; PACKED:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+  ; PACKED:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+  ; PACKED:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>)
+  ; PACKED:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+  ; PACKED:   [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[UV4]](<3 x s16>), 0
+  ; PACKED:   [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>)
+  ; PACKED:   [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>)
+  ; PACKED:   [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+  ; PACKED:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+  ; PACKED:   [[COPY10:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+  ; PACKED:   [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C1]]
+  ; PACKED:   [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+  ; PACKED:   [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]]
+  ; PACKED:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+  ; PACKED:   [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+  ; PACKED:   [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+  ; PACKED:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+  ; PACKED:   [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C1]]
+  ; PACKED:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+  ; PACKED:   [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C1]]
+  ; PACKED:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+  ; PACKED:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+  ; PACKED:   [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+  ; PACKED:   $vgpr0 = COPY [[BITCAST3]](<2 x s16>)
+  ; PACKED:   $vgpr1 = COPY [[BITCAST4]](<2 x s16>)
   ; PACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
   %tex = call <3 x half> @llvm.amdgcn.image.load.2d.v3f16.i32(i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
   ret <3 x half> %tex
@@ -387,10 +433,32 @@ define amdgpu_ps <3 x half> @image_load_tfe_v3f16(<8 x i32> inreg %rsrc, i32 %s,
   ; UNPACKED:   [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
   ; UNPACKED:   [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF1]](<4 x s16>), [[DEF2]](<4 x s16>), [[DEF2]](<4 x s16>)
   ; UNPACKED:   [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
-  ; UNPACKED:   [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[EXTRACT]](<3 x s16>), [[UV4]](<3 x s16>)
-  ; UNPACKED:   [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x s16>)
-  ; UNPACKED:   $vgpr0 = COPY [[UV8]](<2 x s16>)
-  ; UNPACKED:   $vgpr1 = COPY [[UV9]](<2 x s16>)
+  ; UNPACKED:   [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT]](<3 x s16>), 0
+  ; UNPACKED:   [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>)
+  ; UNPACKED:   [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>)
+  ; UNPACKED:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C1]](s32)
+  ; UNPACKED:   [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>)
+  ; UNPACKED:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32)
+  ; UNPACKED:   [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[UV4]](<3 x s16>), 0
+  ; UNPACKED:   [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>)
+  ; UNPACKED:   [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>)
+  ; UNPACKED:   [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C1]](s32)
+  ; UNPACKED:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+  ; UNPACKED:   [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C]]
+  ; UNPACKED:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+  ; UNPACKED:   [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C]]
+  ; UNPACKED:   [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C1]](s32)
+  ; UNPACKED:   [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL2]]
+  ; UNPACKED:   [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+  ; UNPACKED:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+  ; UNPACKED:   [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C]]
+  ; UNPACKED:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32)
+  ; UNPACKED:   [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C]]
+  ; UNPACKED:   [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C1]](s32)
+  ; UNPACKED:   [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL3]]
+  ; UNPACKED:   [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+  ; UNPACKED:   $vgpr0 = COPY [[BITCAST5]](<2 x s16>)
+  ; UNPACKED:   $vgpr1 = COPY [[BITCAST6]](<2 x s16>)
   ; UNPACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
   ; PACKED-LABEL: name: image_load_tfe_v3f16
   ; PACKED: bb.1 (%ir-block.0):
@@ -420,10 +488,34 @@ define amdgpu_ps <3 x half> @image_load_tfe_v3f16(<8 x i32> inreg %rsrc, i32 %s,
   ; PACKED:   [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
   ; PACKED:   [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF2]](<4 x s16>), [[DEF3]](<4 x s16>), [[DEF3]](<4 x s16>)
   ; PACKED:   [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>), [[UV8:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
-  ; PACKED:   [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV3]](<3 x s16>), [[UV5]](<3 x s16>)
-  ; PACKED:   [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x s16>)
-  ; PACKED:   $vgpr0 = COPY [[UV9]](<2 x s16>)
-  ; PACKED:   $vgpr1 = COPY [[UV10]](<2 x s16>)
+  ; PACKED:   [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV3]](<3 x s16>), 0
+  ; PACKED:   [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>)
+  ; PACKED:   [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>)
+  ; PACKED:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+  ; PACKED:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+  ; PACKED:   [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>)
+  ; PACKED:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+  ; PACKED:   [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV5]](<3 x s16>), 0
+  ; PACKED:   [[UV11:%[0-9]+]]:_(<2 x s16>), [[UV12:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>)
+  ; PACKED:   [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV11]](<2 x s16>)
+  ; PACKED:   [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
+  ; PACKED:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+  ; PACKED:   [[COPY10:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+  ; PACKED:   [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C1]]
+  ; PACKED:   [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+  ; PACKED:   [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]]
+  ; PACKED:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+  ; PACKED:   [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+  ; PACKED:   [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+  ; PACKED:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+  ; PACKED:   [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C1]]
+  ; PACKED:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32)
+  ; PACKED:   [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C1]]
+  ; PACKED:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+  ; PACKED:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+  ; PACKED:   [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+  ; PACKED:   $vgpr0 = COPY [[BITCAST5]](<2 x s16>)
+  ; PACKED:   $vgpr1 = COPY [[BITCAST6]](<2 x s16>)
   ; PACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
   %res = call { <3 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f16i32s.i32(i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
   %tex = extractvalue { <3 x half>, i32 } %res, 0
@@ -662,10 +754,32 @@ define amdgpu_ps <3 x half> @image_load_v3f16_dmask_1100(<8 x i32> inreg %rsrc,
   ; UNPACKED:   [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
   ; UNPACKED:   [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
   ; UNPACKED:   [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>), [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
-  ; UNPACKED:   [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[EXTRACT]](<3 x s16>), [[UV2]](<3 x s16>)
-  ; UNPACKED:   [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x s16>)
-  ; UNPACKED:   $vgpr0 = COPY [[UV6]](<2 x s16>)
-  ; UNPACKED:   $vgpr1 = COPY [[UV7]](<2 x s16>)
+  ; UNPACKED:   [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0
+  ; UNPACKED:   [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>)
+  ; UNPACKED:   [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
+  ; UNPACKED:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C1]](s32)
+  ; UNPACKED:   [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
+  ; UNPACKED:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32)
+  ; UNPACKED:   [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV2]](<3 x s16>), 0
+  ; UNPACKED:   [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>)
+  ; UNPACKED:   [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>)
+  ; UNPACKED:   [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C1]](s32)
+  ; UNPACKED:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+  ; UNPACKED:   [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C]]
+  ; UNPACKED:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+  ; UNPACKED:   [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C]]
+  ; UNPACKED:   [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32)
+  ; UNPACKED:   [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL2]]
+  ; UNPACKED:   [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+  ; UNPACKED:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+  ; UNPACKED:   [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C]]
+  ; UNPACKED:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32)
+  ; UNPACKED:   [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C]]
+  ; UNPACKED:   [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32)
+  ; UNPACKED:   [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+  ; UNPACKED:   [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+  ; UNPACKED:   $vgpr0 = COPY [[BITCAST5]](<2 x s16>)
+  ; UNPACKED:   $vgpr1 = COPY [[BITCAST6]](<2 x s16>)
   ; UNPACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
   ; PACKED-LABEL: name: image_load_v3f16_dmask_1100
   ; PACKED: bb.1 (%ir-block.0):
@@ -690,10 +804,34 @@ define amdgpu_ps <3 x half> @image_load_v3f16_dmask_1100(<8 x i32> inreg %rsrc,
   ; PACKED:   [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
   ; PACKED:   [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF1]](<4 x s16>), [[DEF2]](<4 x s16>), [[DEF2]](<4 x s16>)
   ; PACKED:   [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>), [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
-  ; PACKED:   [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<3 x s16>), [[UV2]](<3 x s16>)
-  ; PACKED:   [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x s16>)
-  ; PACKED:   $vgpr0 = COPY [[UV6]](<2 x s16>)
-  ; PACKED:   $vgpr1 = COPY [[UV7]](<2 x s16>)
+  ; PACKED:   [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[UV]](<3 x s16>), 0
+  ; PACKED:   [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>)
+  ; PACKED:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
+  ; PACKED:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+  ; PACKED:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+  ; PACKED:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
+  ; PACKED:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+  ; PACKED:   [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[UV2]](<3 x s16>), 0
+  ; PACKED:   [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>)
+  ; PACKED:   [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>)
+  ; PACKED:   [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+  ; PACKED:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+  ; PACKED:   [[COPY10:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+  ; PACKED:   [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C1]]
+  ; PACKED:   [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+  ; PACKED:   [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]]
+  ; PACKED:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+  ; PACKED:   [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+  ; PACKED:   [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+  ; PACKED:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+  ; PACKED:   [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C1]]
+  ; PACKED:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+  ; PACKED:   [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C1]]
+  ; PACKED:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+  ; PACKED:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+  ; PACKED:   [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+  ; PACKED:   $vgpr0 = COPY [[BITCAST3]](<2 x s16>)
+  ; PACKED:   $vgpr1 = COPY [[BITCAST4]](<2 x s16>)
   ; PACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
   %tex = call <3 x half> @llvm.amdgcn.image.load.2d.v3f16.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
   ret <3 x half> %tex
@@ -732,10 +870,32 @@ define amdgpu_ps <3 x half> @image_load_v3f16_dmask_1000(<8 x i32> inreg %rsrc,
   ; UNPACKED:   [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
   ; UNPACKED:   [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
   ; UNPACKED:   [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
-  ; UNPACKED:   [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[EXTRACT]](<3 x s16>), [[UV]](<3 x s16>)
-  ; UNPACKED:   [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x s16>)
-  ; UNPACKED:   $vgpr0 = COPY [[UV4]](<2 x s16>)
-  ; UNPACKED:   $vgpr1 = COPY [[UV5]](<2 x s16>)
+  ; UNPACKED:   [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0
+  ; UNPACKED:   [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>)
+  ; UNPACKED:   [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
+  ; UNPACKED:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C2]](s32)
+  ; UNPACKED:   [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>)
+  ; UNPACKED:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C2]](s32)
+  ; UNPACKED:   [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
+  ; UNPACKED:   [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>)
+  ; UNPACKED:   [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
+  ; UNPACKED:   [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C2]](s32)
+  ; UNPACKED:   [[COPY11:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+  ; UNPACKED:   [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C]]
+  ; UNPACKED:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+  ; UNPACKED:   [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C]]
+  ; UNPACKED:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C2]](s32)
+  ; UNPACKED:   [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]]
+  ; UNPACKED:   [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+  ; UNPACKED:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+  ; UNPACKED:   [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C]]
+  ; UNPACKED:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32)
+  ; UNPACKED:   [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C]]
+  ; UNPACKED:   [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C2]](s32)
+  ; UNPACKED:   [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL2]]
+  ; UNPACKED:   [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+  ; UNPACKED:   $vgpr0 = COPY [[BITCAST5]](<2 x s16>)
+  ; UNPACKED:   $vgpr1 = COPY [[BITCAST6]](<2 x s16>)
   ; UNPACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
   ; PACKED-LABEL: name: image_load_v3f16_dmask_1000
   ; PACKED: bb.1 (%ir-block.0):
@@ -760,10 +920,34 @@ define amdgpu_ps <3 x half> @image_load_v3f16_dmask_1000(<8 x i32> inreg %rsrc,
   ; PACKED:   [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
   ; PACKED:   [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF1]](<4 x s16>), [[DEF2]](<4 x s16>), [[DEF2]](<4 x s16>)
   ; PACKED:   [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>), [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
-  ; PACKED:   [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<3 x s16>), [[UV2]](<3 x s16>)
-  ; PACKED:   [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x s16>)
-  ; PACKED:   $vgpr0 = COPY [[UV6]](<2 x s16>)
-  ; PACKED:   $vgpr1 = COPY [[UV7]](<2 x s16>)
+  ; PACKED:   [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[UV]](<3 x s16>), 0
+  ; PACKED:   [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>)
+  ; PACKED:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
+  ; PACKED:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+  ; PACKED:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+  ; PACKED:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
+  ; PACKED:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+  ; PACKED:   [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[UV2]](<3 x s16>), 0
+  ; PACKED:   [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>)
+  ; PACKED:   [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>)
+  ; PACKED:   [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+  ; PACKED:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+  ; PACKED:   [[COPY10:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+  ; PACKED:   [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C1]]
+  ; PACKED:   [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+  ; PACKED:   [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]]
+  ; PACKED:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+  ; PACKED:   [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+  ; PACKED:   [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+  ; PACKED:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+  ; PACKED:   [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C1]]
+  ; PACKED:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+  ; PACKED:   [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C1]]
+  ; PACKED:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+  ; PACKED:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+  ; PACKED:   [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+  ; PACKED:   $vgpr0 = COPY [[BITCAST3]](<2 x s16>)
+  ; PACKED:   $vgpr1 = COPY [[BITCAST4]](<2 x s16>)
   ; PACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
   %tex = call <3 x half> @llvm.amdgcn.image.load.2d.v3f16.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
   ret <3 x half> %tex
@@ -790,10 +974,34 @@ define amdgpu_ps <3 x half> @image_load_v3f16_dmask_0000(<8 x i32> inreg %rsrc,
   ; UNPACKED:   [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
   ; UNPACKED:   [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF2]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
   ; UNPACKED:   [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
-  ; UNPACKED:   [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<3 x s16>), [[UV4]](<3 x s16>)
-  ; UNPACKED:   [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x s16>)
-  ; UNPACKED:   $vgpr0 = COPY [[UV8]](<2 x s16>)
-  ; UNPACKED:   $vgpr1 = COPY [[UV9]](<2 x s16>)
+  ; UNPACKED:   [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
+  ; UNPACKED:   [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>)
+  ; UNPACKED:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>)
+  ; UNPACKED:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+  ; UNPACKED:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+  ; UNPACKED:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>)
+  ; UNPACKED:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+  ; UNPACKED:   [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0
+  ; UNPACKED:   [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>)
+  ; UNPACKED:   [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>)
+  ; UNPACKED:   [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+  ; UNPACKED:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+  ; UNPACKED:   [[COPY10:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+  ; UNPACKED:   [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C1]]
+  ; UNPACKED:   [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+  ; UNPACKED:   [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]]
+  ; UNPACKED:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+  ; UNPACKED:   [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+  ; UNPACKED:   [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+  ; UNPACKED:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+  ; UNPACKED:   [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C1]]
+  ; UNPACKED:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+  ; UNPACKED:   [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C1]]
+  ; UNPACKED:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+  ; UNPACKED:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+  ; UNPACKED:   [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+  ; UNPACKED:   $vgpr0 = COPY [[BITCAST3]](<2 x s16>)
+  ; UNPACKED:   $vgpr1 = COPY [[BITCAST4]](<2 x s16>)
   ; UNPACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
   ; PACKED-LABEL: name: image_load_v3f16_dmask_0000
   ; PACKED: bb.1 (%ir-block.0):
@@ -815,10 +1023,34 @@ define amdgpu_ps <3 x half> @image_load_v3f16_dmask_0000(<8 x i32> inreg %rsrc,
   ; PACKED:   [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
   ; PACKED:   [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF2]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
   ; PACKED:   [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
-  ; PACKED:   [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<3 x s16>), [[UV4]](<3 x s16>)
-  ; PACKED:   [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x s16>)
-  ; PACKED:   $vgpr0 = COPY [[UV8]](<2 x s16>)
-  ; PACKED:   $vgpr1 = COPY [[UV9]](<2 x s16>)
+  ; PACKED:   [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
+  ; PACKED:   [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>)
+  ; PACKED:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>)
+  ; PACKED:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+  ; PACKED:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+  ; PACKED:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>)
+  ; PACKED:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+  ; PACKED:   [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0
+  ; PACKED:   [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>)
+  ; PACKED:   [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>)
+  ; PACKED:   [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+  ; PACKED:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+  ; PACKED:   [[COPY10:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+  ; PACKED:   [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C1]]
+  ; PACKED:   [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+  ; PACKED:   [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]]
+  ; PACKED:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+  ; PACKED:   [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+  ; PACKED:   [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+  ; PACKED:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+  ; PACKED:   [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C1]]
+  ; PACKED:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+  ; PACKED:   [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C1]]
+  ; PACKED:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+  ; PACKED:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+  ; PACKED:   [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+  ; PACKED:   $vgpr0 = COPY [[BITCAST3]](<2 x s16>)
+  ; PACKED:   $vgpr1 = COPY [[BITCAST4]](<2 x s16>)
   ; PACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
   %tex = call <3 x half> @llvm.amdgcn.image.load.2d.v3f16.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
   ret <3 x half> %tex
@@ -1246,10 +1478,32 @@ define amdgpu_ps <3 x half> @image_load_tfe_v3f16_dmask_1100(<8 x i32> inreg %rs
   ; UNPACKED:   [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
   ; UNPACKED:   [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF1]](<4 x s16>), [[DEF2]](<4 x s16>), [[DEF2]](<4 x s16>)
   ; UNPACKED:   [[UV3:%[0-9]+]]:_(<3 x s16>), [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
-  ; UNPACKED:   [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[EXTRACT]](<3 x s16>), [[UV3]](<3 x s16>)
-  ; UNPACKED:   [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x s16>)
-  ; UNPACKED:   $vgpr0 = COPY [[UV7]](<2 x s16>)
-  ; UNPACKED:   $vgpr1 = COPY [[UV8]](<2 x s16>)
+  ; UNPACKED:   [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT]](<3 x s16>), 0
+  ; UNPACKED:   [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>)
+  ; UNPACKED:   [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
+  ; UNPACKED:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C1]](s32)
+  ; UNPACKED:   [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>)
+  ; UNPACKED:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32)
+  ; UNPACKED:   [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[UV3]](<3 x s16>), 0
+  ; UNPACKED:   [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>)
+  ; UNPACKED:   [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>)
+  ; UNPACKED:   [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C1]](s32)
+  ; UNPACKED:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+  ; UNPACKED:   [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C]]
+  ; UNPACKED:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+  ; UNPACKED:   [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C]]
+  ; UNPACKED:   [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32)
+  ; UNPACKED:   [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL2]]
+  ; UNPACKED:   [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+  ; UNPACKED:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+  ; UNPACKED:   [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C]]
+  ; UNPACKED:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32)
+  ; UNPACKED:   [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C]]
+  ; UNPACKED:   [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32)
+  ; UNPACKED:   [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+  ; UNPACKED:   [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+  ; UNPACKED:   $vgpr0 = COPY [[BITCAST5]](<2 x s16>)
+  ; UNPACKED:   $vgpr1 = COPY [[BITCAST6]](<2 x s16>)
   ; UNPACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
   ; PACKED-LABEL: name: image_load_tfe_v3f16_dmask_1100
   ; PACKED: bb.1 (%ir-block.0):
@@ -1278,10 +1532,34 @@ define amdgpu_ps <3 x half> @image_load_tfe_v3f16_dmask_1100(<8 x i32> inreg %rs
   ; PACKED:   [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
   ; PACKED:   [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF2]](<4 x s16>), [[DEF3]](<4 x s16>), [[DEF3]](<4 x s16>)
   ; PACKED:   [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
-  ; PACKED:   [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV2]](<3 x s16>), [[UV4]](<3 x s16>)
-  ; PACKED:   [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x s16>)
-  ; PACKED:   $vgpr0 = COPY [[UV8]](<2 x s16>)
-  ; PACKED:   $vgpr1 = COPY [[UV9]](<2 x s16>)
+  ; PACKED:   [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV2]](<3 x s16>), 0
+  ; PACKED:   [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>)
+  ; PACKED:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>)
+  ; PACKED:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+  ; PACKED:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+  ; PACKED:   [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>)
+  ; PACKED:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+  ; PACKED:   [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV4]](<3 x s16>), 0
+  ; PACKED:   [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>)
+  ; PACKED:   [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>)
+  ; PACKED:   [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+  ; PACKED:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+  ; PACKED:   [[COPY10:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+  ; PACKED:   [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C1]]
+  ; PACKED:   [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+  ; PACKED:   [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]]
+  ; PACKED:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+  ; PACKED:   [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+  ; PACKED:   [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+  ; PACKED:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+  ; PACKED:   [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C1]]
+  ; PACKED:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+  ; PACKED:   [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C1]]
+  ; PACKED:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+  ; PACKED:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+  ; PACKED:   [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+  ; PACKED:   $vgpr0 = COPY [[BITCAST4]](<2 x s16>)
+  ; PACKED:   $vgpr1 = COPY [[BITCAST5]](<2 x s16>)
   ; PACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
   %res = call { <3 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f16i32s.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
   %tex = extractvalue { <3 x half>, i32 } %res, 0
@@ -1326,10 +1604,32 @@ define amdgpu_ps <3 x half> @image_load_tfe_v3f16_dmask_1000(<8 x i32> inreg %rs
   ; UNPACKED:   [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
   ; UNPACKED:   [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF1]](<4 x s16>), [[DEF2]](<4 x s16>), [[DEF2]](<4 x s16>)
   ; UNPACKED:   [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>), [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
-  ; UNPACKED:   [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[EXTRACT]](<3 x s16>), [[UV2]](<3 x s16>)
-  ; UNPACKED:   [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x s16>)
-  ; UNPACKED:   $vgpr0 = COPY [[UV6]](<2 x s16>)
-  ; UNPACKED:   $vgpr1 = COPY [[UV7]](<2 x s16>)
+  ; UNPACKED:   [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT]](<3 x s16>), 0
+  ; UNPACKED:   [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>)
+  ; UNPACKED:   [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
+  ; UNPACKED:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C2]](s32)
+  ; UNPACKED:   [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
+  ; UNPACKED:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C2]](s32)
+  ; UNPACKED:   [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[UV2]](<3 x s16>), 0
+  ; UNPACKED:   [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>)
+  ; UNPACKED:   [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>)
+  ; UNPACKED:   [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C2]](s32)
+  ; UNPACKED:   [[COPY11:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+  ; UNPACKED:   [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C]]
+  ; UNPACKED:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+  ; UNPACKED:   [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C]]
+  ; UNPACKED:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C2]](s32)
+  ; UNPACKED:   [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]]
+  ; UNPACKED:   [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+  ; UNPACKED:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+  ; UNPACKED:   [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C]]
+  ; UNPACKED:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32)
+  ; UNPACKED:   [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C]]
+  ; UNPACKED:   [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C2]](s32)
+  ; UNPACKED:   [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL2]]
+  ; UNPACKED:   [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+  ; UNPACKED:   $vgpr0 = COPY [[BITCAST5]](<2 x s16>)
+  ; UNPACKED:   $vgpr1 = COPY [[BITCAST6]](<2 x s16>)
   ; UNPACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
   ; PACKED-LABEL: name: image_load_tfe_v3f16_dmask_1000
   ; PACKED: bb.1 (%ir-block.0):
@@ -1358,10 +1658,34 @@ define amdgpu_ps <3 x half> @image_load_tfe_v3f16_dmask_1000(<8 x i32> inreg %rs
   ; PACKED:   [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
   ; PACKED:   [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF2]](<4 x s16>), [[DEF3]](<4 x s16>), [[DEF3]](<4 x s16>)
   ; PACKED:   [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
-  ; PACKED:   [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV2]](<3 x s16>), [[UV4]](<3 x s16>)
-  ; PACKED:   [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x s16>)
-  ; PACKED:   $vgpr0 = COPY [[UV8]](<2 x s16>)
-  ; PACKED:   $vgpr1 = COPY [[UV9]](<2 x s16>)
+  ; PACKED:   [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV2]](<3 x s16>), 0
+  ; PACKED:   [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>)
+  ; PACKED:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>)
+  ; PACKED:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+  ; PACKED:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+  ; PACKED:   [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>)
+  ; PACKED:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+  ; PACKED:   [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV4]](<3 x s16>), 0
+  ; PACKED:   [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>)
+  ; PACKED:   [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>)
+  ; PACKED:   [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+  ; PACKED:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+  ; PACKED:   [[COPY10:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+  ; PACKED:   [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C1]]
+  ; PACKED:   [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+  ; PACKED:   [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]]
+  ; PACKED:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+  ; PACKED:   [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+  ; PACKED:   [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+  ; PACKED:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+  ; PACKED:   [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C1]]
+  ; PACKED:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+  ; PACKED:   [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C1]]
+  ; PACKED:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+  ; PACKED:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+  ; PACKED:   [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+  ; PACKED:   $vgpr0 = COPY [[BITCAST4]](<2 x s16>)
+  ; PACKED:   $vgpr1 = COPY [[BITCAST5]](<2 x s16>)
   ; PACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
   %res = call { <3 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f16i32s.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
   %tex = extractvalue { <3 x half>, i32 } %res, 0
@@ -1406,10 +1730,32 @@ define amdgpu_ps <3 x half> @image_load_tfe_v3f16_dmask_0000(<8 x i32> inreg %rs
   ; UNPACKED:   [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
   ; UNPACKED:   [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF1]](<4 x s16>), [[DEF2]](<4 x s16>), [[DEF2]](<4 x s16>)
   ; UNPACKED:   [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>), [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
-  ; UNPACKED:   [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[EXTRACT]](<3 x s16>), [[UV2]](<3 x s16>)
-  ; UNPACKED:   [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x s16>)
-  ; UNPACKED:   $vgpr0 = COPY [[UV6]](<2 x s16>)
-  ; UNPACKED:   $vgpr1 = COPY [[UV7]](<2 x s16>)
+  ; UNPACKED:   [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT]](<3 x s16>), 0
+  ; UNPACKED:   [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>)
+  ; UNPACKED:   [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
+  ; UNPACKED:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C2]](s32)
+  ; UNPACKED:   [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
+  ; UNPACKED:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C2]](s32)
+  ; UNPACKED:   [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[UV2]](<3 x s16>), 0
+  ; UNPACKED:   [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>)
+  ; UNPACKED:   [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>)
+  ; UNPACKED:   [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C2]](s32)
+  ; UNPACKED:   [[COPY11:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+  ; UNPACKED:   [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C]]
+  ; UNPACKED:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+  ; UNPACKED:   [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C]]
+  ; UNPACKED:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C2]](s32)
+  ; UNPACKED:   [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]]
+  ; UNPACKED:   [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+  ; UNPACKED:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+  ; UNPACKED:   [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C]]
+  ; UNPACKED:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32)
+  ; UNPACKED:   [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C]]
+  ; UNPACKED:   [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C2]](s32)
+  ; UNPACKED:   [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL2]]
+  ; UNPACKED:   [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+  ; UNPACKED:   $vgpr0 = COPY [[BITCAST5]](<2 x s16>)
+  ; UNPACKED:   $vgpr1 = COPY [[BITCAST6]](<2 x s16>)
   ; UNPACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
   ; PACKED-LABEL: name: image_load_tfe_v3f16_dmask_0000
   ; PACKED: bb.1 (%ir-block.0):
@@ -1438,10 +1784,34 @@ define amdgpu_ps <3 x half> @image_load_tfe_v3f16_dmask_0000(<8 x i32> inreg %rs
   ; PACKED:   [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
   ; PACKED:   [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF2]](<4 x s16>), [[DEF3]](<4 x s16>), [[DEF3]](<4 x s16>)
   ; PACKED:   [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
-  ; PACKED:   [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV2]](<3 x s16>), [[UV4]](<3 x s16>)
-  ; PACKED:   [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x s16>)
-  ; PACKED:   $vgpr0 = COPY [[UV8]](<2 x s16>)
-  ; PACKED:   $vgpr1 = COPY [[UV9]](<2 x s16>)
+  ; PACKED:   [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV2]](<3 x s16>), 0
+  ; PACKED:   [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>)
+  ; PACKED:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>)
+  ; PACKED:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+  ; PACKED:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+  ; PACKED:   [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>)
+  ; PACKED:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+  ; PACKED:   [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV4]](<3 x s16>), 0
+  ; PACKED:   [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>)
+  ; PACKED:   [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>)
+  ; PACKED:   [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+  ; PACKED:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+  ; PACKED:   [[COPY10:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+  ; PACKED:   [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C1]]
+  ; PACKED:   [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+  ; PACKED:   [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]]
+  ; PACKED:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+  ; PACKED:   [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+  ; PACKED:   [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+  ; PACKED:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+  ; PACKED:   [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C1]]
+  ; PACKED:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+  ; PACKED:   [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C1]]
+  ; PACKED:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+  ; PACKED:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+  ; PACKED:   [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+  ; PACKED:   $vgpr0 = COPY [[BITCAST4]](<2 x s16>)
+  ; PACKED:   $vgpr1 = COPY [[BITCAST5]](<2 x s16>)
   ; PACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
   %res = call { <3 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f16i32s.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
   %tex = extractvalue { <3 x half>, i32 } %res, 0

diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir
index 1fed9c068afc..8bb7b2a7c20b 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir
@@ -7279,11 +7279,44 @@ body: |
     ; SI: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
     ; SI: [[CONCAT_VECTORS3:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF2]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>)
     ; SI: [[UV18:%[0-9]+]]:_(<5 x s16>), [[UV19:%[0-9]+]]:_(<5 x s16>), [[UV20:%[0-9]+]]:_(<5 x s16>), [[UV21:%[0-9]+]]:_(<5 x s16>), [[UV22:%[0-9]+]]:_(<5 x s16>), [[UV23:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<30 x s16>)
-    ; SI: [[CONCAT_VECTORS4:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[UV12]](<5 x s16>), [[UV18]](<5 x s16>)
-    ; SI: [[UV24:%[0-9]+]]:_(<2 x s16>), [[UV25:%[0-9]+]]:_(<2 x s16>), [[UV26:%[0-9]+]]:_(<2 x s16>), [[UV27:%[0-9]+]]:_(<2 x s16>), [[UV28:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS4]](<10 x s16>)
-    ; SI: $vgpr0 = COPY [[UV24]](<2 x s16>)
-    ; SI: $vgpr1 = COPY [[UV25]](<2 x s16>)
-    ; SI: $vgpr2 = COPY [[UV26]](<2 x s16>)
+    ; SI: [[INSERT4:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF1]], [[UV12]](<5 x s16>), 0
+    ; SI: [[UV24:%[0-9]+]]:_(<2 x s16>), [[UV25:%[0-9]+]]:_(<2 x s16>), [[UV26:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT4]](<6 x s16>)
+    ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV24]](<2 x s16>)
+    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32)
+    ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV25]](<2 x s16>)
+    ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
+    ; SI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV26]](<2 x s16>)
+    ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C1]](s32)
+    ; SI: [[INSERT5:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF1]], [[UV18]](<5 x s16>), 0
+    ; SI: [[UV27:%[0-9]+]]:_(<2 x s16>), [[UV28:%[0-9]+]]:_(<2 x s16>), [[UV29:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT5]](<6 x s16>)
+    ; SI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV27]](<2 x s16>)
+    ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32)
+    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]]
+    ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]]
+    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
+    ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; SI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]]
+    ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C2]]
+    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32)
+    ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; SI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+    ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C2]]
+    ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+    ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C2]]
+    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32)
+    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; SI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; SI: $vgpr0 = COPY [[BITCAST4]](<2 x s16>)
+    ; SI: $vgpr1 = COPY [[BITCAST5]](<2 x s16>)
+    ; SI: $vgpr2 = COPY [[BITCAST6]](<2 x s16>)
     ; CI-HSA-LABEL: name: test_load_global_v5s16_align8
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; CI-HSA: [[LOAD:%[0-9]+]]:_(<5 x s16>) = G_LOAD [[COPY]](p1) :: (load 10, align 8, addrspace 1)
@@ -7383,11 +7416,44 @@ body: |
     ; SI: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
     ; SI: [[CONCAT_VECTORS3:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF2]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>)
     ; SI: [[UV18:%[0-9]+]]:_(<5 x s16>), [[UV19:%[0-9]+]]:_(<5 x s16>), [[UV20:%[0-9]+]]:_(<5 x s16>), [[UV21:%[0-9]+]]:_(<5 x s16>), [[UV22:%[0-9]+]]:_(<5 x s16>), [[UV23:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<30 x s16>)
-    ; SI: [[CONCAT_VECTORS4:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[UV12]](<5 x s16>), [[UV18]](<5 x s16>)
-    ; SI: [[UV24:%[0-9]+]]:_(<2 x s16>), [[UV25:%[0-9]+]]:_(<2 x s16>), [[UV26:%[0-9]+]]:_(<2 x s16>), [[UV27:%[0-9]+]]:_(<2 x s16>), [[UV28:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS4]](<10 x s16>)
-    ; SI: $vgpr0 = COPY [[UV24]](<2 x s16>)
-    ; SI: $vgpr1 = COPY [[UV25]](<2 x s16>)
-    ; SI: $vgpr2 = COPY [[UV26]](<2 x s16>)
+    ; SI: [[INSERT4:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF1]], [[UV12]](<5 x s16>), 0
+    ; SI: [[UV24:%[0-9]+]]:_(<2 x s16>), [[UV25:%[0-9]+]]:_(<2 x s16>), [[UV26:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT4]](<6 x s16>)
+    ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV24]](<2 x s16>)
+    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32)
+    ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV25]](<2 x s16>)
+    ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
+    ; SI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV26]](<2 x s16>)
+    ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C1]](s32)
+    ; SI: [[INSERT5:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF1]], [[UV18]](<5 x s16>), 0
+    ; SI: [[UV27:%[0-9]+]]:_(<2 x s16>), [[UV28:%[0-9]+]]:_(<2 x s16>), [[UV29:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT5]](<6 x s16>)
+    ; SI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV27]](<2 x s16>)
+    ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32)
+    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]]
+    ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]]
+    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
+    ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; SI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]]
+    ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C2]]
+    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32)
+    ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; SI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+    ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C2]]
+    ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+    ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C2]]
+    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32)
+    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; SI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; SI: $vgpr0 = COPY [[BITCAST4]](<2 x s16>)
+    ; SI: $vgpr1 = COPY [[BITCAST5]](<2 x s16>)
+    ; SI: $vgpr2 = COPY [[BITCAST6]](<2 x s16>)
     ; CI-HSA-LABEL: name: test_load_global_v5s16_align4
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; CI-HSA: [[LOAD:%[0-9]+]]:_(<5 x s16>) = G_LOAD [[COPY]](p1) :: (load 10, align 4, addrspace 1)
@@ -7513,11 +7579,42 @@ body: |
     ; SI: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
     ; SI: [[CONCAT_VECTORS4:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF2]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>)
     ; SI: [[UV18:%[0-9]+]]:_(<5 x s16>), [[UV19:%[0-9]+]]:_(<5 x s16>), [[UV20:%[0-9]+]]:_(<5 x s16>), [[UV21:%[0-9]+]]:_(<5 x s16>), [[UV22:%[0-9]+]]:_(<5 x s16>), [[UV23:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS4]](<30 x s16>)
-    ; SI: [[CONCAT_VECTORS5:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[UV12]](<5 x s16>), [[UV18]](<5 x s16>)
-    ; SI: [[UV24:%[0-9]+]]:_(<2 x s16>), [[UV25:%[0-9]+]]:_(<2 x s16>), [[UV26:%[0-9]+]]:_(<2 x s16>), [[UV27:%[0-9]+]]:_(<2 x s16>), [[UV28:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS5]](<10 x s16>)
-    ; SI: $vgpr0 = COPY [[UV24]](<2 x s16>)
-    ; SI: $vgpr1 = COPY [[UV25]](<2 x s16>)
-    ; SI: $vgpr2 = COPY [[UV26]](<2 x s16>)
+    ; SI: [[INSERT4:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF1]], [[UV12]](<5 x s16>), 0
+    ; SI: [[UV24:%[0-9]+]]:_(<2 x s16>), [[UV25:%[0-9]+]]:_(<2 x s16>), [[UV26:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT4]](<6 x s16>)
+    ; SI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV24]](<2 x s16>)
+    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C4]](s32)
+    ; SI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV25]](<2 x s16>)
+    ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C4]](s32)
+    ; SI: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV26]](<2 x s16>)
+    ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C4]](s32)
+    ; SI: [[INSERT5:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF1]], [[UV18]](<5 x s16>), 0
+    ; SI: [[UV27:%[0-9]+]]:_(<2 x s16>), [[UV28:%[0-9]+]]:_(<2 x s16>), [[UV29:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT5]](<6 x s16>)
+    ; SI: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV27]](<2 x s16>)
+    ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C4]](s32)
+    ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+    ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]]
+    ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]]
+    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; SI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+    ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]]
+    ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]]
+    ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C4]](s32)
+    ; SI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
+    ; SI: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+    ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32)
+    ; SI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]]
+    ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32)
+    ; SI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C3]]
+    ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; SI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]]
+    ; SI: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+    ; SI: $vgpr0 = COPY [[BITCAST6]](<2 x s16>)
+    ; SI: $vgpr1 = COPY [[BITCAST7]](<2 x s16>)
+    ; SI: $vgpr2 = COPY [[BITCAST8]](<2 x s16>)
     ; CI-HSA-LABEL: name: test_load_global_v5s16_align2
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; CI-HSA: [[LOAD:%[0-9]+]]:_(<5 x s16>) = G_LOAD [[COPY]](p1) :: (load 10, align 2, addrspace 1)
@@ -7578,11 +7675,42 @@ body: |
     ; CI-MESA: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
     ; CI-MESA: [[CONCAT_VECTORS4:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF2]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>)
     ; CI-MESA: [[UV18:%[0-9]+]]:_(<5 x s16>), [[UV19:%[0-9]+]]:_(<5 x s16>), [[UV20:%[0-9]+]]:_(<5 x s16>), [[UV21:%[0-9]+]]:_(<5 x s16>), [[UV22:%[0-9]+]]:_(<5 x s16>), [[UV23:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS4]](<30 x s16>)
-    ; CI-MESA: [[CONCAT_VECTORS5:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[UV12]](<5 x s16>), [[UV18]](<5 x s16>)
-    ; CI-MESA: [[UV24:%[0-9]+]]:_(<2 x s16>), [[UV25:%[0-9]+]]:_(<2 x s16>), [[UV26:%[0-9]+]]:_(<2 x s16>), [[UV27:%[0-9]+]]:_(<2 x s16>), [[UV28:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS5]](<10 x s16>)
-    ; CI-MESA: $vgpr0 = COPY [[UV24]](<2 x s16>)
-    ; CI-MESA: $vgpr1 = COPY [[UV25]](<2 x s16>)
-    ; CI-MESA: $vgpr2 = COPY [[UV26]](<2 x s16>)
+    ; CI-MESA: [[INSERT4:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF1]], [[UV12]](<5 x s16>), 0
+    ; CI-MESA: [[UV24:%[0-9]+]]:_(<2 x s16>), [[UV25:%[0-9]+]]:_(<2 x s16>), [[UV26:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT4]](<6 x s16>)
+    ; CI-MESA: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV24]](<2 x s16>)
+    ; CI-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C4]](s32)
+    ; CI-MESA: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV25]](<2 x s16>)
+    ; CI-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C4]](s32)
+    ; CI-MESA: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV26]](<2 x s16>)
+    ; CI-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C4]](s32)
+    ; CI-MESA: [[INSERT5:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF1]], [[UV18]](<5 x s16>), 0
+    ; CI-MESA: [[UV27:%[0-9]+]]:_(<2 x s16>), [[UV28:%[0-9]+]]:_(<2 x s16>), [[UV29:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT5]](<6 x s16>)
+    ; CI-MESA: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV27]](<2 x s16>)
+    ; CI-MESA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C4]](s32)
+    ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+    ; CI-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]]
+    ; CI-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; CI-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]]
+    ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; CI-MESA: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; CI-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+    ; CI-MESA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]]
+    ; CI-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; CI-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]]
+    ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C4]](s32)
+    ; CI-MESA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
+    ; CI-MESA: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+    ; CI-MESA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32)
+    ; CI-MESA: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]]
+    ; CI-MESA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32)
+    ; CI-MESA: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C3]]
+    ; CI-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; CI-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]]
+    ; CI-MESA: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+    ; CI-MESA: $vgpr0 = COPY [[BITCAST6]](<2 x s16>)
+    ; CI-MESA: $vgpr1 = COPY [[BITCAST7]](<2 x s16>)
+    ; CI-MESA: $vgpr2 = COPY [[BITCAST8]](<2 x s16>)
     ; VI-LABEL: name: test_load_global_v5s16_align2
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1)
@@ -7631,11 +7759,42 @@ body: |
     ; VI: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
     ; VI: [[CONCAT_VECTORS4:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF2]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>)
     ; VI: [[UV18:%[0-9]+]]:_(<5 x s16>), [[UV19:%[0-9]+]]:_(<5 x s16>), [[UV20:%[0-9]+]]:_(<5 x s16>), [[UV21:%[0-9]+]]:_(<5 x s16>), [[UV22:%[0-9]+]]:_(<5 x s16>), [[UV23:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS4]](<30 x s16>)
-    ; VI: [[CONCAT_VECTORS5:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[UV12]](<5 x s16>), [[UV18]](<5 x s16>)
-    ; VI: [[UV24:%[0-9]+]]:_(<2 x s16>), [[UV25:%[0-9]+]]:_(<2 x s16>), [[UV26:%[0-9]+]]:_(<2 x s16>), [[UV27:%[0-9]+]]:_(<2 x s16>), [[UV28:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS5]](<10 x s16>)
-    ; VI: $vgpr0 = COPY [[UV24]](<2 x s16>)
-    ; VI: $vgpr1 = COPY [[UV25]](<2 x s16>)
-    ; VI: $vgpr2 = COPY [[UV26]](<2 x s16>)
+    ; VI: [[INSERT4:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF1]], [[UV12]](<5 x s16>), 0
+    ; VI: [[UV24:%[0-9]+]]:_(<2 x s16>), [[UV25:%[0-9]+]]:_(<2 x s16>), [[UV26:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT4]](<6 x s16>)
+    ; VI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV24]](<2 x s16>)
+    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C4]](s32)
+    ; VI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV25]](<2 x s16>)
+    ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C4]](s32)
+    ; VI: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV26]](<2 x s16>)
+    ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C4]](s32)
+    ; VI: [[INSERT5:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF1]], [[UV18]](<5 x s16>), 0
+    ; VI: [[UV27:%[0-9]+]]:_(<2 x s16>), [[UV28:%[0-9]+]]:_(<2 x s16>), [[UV29:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT5]](<6 x s16>)
+    ; VI: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV27]](<2 x s16>)
+    ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C4]](s32)
+    ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+    ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]]
+    ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]]
+    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; VI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+    ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]]
+    ; VI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]]
+    ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C4]](s32)
+    ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
+    ; VI: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+    ; VI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32)
+    ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]]
+    ; VI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32)
+    ; VI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C3]]
+    ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]]
+    ; VI: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+    ; VI: $vgpr0 = COPY [[BITCAST6]](<2 x s16>)
+    ; VI: $vgpr1 = COPY [[BITCAST7]](<2 x s16>)
+    ; VI: $vgpr2 = COPY [[BITCAST8]](<2 x s16>)
     ; GFX9-HSA-LABEL: name: test_load_global_v5s16_align2
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<5 x s16>) = G_LOAD [[COPY]](p1) :: (load 10, align 2, addrspace 1)
@@ -7686,11 +7845,31 @@ body: |
     ; GFX9-MESA: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
     ; GFX9-MESA: [[CONCAT_VECTORS4:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF2]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>)
     ; GFX9-MESA: [[UV18:%[0-9]+]]:_(<5 x s16>), [[UV19:%[0-9]+]]:_(<5 x s16>), [[UV20:%[0-9]+]]:_(<5 x s16>), [[UV21:%[0-9]+]]:_(<5 x s16>), [[UV22:%[0-9]+]]:_(<5 x s16>), [[UV23:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS4]](<30 x s16>)
-    ; GFX9-MESA: [[CONCAT_VECTORS5:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[UV12]](<5 x s16>), [[UV18]](<5 x s16>)
-    ; GFX9-MESA: [[UV24:%[0-9]+]]:_(<2 x s16>), [[UV25:%[0-9]+]]:_(<2 x s16>), [[UV26:%[0-9]+]]:_(<2 x s16>), [[UV27:%[0-9]+]]:_(<2 x s16>), [[UV28:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS5]](<10 x s16>)
-    ; GFX9-MESA: $vgpr0 = COPY [[UV24]](<2 x s16>)
-    ; GFX9-MESA: $vgpr1 = COPY [[UV25]](<2 x s16>)
-    ; GFX9-MESA: $vgpr2 = COPY [[UV26]](<2 x s16>)
+    ; GFX9-MESA: [[INSERT4:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF1]], [[UV12]](<5 x s16>), 0
+    ; GFX9-MESA: [[UV24:%[0-9]+]]:_(<2 x s16>), [[UV25:%[0-9]+]]:_(<2 x s16>), [[UV26:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT4]](<6 x s16>)
+    ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV24]](<2 x s16>)
+    ; GFX9-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32)
+    ; GFX9-MESA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV25]](<2 x s16>)
+    ; GFX9-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C4]](s32)
+    ; GFX9-MESA: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV26]](<2 x s16>)
+    ; GFX9-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C4]](s32)
+    ; GFX9-MESA: [[INSERT5:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF1]], [[UV18]](<5 x s16>), 0
+    ; GFX9-MESA: [[UV27:%[0-9]+]]:_(<2 x s16>), [[UV28:%[0-9]+]]:_(<2 x s16>), [[UV29:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT5]](<6 x s16>)
+    ; GFX9-MESA: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV27]](<2 x s16>)
+    ; GFX9-MESA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C4]](s32)
+    ; GFX9-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; GFX9-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32)
+    ; GFX9-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; GFX9-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY7]](s32), [[COPY8]](s32)
+    ; GFX9-MESA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+    ; GFX9-MESA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY9]](s32), [[COPY10]](s32)
+    ; GFX9-MESA: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+    ; GFX9-MESA: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC3]](<2 x s16>)
+    ; GFX9-MESA: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC4]](<2 x s16>)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<5 x s16>) = G_LOAD %0 :: (load 10, align 2, addrspace 1)
     %2:_(<5 x s16>) = G_IMPLICIT_DEF
@@ -7802,11 +7981,43 @@ body: |
     ; SI: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
     ; SI: [[CONCAT_VECTORS4:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF2]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>)
     ; SI: [[UV18:%[0-9]+]]:_(<5 x s16>), [[UV19:%[0-9]+]]:_(<5 x s16>), [[UV20:%[0-9]+]]:_(<5 x s16>), [[UV21:%[0-9]+]]:_(<5 x s16>), [[UV22:%[0-9]+]]:_(<5 x s16>), [[UV23:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS4]](<30 x s16>)
-    ; SI: [[CONCAT_VECTORS5:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[UV12]](<5 x s16>), [[UV18]](<5 x s16>)
-    ; SI: [[UV24:%[0-9]+]]:_(<2 x s16>), [[UV25:%[0-9]+]]:_(<2 x s16>), [[UV26:%[0-9]+]]:_(<2 x s16>), [[UV27:%[0-9]+]]:_(<2 x s16>), [[UV28:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS5]](<10 x s16>)
-    ; SI: $vgpr0 = COPY [[UV24]](<2 x s16>)
-    ; SI: $vgpr1 = COPY [[UV25]](<2 x s16>)
-    ; SI: $vgpr2 = COPY [[UV26]](<2 x s16>)
+    ; SI: [[INSERT4:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF1]], [[UV12]](<5 x s16>), 0
+    ; SI: [[UV24:%[0-9]+]]:_(<2 x s16>), [[UV25:%[0-9]+]]:_(<2 x s16>), [[UV26:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT4]](<6 x s16>)
+    ; SI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV24]](<2 x s16>)
+    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C5]](s32)
+    ; SI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV25]](<2 x s16>)
+    ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C5]](s32)
+    ; SI: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV26]](<2 x s16>)
+    ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C5]](s32)
+    ; SI: [[INSERT5:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF1]], [[UV18]](<5 x s16>), 0
+    ; SI: [[UV27:%[0-9]+]]:_(<2 x s16>), [[UV28:%[0-9]+]]:_(<2 x s16>), [[UV29:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT5]](<6 x s16>)
+    ; SI: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV27]](<2 x s16>)
+    ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C5]](s32)
+    ; SI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+    ; SI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C8]]
+    ; SI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; SI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C8]]
+    ; SI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C5]](s32)
+    ; SI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL7]]
+    ; SI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32)
+    ; SI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+    ; SI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C8]]
+    ; SI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; SI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C8]]
+    ; SI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C5]](s32)
+    ; SI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL8]]
+    ; SI: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32)
+    ; SI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32)
+    ; SI: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C8]]
+    ; SI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32)
+    ; SI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C8]]
+    ; SI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C5]](s32)
+    ; SI: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND14]], [[SHL9]]
+    ; SI: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR9]](s32)
+    ; SI: $vgpr0 = COPY [[BITCAST6]](<2 x s16>)
+    ; SI: $vgpr1 = COPY [[BITCAST7]](<2 x s16>)
+    ; SI: $vgpr2 = COPY [[BITCAST8]](<2 x s16>)
     ; CI-HSA-LABEL: name: test_load_global_v5s16_align1
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; CI-HSA: [[LOAD:%[0-9]+]]:_(<5 x s16>) = G_LOAD [[COPY]](p1) :: (load 10, align 1, addrspace 1)
@@ -7913,11 +8124,43 @@ body: |
     ; CI-MESA: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
     ; CI-MESA: [[CONCAT_VECTORS4:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF2]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>)
     ; CI-MESA: [[UV18:%[0-9]+]]:_(<5 x s16>), [[UV19:%[0-9]+]]:_(<5 x s16>), [[UV20:%[0-9]+]]:_(<5 x s16>), [[UV21:%[0-9]+]]:_(<5 x s16>), [[UV22:%[0-9]+]]:_(<5 x s16>), [[UV23:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS4]](<30 x s16>)
-    ; CI-MESA: [[CONCAT_VECTORS5:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[UV12]](<5 x s16>), [[UV18]](<5 x s16>)
-    ; CI-MESA: [[UV24:%[0-9]+]]:_(<2 x s16>), [[UV25:%[0-9]+]]:_(<2 x s16>), [[UV26:%[0-9]+]]:_(<2 x s16>), [[UV27:%[0-9]+]]:_(<2 x s16>), [[UV28:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS5]](<10 x s16>)
-    ; CI-MESA: $vgpr0 = COPY [[UV24]](<2 x s16>)
-    ; CI-MESA: $vgpr1 = COPY [[UV25]](<2 x s16>)
-    ; CI-MESA: $vgpr2 = COPY [[UV26]](<2 x s16>)
+    ; CI-MESA: [[INSERT4:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF1]], [[UV12]](<5 x s16>), 0
+    ; CI-MESA: [[UV24:%[0-9]+]]:_(<2 x s16>), [[UV25:%[0-9]+]]:_(<2 x s16>), [[UV26:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT4]](<6 x s16>)
+    ; CI-MESA: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV24]](<2 x s16>)
+    ; CI-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C5]](s32)
+    ; CI-MESA: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV25]](<2 x s16>)
+    ; CI-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C5]](s32)
+    ; CI-MESA: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV26]](<2 x s16>)
+    ; CI-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C5]](s32)
+    ; CI-MESA: [[INSERT5:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF1]], [[UV18]](<5 x s16>), 0
+    ; CI-MESA: [[UV27:%[0-9]+]]:_(<2 x s16>), [[UV28:%[0-9]+]]:_(<2 x s16>), [[UV29:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT5]](<6 x s16>)
+    ; CI-MESA: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV27]](<2 x s16>)
+    ; CI-MESA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C5]](s32)
+    ; CI-MESA: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-MESA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+    ; CI-MESA: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C8]]
+    ; CI-MESA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; CI-MESA: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C8]]
+    ; CI-MESA: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C5]](s32)
+    ; CI-MESA: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL7]]
+    ; CI-MESA: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32)
+    ; CI-MESA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+    ; CI-MESA: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C8]]
+    ; CI-MESA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; CI-MESA: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C8]]
+    ; CI-MESA: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C5]](s32)
+    ; CI-MESA: [[OR8:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL8]]
+    ; CI-MESA: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32)
+    ; CI-MESA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32)
+    ; CI-MESA: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C8]]
+    ; CI-MESA: [[COPY15:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32)
+    ; CI-MESA: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C8]]
+    ; CI-MESA: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C5]](s32)
+    ; CI-MESA: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND14]], [[SHL9]]
+    ; CI-MESA: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR9]](s32)
+    ; CI-MESA: $vgpr0 = COPY [[BITCAST6]](<2 x s16>)
+    ; CI-MESA: $vgpr1 = COPY [[BITCAST7]](<2 x s16>)
+    ; CI-MESA: $vgpr2 = COPY [[BITCAST8]](<2 x s16>)
     ; VI-LABEL: name: test_load_global_v5s16_align1
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1)
@@ -8002,11 +8245,43 @@ body: |
     ; VI: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
     ; VI: [[CONCAT_VECTORS4:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF2]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>)
     ; VI: [[UV18:%[0-9]+]]:_(<5 x s16>), [[UV19:%[0-9]+]]:_(<5 x s16>), [[UV20:%[0-9]+]]:_(<5 x s16>), [[UV21:%[0-9]+]]:_(<5 x s16>), [[UV22:%[0-9]+]]:_(<5 x s16>), [[UV23:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS4]](<30 x s16>)
-    ; VI: [[CONCAT_VECTORS5:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[UV12]](<5 x s16>), [[UV18]](<5 x s16>)
-    ; VI: [[UV24:%[0-9]+]]:_(<2 x s16>), [[UV25:%[0-9]+]]:_(<2 x s16>), [[UV26:%[0-9]+]]:_(<2 x s16>), [[UV27:%[0-9]+]]:_(<2 x s16>), [[UV28:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS5]](<10 x s16>)
-    ; VI: $vgpr0 = COPY [[UV24]](<2 x s16>)
-    ; VI: $vgpr1 = COPY [[UV25]](<2 x s16>)
-    ; VI: $vgpr2 = COPY [[UV26]](<2 x s16>)
+    ; VI: [[INSERT4:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF1]], [[UV12]](<5 x s16>), 0
+    ; VI: [[UV24:%[0-9]+]]:_(<2 x s16>), [[UV25:%[0-9]+]]:_(<2 x s16>), [[UV26:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT4]](<6 x s16>)
+    ; VI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV24]](<2 x s16>)
+    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C4]](s32)
+    ; VI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV25]](<2 x s16>)
+    ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C4]](s32)
+    ; VI: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV26]](<2 x s16>)
+    ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C4]](s32)
+    ; VI: [[INSERT5:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF1]], [[UV18]](<5 x s16>), 0
+    ; VI: [[UV27:%[0-9]+]]:_(<2 x s16>), [[UV28:%[0-9]+]]:_(<2 x s16>), [[UV29:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT5]](<6 x s16>)
+    ; VI: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV27]](<2 x s16>)
+    ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C4]](s32)
+    ; VI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+    ; VI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C7]]
+    ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; VI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C7]]
+    ; VI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C4]](s32)
+    ; VI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL7]]
+    ; VI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32)
+    ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+    ; VI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C7]]
+    ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; VI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C7]]
+    ; VI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
+    ; VI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL8]]
+    ; VI: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32)
+    ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32)
+    ; VI: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C7]]
+    ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32)
+    ; VI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C7]]
+    ; VI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C4]](s32)
+    ; VI: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND14]], [[SHL9]]
+    ; VI: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR9]](s32)
+    ; VI: $vgpr0 = COPY [[BITCAST6]](<2 x s16>)
+    ; VI: $vgpr1 = COPY [[BITCAST7]](<2 x s16>)
+    ; VI: $vgpr2 = COPY [[BITCAST8]](<2 x s16>)
     ; GFX9-HSA-LABEL: name: test_load_global_v5s16_align1
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<5 x s16>) = G_LOAD [[COPY]](p1) :: (load 10, align 1, addrspace 1)
@@ -8098,11 +8373,31 @@ body: |
     ; GFX9-MESA: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
     ; GFX9-MESA: [[CONCAT_VECTORS4:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF2]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>)
     ; GFX9-MESA: [[UV18:%[0-9]+]]:_(<5 x s16>), [[UV19:%[0-9]+]]:_(<5 x s16>), [[UV20:%[0-9]+]]:_(<5 x s16>), [[UV21:%[0-9]+]]:_(<5 x s16>), [[UV22:%[0-9]+]]:_(<5 x s16>), [[UV23:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS4]](<30 x s16>)
-    ; GFX9-MESA: [[CONCAT_VECTORS5:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[UV12]](<5 x s16>), [[UV18]](<5 x s16>)
-    ; GFX9-MESA: [[UV24:%[0-9]+]]:_(<2 x s16>), [[UV25:%[0-9]+]]:_(<2 x s16>), [[UV26:%[0-9]+]]:_(<2 x s16>), [[UV27:%[0-9]+]]:_(<2 x s16>), [[UV28:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS5]](<10 x s16>)
-    ; GFX9-MESA: $vgpr0 = COPY [[UV24]](<2 x s16>)
-    ; GFX9-MESA: $vgpr1 = COPY [[UV25]](<2 x s16>)
-    ; GFX9-MESA: $vgpr2 = COPY [[UV26]](<2 x s16>)
+    ; GFX9-MESA: [[INSERT4:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF1]], [[UV12]](<5 x s16>), 0
+    ; GFX9-MESA: [[UV24:%[0-9]+]]:_(<2 x s16>), [[UV25:%[0-9]+]]:_(<2 x s16>), [[UV26:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT4]](<6 x s16>)
+    ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV24]](<2 x s16>)
+    ; GFX9-MESA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C6]](s32)
+    ; GFX9-MESA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV25]](<2 x s16>)
+    ; GFX9-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C6]](s32)
+    ; GFX9-MESA: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV26]](<2 x s16>)
+    ; GFX9-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C6]](s32)
+    ; GFX9-MESA: [[INSERT5:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF1]], [[UV18]](<5 x s16>), 0
+    ; GFX9-MESA: [[UV27:%[0-9]+]]:_(<2 x s16>), [[UV28:%[0-9]+]]:_(<2 x s16>), [[UV29:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT5]](<6 x s16>)
+    ; GFX9-MESA: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV27]](<2 x s16>)
+    ; GFX9-MESA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C6]](s32)
+    ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
+    ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32)
+    ; GFX9-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+    ; GFX9-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32)
+    ; GFX9-MESA: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+    ; GFX9-MESA: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC3]](<2 x s16>)
+    ; GFX9-MESA: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC4]](<2 x s16>)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<5 x s16>) = G_LOAD %0 :: (load 10, align 1, addrspace 1)
     %2:_(<5 x s16>) = G_IMPLICIT_DEF
@@ -9167,12 +9462,52 @@ body: |
     ; SI: [[DEF4:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
     ; SI: [[CONCAT_VECTORS7:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[DEF4]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>)
     ; SI: [[UV36:%[0-9]+]]:_(<7 x s16>), [[UV37:%[0-9]+]]:_(<7 x s16>), [[UV38:%[0-9]+]]:_(<7 x s16>), [[UV39:%[0-9]+]]:_(<7 x s16>), [[UV40:%[0-9]+]]:_(<7 x s16>), [[UV41:%[0-9]+]]:_(<7 x s16>), [[UV42:%[0-9]+]]:_(<7 x s16>), [[UV43:%[0-9]+]]:_(<7 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS7]](<56 x s16>)
-    ; SI: [[CONCAT_VECTORS8:%[0-9]+]]:_(<14 x s16>) = G_CONCAT_VECTORS [[UV28]](<7 x s16>), [[UV36]](<7 x s16>)
-    ; SI: [[UV44:%[0-9]+]]:_(<2 x s16>), [[UV45:%[0-9]+]]:_(<2 x s16>), [[UV46:%[0-9]+]]:_(<2 x s16>), [[UV47:%[0-9]+]]:_(<2 x s16>), [[UV48:%[0-9]+]]:_(<2 x s16>), [[UV49:%[0-9]+]]:_(<2 x s16>), [[UV50:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS8]](<14 x s16>)
-    ; SI: $vgpr0 = COPY [[UV44]](<2 x s16>)
-    ; SI: $vgpr1 = COPY [[UV45]](<2 x s16>)
-    ; SI: $vgpr2 = COPY [[UV46]](<2 x s16>)
-    ; SI: $vgpr3 = COPY [[UV47]](<2 x s16>)
+    ; SI: [[INSERT8:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[DEF3]], [[UV28]](<7 x s16>), 0
+    ; SI: [[UV44:%[0-9]+]]:_(<2 x s16>), [[UV45:%[0-9]+]]:_(<2 x s16>), [[UV46:%[0-9]+]]:_(<2 x s16>), [[UV47:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT8]](<8 x s16>)
+    ; SI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV44]](<2 x s16>)
+    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C4]](s32)
+    ; SI: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV45]](<2 x s16>)
+    ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C4]](s32)
+    ; SI: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV46]](<2 x s16>)
+    ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C4]](s32)
+    ; SI: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV47]](<2 x s16>)
+    ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C4]](s32)
+    ; SI: [[INSERT9:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[DEF3]], [[UV36]](<7 x s16>), 0
+    ; SI: [[UV48:%[0-9]+]]:_(<2 x s16>), [[UV49:%[0-9]+]]:_(<2 x s16>), [[UV50:%[0-9]+]]:_(<2 x s16>), [[UV51:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT9]](<8 x s16>)
+    ; SI: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV48]](<2 x s16>)
+    ; SI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C4]](s32)
+    ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+    ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]]
+    ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]]
+    ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C4]](s32)
+    ; SI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
+    ; SI: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+    ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32)
+    ; SI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]]
+    ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; SI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C3]]
+    ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; SI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]]
+    ; SI: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+    ; SI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32)
+    ; SI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C3]]
+    ; SI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
+    ; SI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C3]]
+    ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C4]](s32)
+    ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL5]]
+    ; SI: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+    ; SI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[BITCAST6]](s32)
+    ; SI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]]
+    ; SI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[BITCAST7]](s32)
+    ; SI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C3]]
+    ; SI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
+    ; SI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL6]]
+    ; SI: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32)
+    ; SI: $vgpr0 = COPY [[BITCAST8]](<2 x s16>)
+    ; SI: $vgpr1 = COPY [[BITCAST9]](<2 x s16>)
+    ; SI: $vgpr2 = COPY [[BITCAST10]](<2 x s16>)
+    ; SI: $vgpr3 = COPY [[BITCAST11]](<2 x s16>)
     ; CI-HSA-LABEL: name: test_load_global_v7s16_align2
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; CI-HSA: [[LOAD:%[0-9]+]]:_(<7 x s16>) = G_LOAD [[COPY]](p1) :: (load 14, align 2, addrspace 1)
@@ -9257,12 +9592,52 @@ body: |
     ; CI-MESA: [[DEF4:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
     ; CI-MESA: [[CONCAT_VECTORS7:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[DEF4]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>)
     ; CI-MESA: [[UV36:%[0-9]+]]:_(<7 x s16>), [[UV37:%[0-9]+]]:_(<7 x s16>), [[UV38:%[0-9]+]]:_(<7 x s16>), [[UV39:%[0-9]+]]:_(<7 x s16>), [[UV40:%[0-9]+]]:_(<7 x s16>), [[UV41:%[0-9]+]]:_(<7 x s16>), [[UV42:%[0-9]+]]:_(<7 x s16>), [[UV43:%[0-9]+]]:_(<7 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS7]](<56 x s16>)
-    ; CI-MESA: [[CONCAT_VECTORS8:%[0-9]+]]:_(<14 x s16>) = G_CONCAT_VECTORS [[UV28]](<7 x s16>), [[UV36]](<7 x s16>)
-    ; CI-MESA: [[UV44:%[0-9]+]]:_(<2 x s16>), [[UV45:%[0-9]+]]:_(<2 x s16>), [[UV46:%[0-9]+]]:_(<2 x s16>), [[UV47:%[0-9]+]]:_(<2 x s16>), [[UV48:%[0-9]+]]:_(<2 x s16>), [[UV49:%[0-9]+]]:_(<2 x s16>), [[UV50:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS8]](<14 x s16>)
-    ; CI-MESA: $vgpr0 = COPY [[UV44]](<2 x s16>)
-    ; CI-MESA: $vgpr1 = COPY [[UV45]](<2 x s16>)
-    ; CI-MESA: $vgpr2 = COPY [[UV46]](<2 x s16>)
-    ; CI-MESA: $vgpr3 = COPY [[UV47]](<2 x s16>)
+    ; CI-MESA: [[INSERT8:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[DEF3]], [[UV28]](<7 x s16>), 0
+    ; CI-MESA: [[UV44:%[0-9]+]]:_(<2 x s16>), [[UV45:%[0-9]+]]:_(<2 x s16>), [[UV46:%[0-9]+]]:_(<2 x s16>), [[UV47:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT8]](<8 x s16>)
+    ; CI-MESA: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV44]](<2 x s16>)
+    ; CI-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C4]](s32)
+    ; CI-MESA: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV45]](<2 x s16>)
+    ; CI-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C4]](s32)
+    ; CI-MESA: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV46]](<2 x s16>)
+    ; CI-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C4]](s32)
+    ; CI-MESA: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV47]](<2 x s16>)
+    ; CI-MESA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C4]](s32)
+    ; CI-MESA: [[INSERT9:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[DEF3]], [[UV36]](<7 x s16>), 0
+    ; CI-MESA: [[UV48:%[0-9]+]]:_(<2 x s16>), [[UV49:%[0-9]+]]:_(<2 x s16>), [[UV50:%[0-9]+]]:_(<2 x s16>), [[UV51:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT9]](<8 x s16>)
+    ; CI-MESA: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV48]](<2 x s16>)
+    ; CI-MESA: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C4]](s32)
+    ; CI-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+    ; CI-MESA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]]
+    ; CI-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; CI-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]]
+    ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C4]](s32)
+    ; CI-MESA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
+    ; CI-MESA: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+    ; CI-MESA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32)
+    ; CI-MESA: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]]
+    ; CI-MESA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; CI-MESA: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C3]]
+    ; CI-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; CI-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]]
+    ; CI-MESA: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+    ; CI-MESA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32)
+    ; CI-MESA: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C3]]
+    ; CI-MESA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
+    ; CI-MESA: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C3]]
+    ; CI-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C4]](s32)
+    ; CI-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL5]]
+    ; CI-MESA: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+    ; CI-MESA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[BITCAST6]](s32)
+    ; CI-MESA: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]]
+    ; CI-MESA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[BITCAST7]](s32)
+    ; CI-MESA: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C3]]
+    ; CI-MESA: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
+    ; CI-MESA: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL6]]
+    ; CI-MESA: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32)
+    ; CI-MESA: $vgpr0 = COPY [[BITCAST8]](<2 x s16>)
+    ; CI-MESA: $vgpr1 = COPY [[BITCAST9]](<2 x s16>)
+    ; CI-MESA: $vgpr2 = COPY [[BITCAST10]](<2 x s16>)
+    ; CI-MESA: $vgpr3 = COPY [[BITCAST11]](<2 x s16>)
     ; VI-LABEL: name: test_load_global_v7s16_align2
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1)
@@ -9334,12 +9709,52 @@ body: |
     ; VI: [[DEF4:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
     ; VI: [[CONCAT_VECTORS7:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[DEF4]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>)
     ; VI: [[UV36:%[0-9]+]]:_(<7 x s16>), [[UV37:%[0-9]+]]:_(<7 x s16>), [[UV38:%[0-9]+]]:_(<7 x s16>), [[UV39:%[0-9]+]]:_(<7 x s16>), [[UV40:%[0-9]+]]:_(<7 x s16>), [[UV41:%[0-9]+]]:_(<7 x s16>), [[UV42:%[0-9]+]]:_(<7 x s16>), [[UV43:%[0-9]+]]:_(<7 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS7]](<56 x s16>)
-    ; VI: [[CONCAT_VECTORS8:%[0-9]+]]:_(<14 x s16>) = G_CONCAT_VECTORS [[UV28]](<7 x s16>), [[UV36]](<7 x s16>)
-    ; VI: [[UV44:%[0-9]+]]:_(<2 x s16>), [[UV45:%[0-9]+]]:_(<2 x s16>), [[UV46:%[0-9]+]]:_(<2 x s16>), [[UV47:%[0-9]+]]:_(<2 x s16>), [[UV48:%[0-9]+]]:_(<2 x s16>), [[UV49:%[0-9]+]]:_(<2 x s16>), [[UV50:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS8]](<14 x s16>)
-    ; VI: $vgpr0 = COPY [[UV44]](<2 x s16>)
-    ; VI: $vgpr1 = COPY [[UV45]](<2 x s16>)
-    ; VI: $vgpr2 = COPY [[UV46]](<2 x s16>)
-    ; VI: $vgpr3 = COPY [[UV47]](<2 x s16>)
+    ; VI: [[INSERT8:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[DEF3]], [[UV28]](<7 x s16>), 0
+    ; VI: [[UV44:%[0-9]+]]:_(<2 x s16>), [[UV45:%[0-9]+]]:_(<2 x s16>), [[UV46:%[0-9]+]]:_(<2 x s16>), [[UV47:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT8]](<8 x s16>)
+    ; VI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV44]](<2 x s16>)
+    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C4]](s32)
+    ; VI: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV45]](<2 x s16>)
+    ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C4]](s32)
+    ; VI: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV46]](<2 x s16>)
+    ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C4]](s32)
+    ; VI: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV47]](<2 x s16>)
+    ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C4]](s32)
+    ; VI: [[INSERT9:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[DEF3]], [[UV36]](<7 x s16>), 0
+    ; VI: [[UV48:%[0-9]+]]:_(<2 x s16>), [[UV49:%[0-9]+]]:_(<2 x s16>), [[UV50:%[0-9]+]]:_(<2 x s16>), [[UV51:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT9]](<8 x s16>)
+    ; VI: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV48]](<2 x s16>)
+    ; VI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C4]](s32)
+    ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+    ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]]
+    ; VI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]]
+    ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C4]](s32)
+    ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
+    ; VI: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+    ; VI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32)
+    ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]]
+    ; VI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; VI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C3]]
+    ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]]
+    ; VI: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+    ; VI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32)
+    ; VI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C3]]
+    ; VI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
+    ; VI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C3]]
+    ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C4]](s32)
+    ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL5]]
+    ; VI: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+    ; VI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[BITCAST6]](s32)
+    ; VI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]]
+    ; VI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[BITCAST7]](s32)
+    ; VI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C3]]
+    ; VI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
+    ; VI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL6]]
+    ; VI: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32)
+    ; VI: $vgpr0 = COPY [[BITCAST8]](<2 x s16>)
+    ; VI: $vgpr1 = COPY [[BITCAST9]](<2 x s16>)
+    ; VI: $vgpr2 = COPY [[BITCAST10]](<2 x s16>)
+    ; VI: $vgpr3 = COPY [[BITCAST11]](<2 x s16>)
     ; GFX9-HSA-LABEL: name: test_load_global_v7s16_align2
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<7 x s16>) = G_LOAD [[COPY]](p1) :: (load 14, align 2, addrspace 1)
@@ -9410,12 +9825,37 @@ body: |
     ; GFX9-MESA: [[DEF4:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
     ; GFX9-MESA: [[CONCAT_VECTORS7:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[DEF4]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>)
     ; GFX9-MESA: [[UV36:%[0-9]+]]:_(<7 x s16>), [[UV37:%[0-9]+]]:_(<7 x s16>), [[UV38:%[0-9]+]]:_(<7 x s16>), [[UV39:%[0-9]+]]:_(<7 x s16>), [[UV40:%[0-9]+]]:_(<7 x s16>), [[UV41:%[0-9]+]]:_(<7 x s16>), [[UV42:%[0-9]+]]:_(<7 x s16>), [[UV43:%[0-9]+]]:_(<7 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS7]](<56 x s16>)
-    ; GFX9-MESA: [[CONCAT_VECTORS8:%[0-9]+]]:_(<14 x s16>) = G_CONCAT_VECTORS [[UV28]](<7 x s16>), [[UV36]](<7 x s16>)
-    ; GFX9-MESA: [[UV44:%[0-9]+]]:_(<2 x s16>), [[UV45:%[0-9]+]]:_(<2 x s16>), [[UV46:%[0-9]+]]:_(<2 x s16>), [[UV47:%[0-9]+]]:_(<2 x s16>), [[UV48:%[0-9]+]]:_(<2 x s16>), [[UV49:%[0-9]+]]:_(<2 x s16>), [[UV50:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS8]](<14 x s16>)
-    ; GFX9-MESA: $vgpr0 = COPY [[UV44]](<2 x s16>)
-    ; GFX9-MESA: $vgpr1 = COPY [[UV45]](<2 x s16>)
-    ; GFX9-MESA: $vgpr2 = COPY [[UV46]](<2 x s16>)
-    ; GFX9-MESA: $vgpr3 = COPY [[UV47]](<2 x s16>)
+    ; GFX9-MESA: [[INSERT8:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[DEF3]], [[UV28]](<7 x s16>), 0
+    ; GFX9-MESA: [[UV44:%[0-9]+]]:_(<2 x s16>), [[UV45:%[0-9]+]]:_(<2 x s16>), [[UV46:%[0-9]+]]:_(<2 x s16>), [[UV47:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT8]](<8 x s16>)
+    ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV44]](<2 x s16>)
+    ; GFX9-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32)
+    ; GFX9-MESA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV45]](<2 x s16>)
+    ; GFX9-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C4]](s32)
+    ; GFX9-MESA: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV46]](<2 x s16>)
+    ; GFX9-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C4]](s32)
+    ; GFX9-MESA: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV47]](<2 x s16>)
+    ; GFX9-MESA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C4]](s32)
+    ; GFX9-MESA: [[INSERT9:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[DEF3]], [[UV36]](<7 x s16>), 0
+    ; GFX9-MESA: [[UV48:%[0-9]+]]:_(<2 x s16>), [[UV49:%[0-9]+]]:_(<2 x s16>), [[UV50:%[0-9]+]]:_(<2 x s16>), [[UV51:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT9]](<8 x s16>)
+    ; GFX9-MESA: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV48]](<2 x s16>)
+    ; GFX9-MESA: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C4]](s32)
+    ; GFX9-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; GFX9-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY7]](s32), [[COPY8]](s32)
+    ; GFX9-MESA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; GFX9-MESA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY9]](s32), [[COPY10]](s32)
+    ; GFX9-MESA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+    ; GFX9-MESA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
+    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY11]](s32), [[COPY12]](s32)
+    ; GFX9-MESA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+    ; GFX9-MESA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32)
+    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY13]](s32), [[COPY14]](s32)
+    ; GFX9-MESA: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC3]](<2 x s16>)
+    ; GFX9-MESA: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC4]](<2 x s16>)
+    ; GFX9-MESA: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC5]](<2 x s16>)
+    ; GFX9-MESA: $vgpr3 = COPY [[BUILD_VECTOR_TRUNC6]](<2 x s16>)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<7 x s16>) = G_LOAD %0 :: (load 14, align 2, addrspace 1)
     %2:_(<7 x s16>) = G_IMPLICIT_DEF
@@ -9569,12 +10009,53 @@ body: |
     ; SI: [[DEF4:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
     ; SI: [[CONCAT_VECTORS7:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[DEF4]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>)
     ; SI: [[UV36:%[0-9]+]]:_(<7 x s16>), [[UV37:%[0-9]+]]:_(<7 x s16>), [[UV38:%[0-9]+]]:_(<7 x s16>), [[UV39:%[0-9]+]]:_(<7 x s16>), [[UV40:%[0-9]+]]:_(<7 x s16>), [[UV41:%[0-9]+]]:_(<7 x s16>), [[UV42:%[0-9]+]]:_(<7 x s16>), [[UV43:%[0-9]+]]:_(<7 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS7]](<56 x s16>)
-    ; SI: [[CONCAT_VECTORS8:%[0-9]+]]:_(<14 x s16>) = G_CONCAT_VECTORS [[UV28]](<7 x s16>), [[UV36]](<7 x s16>)
-    ; SI: [[UV44:%[0-9]+]]:_(<2 x s16>), [[UV45:%[0-9]+]]:_(<2 x s16>), [[UV46:%[0-9]+]]:_(<2 x s16>), [[UV47:%[0-9]+]]:_(<2 x s16>), [[UV48:%[0-9]+]]:_(<2 x s16>), [[UV49:%[0-9]+]]:_(<2 x s16>), [[UV50:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS8]](<14 x s16>)
-    ; SI: $vgpr0 = COPY [[UV44]](<2 x s16>)
-    ; SI: $vgpr1 = COPY [[UV45]](<2 x s16>)
-    ; SI: $vgpr2 = COPY [[UV46]](<2 x s16>)
-    ; SI: $vgpr3 = COPY [[UV47]](<2 x s16>)
+    ; SI: [[INSERT8:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[DEF3]], [[UV28]](<7 x s16>), 0
+    ; SI: [[UV44:%[0-9]+]]:_(<2 x s16>), [[UV45:%[0-9]+]]:_(<2 x s16>), [[UV46:%[0-9]+]]:_(<2 x s16>), [[UV47:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT8]](<8 x s16>)
+    ; SI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV44]](<2 x s16>)
+    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C5]](s32)
+    ; SI: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV45]](<2 x s16>)
+    ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C5]](s32)
+    ; SI: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV46]](<2 x s16>)
+    ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C5]](s32)
+    ; SI: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV47]](<2 x s16>)
+    ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C5]](s32)
+    ; SI: [[INSERT9:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[DEF3]], [[UV36]](<7 x s16>), 0
+    ; SI: [[UV48:%[0-9]+]]:_(<2 x s16>), [[UV49:%[0-9]+]]:_(<2 x s16>), [[UV50:%[0-9]+]]:_(<2 x s16>), [[UV51:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT9]](<8 x s16>)
+    ; SI: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV48]](<2 x s16>)
+    ; SI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C5]](s32)
+    ; SI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; SI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+    ; SI: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C8]]
+    ; SI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; SI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C8]]
+    ; SI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C5]](s32)
+    ; SI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[AND14]], [[SHL10]]
+    ; SI: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR10]](s32)
+    ; SI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32)
+    ; SI: [[AND16:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C8]]
+    ; SI: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; SI: [[AND17:%[0-9]+]]:_(s32) = G_AND [[COPY17]], [[C8]]
+    ; SI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[C5]](s32)
+    ; SI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[AND16]], [[SHL11]]
+    ; SI: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR11]](s32)
+    ; SI: [[COPY18:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32)
+    ; SI: [[AND18:%[0-9]+]]:_(s32) = G_AND [[COPY18]], [[C8]]
+    ; SI: [[COPY19:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
+    ; SI: [[AND19:%[0-9]+]]:_(s32) = G_AND [[COPY19]], [[C8]]
+    ; SI: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[C5]](s32)
+    ; SI: [[OR12:%[0-9]+]]:_(s32) = G_OR [[AND18]], [[SHL12]]
+    ; SI: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR12]](s32)
+    ; SI: [[COPY20:%[0-9]+]]:_(s32) = COPY [[BITCAST6]](s32)
+    ; SI: [[AND20:%[0-9]+]]:_(s32) = G_AND [[COPY20]], [[C8]]
+    ; SI: [[COPY21:%[0-9]+]]:_(s32) = COPY [[BITCAST7]](s32)
+    ; SI: [[AND21:%[0-9]+]]:_(s32) = G_AND [[COPY21]], [[C8]]
+    ; SI: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[C5]](s32)
+    ; SI: [[OR13:%[0-9]+]]:_(s32) = G_OR [[AND20]], [[SHL13]]
+    ; SI: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR13]](s32)
+    ; SI: $vgpr0 = COPY [[BITCAST8]](<2 x s16>)
+    ; SI: $vgpr1 = COPY [[BITCAST9]](<2 x s16>)
+    ; SI: $vgpr2 = COPY [[BITCAST10]](<2 x s16>)
+    ; SI: $vgpr3 = COPY [[BITCAST11]](<2 x s16>)
     ; CI-HSA-LABEL: name: test_load_global_v7s16_align1
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; CI-HSA: [[LOAD:%[0-9]+]]:_(<7 x s16>) = G_LOAD [[COPY]](p1) :: (load 14, align 1, addrspace 1)
@@ -9723,12 +10204,53 @@ body: |
     ; CI-MESA: [[DEF4:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
     ; CI-MESA: [[CONCAT_VECTORS7:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[DEF4]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>)
     ; CI-MESA: [[UV36:%[0-9]+]]:_(<7 x s16>), [[UV37:%[0-9]+]]:_(<7 x s16>), [[UV38:%[0-9]+]]:_(<7 x s16>), [[UV39:%[0-9]+]]:_(<7 x s16>), [[UV40:%[0-9]+]]:_(<7 x s16>), [[UV41:%[0-9]+]]:_(<7 x s16>), [[UV42:%[0-9]+]]:_(<7 x s16>), [[UV43:%[0-9]+]]:_(<7 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS7]](<56 x s16>)
-    ; CI-MESA: [[CONCAT_VECTORS8:%[0-9]+]]:_(<14 x s16>) = G_CONCAT_VECTORS [[UV28]](<7 x s16>), [[UV36]](<7 x s16>)
-    ; CI-MESA: [[UV44:%[0-9]+]]:_(<2 x s16>), [[UV45:%[0-9]+]]:_(<2 x s16>), [[UV46:%[0-9]+]]:_(<2 x s16>), [[UV47:%[0-9]+]]:_(<2 x s16>), [[UV48:%[0-9]+]]:_(<2 x s16>), [[UV49:%[0-9]+]]:_(<2 x s16>), [[UV50:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS8]](<14 x s16>)
-    ; CI-MESA: $vgpr0 = COPY [[UV44]](<2 x s16>)
-    ; CI-MESA: $vgpr1 = COPY [[UV45]](<2 x s16>)
-    ; CI-MESA: $vgpr2 = COPY [[UV46]](<2 x s16>)
-    ; CI-MESA: $vgpr3 = COPY [[UV47]](<2 x s16>)
+    ; CI-MESA: [[INSERT8:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[DEF3]], [[UV28]](<7 x s16>), 0
+    ; CI-MESA: [[UV44:%[0-9]+]]:_(<2 x s16>), [[UV45:%[0-9]+]]:_(<2 x s16>), [[UV46:%[0-9]+]]:_(<2 x s16>), [[UV47:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT8]](<8 x s16>)
+    ; CI-MESA: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV44]](<2 x s16>)
+    ; CI-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C5]](s32)
+    ; CI-MESA: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV45]](<2 x s16>)
+    ; CI-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C5]](s32)
+    ; CI-MESA: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV46]](<2 x s16>)
+    ; CI-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C5]](s32)
+    ; CI-MESA: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV47]](<2 x s16>)
+    ; CI-MESA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C5]](s32)
+    ; CI-MESA: [[INSERT9:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[DEF3]], [[UV36]](<7 x s16>), 0
+    ; CI-MESA: [[UV48:%[0-9]+]]:_(<2 x s16>), [[UV49:%[0-9]+]]:_(<2 x s16>), [[UV50:%[0-9]+]]:_(<2 x s16>), [[UV51:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT9]](<8 x s16>)
+    ; CI-MESA: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV48]](<2 x s16>)
+    ; CI-MESA: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C5]](s32)
+    ; CI-MESA: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-MESA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+    ; CI-MESA: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C8]]
+    ; CI-MESA: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; CI-MESA: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C8]]
+    ; CI-MESA: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C5]](s32)
+    ; CI-MESA: [[OR10:%[0-9]+]]:_(s32) = G_OR [[AND14]], [[SHL10]]
+    ; CI-MESA: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR10]](s32)
+    ; CI-MESA: [[COPY16:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32)
+    ; CI-MESA: [[AND16:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C8]]
+    ; CI-MESA: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; CI-MESA: [[AND17:%[0-9]+]]:_(s32) = G_AND [[COPY17]], [[C8]]
+    ; CI-MESA: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[C5]](s32)
+    ; CI-MESA: [[OR11:%[0-9]+]]:_(s32) = G_OR [[AND16]], [[SHL11]]
+    ; CI-MESA: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR11]](s32)
+    ; CI-MESA: [[COPY18:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32)
+    ; CI-MESA: [[AND18:%[0-9]+]]:_(s32) = G_AND [[COPY18]], [[C8]]
+    ; CI-MESA: [[COPY19:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
+    ; CI-MESA: [[AND19:%[0-9]+]]:_(s32) = G_AND [[COPY19]], [[C8]]
+    ; CI-MESA: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[C5]](s32)
+    ; CI-MESA: [[OR12:%[0-9]+]]:_(s32) = G_OR [[AND18]], [[SHL12]]
+    ; CI-MESA: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR12]](s32)
+    ; CI-MESA: [[COPY20:%[0-9]+]]:_(s32) = COPY [[BITCAST6]](s32)
+    ; CI-MESA: [[AND20:%[0-9]+]]:_(s32) = G_AND [[COPY20]], [[C8]]
+    ; CI-MESA: [[COPY21:%[0-9]+]]:_(s32) = COPY [[BITCAST7]](s32)
+    ; CI-MESA: [[AND21:%[0-9]+]]:_(s32) = G_AND [[COPY21]], [[C8]]
+    ; CI-MESA: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[C5]](s32)
+    ; CI-MESA: [[OR13:%[0-9]+]]:_(s32) = G_OR [[AND20]], [[SHL13]]
+    ; CI-MESA: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR13]](s32)
+    ; CI-MESA: $vgpr0 = COPY [[BITCAST8]](<2 x s16>)
+    ; CI-MESA: $vgpr1 = COPY [[BITCAST9]](<2 x s16>)
+    ; CI-MESA: $vgpr2 = COPY [[BITCAST10]](<2 x s16>)
+    ; CI-MESA: $vgpr3 = COPY [[BITCAST11]](<2 x s16>)
     ; VI-LABEL: name: test_load_global_v7s16_align1
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1)
@@ -9850,12 +10372,53 @@ body: |
     ; VI: [[DEF4:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
     ; VI: [[CONCAT_VECTORS7:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[DEF4]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>)
     ; VI: [[UV36:%[0-9]+]]:_(<7 x s16>), [[UV37:%[0-9]+]]:_(<7 x s16>), [[UV38:%[0-9]+]]:_(<7 x s16>), [[UV39:%[0-9]+]]:_(<7 x s16>), [[UV40:%[0-9]+]]:_(<7 x s16>), [[UV41:%[0-9]+]]:_(<7 x s16>), [[UV42:%[0-9]+]]:_(<7 x s16>), [[UV43:%[0-9]+]]:_(<7 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS7]](<56 x s16>)
-    ; VI: [[CONCAT_VECTORS8:%[0-9]+]]:_(<14 x s16>) = G_CONCAT_VECTORS [[UV28]](<7 x s16>), [[UV36]](<7 x s16>)
-    ; VI: [[UV44:%[0-9]+]]:_(<2 x s16>), [[UV45:%[0-9]+]]:_(<2 x s16>), [[UV46:%[0-9]+]]:_(<2 x s16>), [[UV47:%[0-9]+]]:_(<2 x s16>), [[UV48:%[0-9]+]]:_(<2 x s16>), [[UV49:%[0-9]+]]:_(<2 x s16>), [[UV50:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS8]](<14 x s16>)
-    ; VI: $vgpr0 = COPY [[UV44]](<2 x s16>)
-    ; VI: $vgpr1 = COPY [[UV45]](<2 x s16>)
-    ; VI: $vgpr2 = COPY [[UV46]](<2 x s16>)
-    ; VI: $vgpr3 = COPY [[UV47]](<2 x s16>)
+    ; VI: [[INSERT8:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[DEF3]], [[UV28]](<7 x s16>), 0
+    ; VI: [[UV44:%[0-9]+]]:_(<2 x s16>), [[UV45:%[0-9]+]]:_(<2 x s16>), [[UV46:%[0-9]+]]:_(<2 x s16>), [[UV47:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT8]](<8 x s16>)
+    ; VI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV44]](<2 x s16>)
+    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C4]](s32)
+    ; VI: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV45]](<2 x s16>)
+    ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C4]](s32)
+    ; VI: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV46]](<2 x s16>)
+    ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C4]](s32)
+    ; VI: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV47]](<2 x s16>)
+    ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C4]](s32)
+    ; VI: [[INSERT9:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[DEF3]], [[UV36]](<7 x s16>), 0
+    ; VI: [[UV48:%[0-9]+]]:_(<2 x s16>), [[UV49:%[0-9]+]]:_(<2 x s16>), [[UV50:%[0-9]+]]:_(<2 x s16>), [[UV51:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT9]](<8 x s16>)
+    ; VI: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV48]](<2 x s16>)
+    ; VI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C4]](s32)
+    ; VI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+    ; VI: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C7]]
+    ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; VI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C7]]
+    ; VI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C4]](s32)
+    ; VI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[AND14]], [[SHL10]]
+    ; VI: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR10]](s32)
+    ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32)
+    ; VI: [[AND16:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C7]]
+    ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; VI: [[AND17:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C7]]
+    ; VI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[C4]](s32)
+    ; VI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[AND16]], [[SHL11]]
+    ; VI: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR11]](s32)
+    ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32)
+    ; VI: [[AND18:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C7]]
+    ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
+    ; VI: [[AND19:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C7]]
+    ; VI: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[C4]](s32)
+    ; VI: [[OR12:%[0-9]+]]:_(s32) = G_OR [[AND18]], [[SHL12]]
+    ; VI: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR12]](s32)
+    ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST6]](s32)
+    ; VI: [[AND20:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C7]]
+    ; VI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[BITCAST7]](s32)
+    ; VI: [[AND21:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C7]]
+    ; VI: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[C4]](s32)
+    ; VI: [[OR13:%[0-9]+]]:_(s32) = G_OR [[AND20]], [[SHL13]]
+    ; VI: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR13]](s32)
+    ; VI: $vgpr0 = COPY [[BITCAST8]](<2 x s16>)
+    ; VI: $vgpr1 = COPY [[BITCAST9]](<2 x s16>)
+    ; VI: $vgpr2 = COPY [[BITCAST10]](<2 x s16>)
+    ; VI: $vgpr3 = COPY [[BITCAST11]](<2 x s16>)
     ; GFX9-HSA-LABEL: name: test_load_global_v7s16_align1
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<7 x s16>) = G_LOAD [[COPY]](p1) :: (load 14, align 1, addrspace 1)
@@ -9983,12 +10546,37 @@ body: |
     ; GFX9-MESA: [[DEF4:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
     ; GFX9-MESA: [[CONCAT_VECTORS7:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[DEF4]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>)
     ; GFX9-MESA: [[UV36:%[0-9]+]]:_(<7 x s16>), [[UV37:%[0-9]+]]:_(<7 x s16>), [[UV38:%[0-9]+]]:_(<7 x s16>), [[UV39:%[0-9]+]]:_(<7 x s16>), [[UV40:%[0-9]+]]:_(<7 x s16>), [[UV41:%[0-9]+]]:_(<7 x s16>), [[UV42:%[0-9]+]]:_(<7 x s16>), [[UV43:%[0-9]+]]:_(<7 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS7]](<56 x s16>)
-    ; GFX9-MESA: [[CONCAT_VECTORS8:%[0-9]+]]:_(<14 x s16>) = G_CONCAT_VECTORS [[UV28]](<7 x s16>), [[UV36]](<7 x s16>)
-    ; GFX9-MESA: [[UV44:%[0-9]+]]:_(<2 x s16>), [[UV45:%[0-9]+]]:_(<2 x s16>), [[UV46:%[0-9]+]]:_(<2 x s16>), [[UV47:%[0-9]+]]:_(<2 x s16>), [[UV48:%[0-9]+]]:_(<2 x s16>), [[UV49:%[0-9]+]]:_(<2 x s16>), [[UV50:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS8]](<14 x s16>)
-    ; GFX9-MESA: $vgpr0 = COPY [[UV44]](<2 x s16>)
-    ; GFX9-MESA: $vgpr1 = COPY [[UV45]](<2 x s16>)
-    ; GFX9-MESA: $vgpr2 = COPY [[UV46]](<2 x s16>)
-    ; GFX9-MESA: $vgpr3 = COPY [[UV47]](<2 x s16>)
+    ; GFX9-MESA: [[INSERT8:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[DEF3]], [[UV28]](<7 x s16>), 0
+    ; GFX9-MESA: [[UV44:%[0-9]+]]:_(<2 x s16>), [[UV45:%[0-9]+]]:_(<2 x s16>), [[UV46:%[0-9]+]]:_(<2 x s16>), [[UV47:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT8]](<8 x s16>)
+    ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV44]](<2 x s16>)
+    ; GFX9-MESA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C6]](s32)
+    ; GFX9-MESA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV45]](<2 x s16>)
+    ; GFX9-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C6]](s32)
+    ; GFX9-MESA: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV46]](<2 x s16>)
+    ; GFX9-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C6]](s32)
+    ; GFX9-MESA: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV47]](<2 x s16>)
+    ; GFX9-MESA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C6]](s32)
+    ; GFX9-MESA: [[INSERT9:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[DEF3]], [[UV36]](<7 x s16>), 0
+    ; GFX9-MESA: [[UV48:%[0-9]+]]:_(<2 x s16>), [[UV49:%[0-9]+]]:_(<2 x s16>), [[UV50:%[0-9]+]]:_(<2 x s16>), [[UV51:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT9]](<8 x s16>)
+    ; GFX9-MESA: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV48]](<2 x s16>)
+    ; GFX9-MESA: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C6]](s32)
+    ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
+    ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32)
+    ; GFX9-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+    ; GFX9-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
+    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32)
+    ; GFX9-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+    ; GFX9-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32)
+    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY7]](s32), [[COPY8]](s32)
+    ; GFX9-MESA: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC3]](<2 x s16>)
+    ; GFX9-MESA: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC4]](<2 x s16>)
+    ; GFX9-MESA: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC5]](<2 x s16>)
+    ; GFX9-MESA: $vgpr3 = COPY [[BUILD_VECTOR_TRUNC6]](<2 x s16>)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<7 x s16>) = G_LOAD %0 :: (load 14, align 1, addrspace 1)
     %2:_(<7 x s16>) = G_IMPLICIT_DEF

diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-or.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-or.mir
index 86b4a8e35bd9..1cfc08d677fa 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-or.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-or.mir
@@ -504,12 +504,47 @@ body: |
     ; CHECK: [[OR1:%[0-9]+]]:_(<4 x s16>) = G_OR [[INSERT4]], [[INSERT5]]
     ; CHECK: [[CONCAT_VECTORS3:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[OR1]](<4 x s16>), [[DEF3]](<4 x s16>), [[DEF3]](<4 x s16>)
     ; CHECK: [[UV20:%[0-9]+]]:_(<3 x s16>), [[UV21:%[0-9]+]]:_(<3 x s16>), [[UV22:%[0-9]+]]:_(<3 x s16>), [[UV23:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<12 x s16>)
-    ; CHECK: [[CONCAT_VECTORS4:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV16]](<3 x s16>), [[UV20]](<3 x s16>)
+    ; CHECK: [[INSERT6:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV16]](<3 x s16>), 0
+    ; CHECK: [[UV24:%[0-9]+]]:_(<2 x s16>), [[UV25:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT6]](<4 x s16>)
+    ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV24]](<2 x s16>)
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+    ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV25]](<2 x s16>)
+    ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+    ; CHECK: [[INSERT7:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV20]](<3 x s16>), 0
+    ; CHECK: [[UV26:%[0-9]+]]:_(<2 x s16>), [[UV27:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT7]](<4 x s16>)
+    ; CHECK: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV26]](<2 x s16>)
+    ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+    ; CHECK: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV27]](<2 x s16>)
+    ; CHECK: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]]
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]]
+    ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CHECK: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]]
+    ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+    ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]]
+    ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+    ; CHECK: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CHECK: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+    ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
+    ; CHECK: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]]
+    ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+    ; CHECK: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]]
+    ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; CHECK: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; CHECK: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+    ; CHECK: [[CONCAT_VECTORS4:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
     ; CHECK: [[CONCAT_VECTORS5:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[CONCAT_VECTORS4]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>)
-    ; CHECK: [[UV24:%[0-9]+]]:_(<5 x s16>), [[UV25:%[0-9]+]]:_(<5 x s16>), [[UV26:%[0-9]+]]:_(<5 x s16>), [[UV27:%[0-9]+]]:_(<5 x s16>), [[UV28:%[0-9]+]]:_(<5 x s16>), [[UV29:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS5]](<30 x s16>)
+    ; CHECK: [[UV28:%[0-9]+]]:_(<5 x s16>), [[UV29:%[0-9]+]]:_(<5 x s16>), [[UV30:%[0-9]+]]:_(<5 x s16>), [[UV31:%[0-9]+]]:_(<5 x s16>), [[UV32:%[0-9]+]]:_(<5 x s16>), [[UV33:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS5]](<30 x s16>)
     ; CHECK: [[DEF4:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
-    ; CHECK: [[INSERT6:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[DEF4]], [[UV24]](<5 x s16>), 0
-    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT6]](<8 x s16>)
+    ; CHECK: [[INSERT8:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[DEF4]], [[UV28]](<5 x s16>), 0
+    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT8]](<8 x s16>)
     %0:_(<5 x s16>) = G_IMPLICIT_DEF
     %1:_(<5 x s16>) = G_IMPLICIT_DEF
     %2:_(<5 x s16>) = G_OR %0, %1

diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir
index 50311d6160d0..02ea5a63e740 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir
@@ -180,7 +180,42 @@ body: |
   ; CHECK:   [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
   ; CHECK:   [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF3]](<4 x s16>), [[DEF2]](<4 x s16>), [[DEF2]](<4 x s16>)
   ; CHECK:   [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>)
-  ; CHECK:   [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV4]](<3 x s16>), [[UV8]](<3 x s16>)
+  ; CHECK:   [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[UV4]](<3 x s16>), 0
+  ; CHECK:   [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT4]](<4 x s16>)
+  ; CHECK:   [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>)
+  ; CHECK:   [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+  ; CHECK:   [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C4]](s32)
+  ; CHECK:   [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>)
+  ; CHECK:   [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C4]](s32)
+  ; CHECK:   [[INSERT5:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[UV8]](<3 x s16>), 0
+  ; CHECK:   [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT5]](<4 x s16>)
+  ; CHECK:   [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV14]](<2 x s16>)
+  ; CHECK:   [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C4]](s32)
+  ; CHECK:   [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV15]](<2 x s16>)
+  ; CHECK:   [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST9]], [[C4]](s32)
+  ; CHECK:   [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+  ; CHECK:   [[COPY11:%[0-9]+]]:_(s32) = COPY [[BITCAST6]](s32)
+  ; CHECK:   [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C5]]
+  ; CHECK:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32)
+  ; CHECK:   [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C5]]
+  ; CHECK:   [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C4]](s32)
+  ; CHECK:   [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL2]]
+  ; CHECK:   [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+  ; CHECK:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[BITCAST7]](s32)
+  ; CHECK:   [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C5]]
+  ; CHECK:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[BITCAST8]](s32)
+  ; CHECK:   [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C5]]
+  ; CHECK:   [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C4]](s32)
+  ; CHECK:   [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL3]]
+  ; CHECK:   [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+  ; CHECK:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32)
+  ; CHECK:   [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C5]]
+  ; CHECK:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[BITCAST9]](s32)
+  ; CHECK:   [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C5]]
+  ; CHECK:   [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C4]](s32)
+  ; CHECK:   [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL4]]
+  ; CHECK:   [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+  ; CHECK:   [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST10]](<2 x s16>), [[BITCAST11]](<2 x s16>), [[BITCAST12]](<2 x s16>)
   ; CHECK:   $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS3]](<6 x s16>)
   ; CHECK:   S_SETPC_B64 undef $sgpr30_sgpr31
   bb.0:

diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddsat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddsat.mir
index 224ee57c11b5..5e3a15ccd5df 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddsat.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddsat.mir
@@ -535,7 +535,40 @@ body: |
     ; GFX6: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
     ; GFX6: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF1]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>)
     ; GFX6: [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>), [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
-    ; GFX6: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[EXTRACT]](<3 x s16>), [[UV6]](<3 x s16>)
+    ; GFX6: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0
+    ; GFX6: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT2]](<4 x s16>)
+    ; GFX6: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>)
+    ; GFX6: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32)
+    ; GFX6: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV11]](<2 x s16>)
+    ; GFX6: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32)
+    ; GFX6: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV6]](<3 x s16>), 0
+    ; GFX6: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT3]](<4 x s16>)
+    ; GFX6: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>)
+    ; GFX6: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C]](s32)
+    ; GFX6: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>)
+    ; GFX6: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST9]], [[C]](s32)
+    ; GFX6: [[COPY11:%[0-9]+]]:_(s32) = COPY [[BITCAST6]](s32)
+    ; GFX6: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C4]]
+    ; GFX6: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32)
+    ; GFX6: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C4]]
+    ; GFX6: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32)
+    ; GFX6: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL8]]
+    ; GFX6: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; GFX6: [[COPY13:%[0-9]+]]:_(s32) = COPY [[BITCAST7]](s32)
+    ; GFX6: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C4]]
+    ; GFX6: [[COPY14:%[0-9]+]]:_(s32) = COPY [[BITCAST8]](s32)
+    ; GFX6: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C4]]
+    ; GFX6: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C]](s32)
+    ; GFX6: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL9]]
+    ; GFX6: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+    ; GFX6: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32)
+    ; GFX6: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C4]]
+    ; GFX6: [[COPY16:%[0-9]+]]:_(s32) = COPY [[BITCAST9]](s32)
+    ; GFX6: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C4]]
+    ; GFX6: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C]](s32)
+    ; GFX6: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL10]]
+    ; GFX6: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+    ; GFX6: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST10]](<2 x s16>), [[BITCAST11]](<2 x s16>), [[BITCAST12]](<2 x s16>)
     ; GFX6: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS2]](<6 x s16>)
     ; GFX8-LABEL: name: saddsat_v3s16
     ; GFX8: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
@@ -599,7 +632,41 @@ body: |
     ; GFX8: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
     ; GFX8: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF1]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>)
     ; GFX8: [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>), [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
-    ; GFX8: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[EXTRACT]](<3 x s16>), [[UV6]](<3 x s16>)
+    ; GFX8: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0
+    ; GFX8: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT2]](<4 x s16>)
+    ; GFX8: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>)
+    ; GFX8: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32)
+    ; GFX8: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV11]](<2 x s16>)
+    ; GFX8: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32)
+    ; GFX8: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV6]](<3 x s16>), 0
+    ; GFX8: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT3]](<4 x s16>)
+    ; GFX8: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>)
+    ; GFX8: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C]](s32)
+    ; GFX8: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>)
+    ; GFX8: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST9]], [[C]](s32)
+    ; GFX8: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST6]](s32)
+    ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C5]]
+    ; GFX8: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32)
+    ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C5]]
+    ; GFX8: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; GFX8: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL2]]
+    ; GFX8: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; GFX8: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST7]](s32)
+    ; GFX8: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C5]]
+    ; GFX8: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST8]](s32)
+    ; GFX8: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C5]]
+    ; GFX8: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+    ; GFX8: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL3]]
+    ; GFX8: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+    ; GFX8: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32)
+    ; GFX8: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C5]]
+    ; GFX8: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST9]](s32)
+    ; GFX8: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C5]]
+    ; GFX8: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; GFX8: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL4]]
+    ; GFX8: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+    ; GFX8: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST10]](<2 x s16>), [[BITCAST11]](<2 x s16>), [[BITCAST12]](<2 x s16>)
     ; GFX8: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS2]](<6 x s16>)
     ; GFX9-LABEL: name: saddsat_v3s16
     ; GFX9: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
@@ -637,7 +704,28 @@ body: |
     ; GFX9: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
     ; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF3]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>)
     ; GFX9: [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>), [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
-    ; GFX9: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[EXTRACT]](<3 x s16>), [[UV6]](<3 x s16>)
+    ; GFX9: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0
+    ; GFX9: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT2]](<4 x s16>)
+    ; GFX9: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>)
+    ; GFX9: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
+    ; GFX9: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV11]](<2 x s16>)
+    ; GFX9: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32)
+    ; GFX9: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV6]](<3 x s16>), 0
+    ; GFX9: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT3]](<4 x s16>)
+    ; GFX9: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>)
+    ; GFX9: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32)
+    ; GFX9: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>)
+    ; GFX9: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32)
+    ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32)
+    ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32)
+    ; GFX9: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY7]](s32), [[COPY8]](s32)
+    ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32)
+    ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[BITCAST6]](s32)
+    ; GFX9: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY9]](s32), [[COPY10]](s32)
+    ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32)
+    ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST7]](s32)
+    ; GFX9: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY11]](s32), [[COPY12]](s32)
+    ; GFX9: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC4]](<2 x s16>), [[BUILD_VECTOR_TRUNC5]](<2 x s16>), [[BUILD_VECTOR_TRUNC6]](<2 x s16>)
     ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS2]](<6 x s16>)
     %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
     %1:_(<3 x s16>), %2:_(<3 x s16>) = G_UNMERGE_VALUES %0

diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sshlsat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sshlsat.mir
index 4193662d9d45..555a18b05f7f 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sshlsat.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sshlsat.mir
@@ -524,7 +524,40 @@ body: |
     ; GFX6: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
     ; GFX6: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF1]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>)
     ; GFX6: [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>), [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
-    ; GFX6: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[EXTRACT]](<3 x s16>), [[UV6]](<3 x s16>)
+    ; GFX6: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0
+    ; GFX6: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT2]](<4 x s16>)
+    ; GFX6: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>)
+    ; GFX6: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32)
+    ; GFX6: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV11]](<2 x s16>)
+    ; GFX6: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32)
+    ; GFX6: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV6]](<3 x s16>), 0
+    ; GFX6: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT3]](<4 x s16>)
+    ; GFX6: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>)
+    ; GFX6: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C]](s32)
+    ; GFX6: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>)
+    ; GFX6: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST9]], [[C]](s32)
+    ; GFX6: [[COPY11:%[0-9]+]]:_(s32) = COPY [[BITCAST6]](s32)
+    ; GFX6: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]]
+    ; GFX6: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32)
+    ; GFX6: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C1]]
+    ; GFX6: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32)
+    ; GFX6: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL8]]
+    ; GFX6: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; GFX6: [[COPY13:%[0-9]+]]:_(s32) = COPY [[BITCAST7]](s32)
+    ; GFX6: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C1]]
+    ; GFX6: [[COPY14:%[0-9]+]]:_(s32) = COPY [[BITCAST8]](s32)
+    ; GFX6: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C1]]
+    ; GFX6: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32)
+    ; GFX6: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL9]]
+    ; GFX6: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+    ; GFX6: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32)
+    ; GFX6: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C1]]
+    ; GFX6: [[COPY16:%[0-9]+]]:_(s32) = COPY [[BITCAST9]](s32)
+    ; GFX6: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C1]]
+    ; GFX6: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C]](s32)
+    ; GFX6: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL10]]
+    ; GFX6: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+    ; GFX6: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST10]](<2 x s16>), [[BITCAST11]](<2 x s16>), [[BITCAST12]](<2 x s16>)
     ; GFX6: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS2]](<6 x s16>)
     ; GFX8-LABEL: name: sshlsat_v3s16
     ; GFX8: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2

diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubsat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubsat.mir
index bb3d7960420d..d39d926f7188 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubsat.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubsat.mir
@@ -535,7 +535,40 @@ body: |
     ; GFX6: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
     ; GFX6: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF1]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>)
     ; GFX6: [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>), [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
-    ; GFX6: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[EXTRACT]](<3 x s16>), [[UV6]](<3 x s16>)
+    ; GFX6: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0
+    ; GFX6: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT2]](<4 x s16>)
+    ; GFX6: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>)
+    ; GFX6: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32)
+    ; GFX6: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV11]](<2 x s16>)
+    ; GFX6: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32)
+    ; GFX6: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV6]](<3 x s16>), 0
+    ; GFX6: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT3]](<4 x s16>)
+    ; GFX6: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>)
+    ; GFX6: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C]](s32)
+    ; GFX6: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>)
+    ; GFX6: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST9]], [[C]](s32)
+    ; GFX6: [[COPY10:%[0-9]+]]:_(s32) = COPY [[BITCAST6]](s32)
+    ; GFX6: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C4]]
+    ; GFX6: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32)
+    ; GFX6: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C4]]
+    ; GFX6: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32)
+    ; GFX6: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL8]]
+    ; GFX6: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; GFX6: [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST7]](s32)
+    ; GFX6: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C4]]
+    ; GFX6: [[COPY13:%[0-9]+]]:_(s32) = COPY [[BITCAST8]](s32)
+    ; GFX6: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C4]]
+    ; GFX6: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C]](s32)
+    ; GFX6: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL9]]
+    ; GFX6: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+    ; GFX6: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32)
+    ; GFX6: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C4]]
+    ; GFX6: [[COPY15:%[0-9]+]]:_(s32) = COPY [[BITCAST9]](s32)
+    ; GFX6: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C4]]
+    ; GFX6: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C]](s32)
+    ; GFX6: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL10]]
+    ; GFX6: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+    ; GFX6: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST10]](<2 x s16>), [[BITCAST11]](<2 x s16>), [[BITCAST12]](<2 x s16>)
     ; GFX6: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS2]](<6 x s16>)
     ; GFX8-LABEL: name: ssubsat_v3s16
     ; GFX8: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
@@ -599,7 +632,41 @@ body: |
     ; GFX8: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
     ; GFX8: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF1]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>)
     ; GFX8: [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>), [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
-    ; GFX8: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[EXTRACT]](<3 x s16>), [[UV6]](<3 x s16>)
+    ; GFX8: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0
+    ; GFX8: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT2]](<4 x s16>)
+    ; GFX8: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>)
+    ; GFX8: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32)
+    ; GFX8: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV11]](<2 x s16>)
+    ; GFX8: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32)
+    ; GFX8: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV6]](<3 x s16>), 0
+    ; GFX8: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT3]](<4 x s16>)
+    ; GFX8: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>)
+    ; GFX8: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C]](s32)
+    ; GFX8: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>)
+    ; GFX8: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST9]], [[C]](s32)
+    ; GFX8: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST6]](s32)
+    ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C5]]
+    ; GFX8: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32)
+    ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C5]]
+    ; GFX8: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; GFX8: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL2]]
+    ; GFX8: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; GFX8: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST7]](s32)
+    ; GFX8: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C5]]
+    ; GFX8: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST8]](s32)
+    ; GFX8: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C5]]
+    ; GFX8: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+    ; GFX8: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL3]]
+    ; GFX8: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+    ; GFX8: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32)
+    ; GFX8: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C5]]
+    ; GFX8: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST9]](s32)
+    ; GFX8: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C5]]
+    ; GFX8: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; GFX8: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL4]]
+    ; GFX8: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+    ; GFX8: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST10]](<2 x s16>), [[BITCAST11]](<2 x s16>), [[BITCAST12]](<2 x s16>)
     ; GFX8: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS2]](<6 x s16>)
     ; GFX9-LABEL: name: ssubsat_v3s16
     ; GFX9: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
@@ -637,7 +704,28 @@ body: |
     ; GFX9: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
     ; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF3]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>)
     ; GFX9: [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>), [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
-    ; GFX9: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[EXTRACT]](<3 x s16>), [[UV6]](<3 x s16>)
+    ; GFX9: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0
+    ; GFX9: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT2]](<4 x s16>)
+    ; GFX9: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>)
+    ; GFX9: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
+    ; GFX9: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV11]](<2 x s16>)
+    ; GFX9: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32)
+    ; GFX9: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV6]](<3 x s16>), 0
+    ; GFX9: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT3]](<4 x s16>)
+    ; GFX9: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>)
+    ; GFX9: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32)
+    ; GFX9: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>)
+    ; GFX9: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32)
+    ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32)
+    ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32)
+    ; GFX9: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY7]](s32), [[COPY8]](s32)
+    ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32)
+    ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[BITCAST6]](s32)
+    ; GFX9: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY9]](s32), [[COPY10]](s32)
+    ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32)
+    ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST7]](s32)
+    ; GFX9: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY11]](s32), [[COPY12]](s32)
+    ; GFX9: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC4]](<2 x s16>), [[BUILD_VECTOR_TRUNC5]](<2 x s16>), [[BUILD_VECTOR_TRUNC6]](<2 x s16>)
     ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS2]](<6 x s16>)
     %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
     %1:_(<3 x s16>), %2:_(<3 x s16>) = G_UNMERGE_VALUES %0

diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddsat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddsat.mir
index 00d9b72c32b9..077c6be94858 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddsat.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddsat.mir
@@ -426,7 +426,40 @@ body: |
     ; GFX6: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
     ; GFX6: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF1]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>)
     ; GFX6: [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>), [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
-    ; GFX6: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[EXTRACT]](<3 x s16>), [[UV6]](<3 x s16>)
+    ; GFX6: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0
+    ; GFX6: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT2]](<4 x s16>)
+    ; GFX6: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>)
+    ; GFX6: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32)
+    ; GFX6: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV11]](<2 x s16>)
+    ; GFX6: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32)
+    ; GFX6: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV6]](<3 x s16>), 0
+    ; GFX6: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT3]](<4 x s16>)
+    ; GFX6: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>)
+    ; GFX6: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C]](s32)
+    ; GFX6: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>)
+    ; GFX6: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST9]], [[C]](s32)
+    ; GFX6: [[COPY10:%[0-9]+]]:_(s32) = COPY [[BITCAST6]](s32)
+    ; GFX6: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C2]]
+    ; GFX6: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR7]](s32)
+    ; GFX6: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C2]]
+    ; GFX6: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32)
+    ; GFX6: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL8]]
+    ; GFX6: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; GFX6: [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST7]](s32)
+    ; GFX6: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C2]]
+    ; GFX6: [[COPY13:%[0-9]+]]:_(s32) = COPY [[BITCAST8]](s32)
+    ; GFX6: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C2]]
+    ; GFX6: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C]](s32)
+    ; GFX6: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL9]]
+    ; GFX6: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+    ; GFX6: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR9]](s32)
+    ; GFX6: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C2]]
+    ; GFX6: [[COPY15:%[0-9]+]]:_(s32) = COPY [[BITCAST9]](s32)
+    ; GFX6: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C2]]
+    ; GFX6: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C]](s32)
+    ; GFX6: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL10]]
+    ; GFX6: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+    ; GFX6: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST10]](<2 x s16>), [[BITCAST11]](<2 x s16>), [[BITCAST12]](<2 x s16>)
     ; GFX6: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS2]](<6 x s16>)
     ; GFX8-LABEL: name: uaddsat_v3s16
     ; GFX8: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
@@ -469,7 +502,41 @@ body: |
     ; GFX8: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
     ; GFX8: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF1]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>)
     ; GFX8: [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>), [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
-    ; GFX8: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[EXTRACT]](<3 x s16>), [[UV6]](<3 x s16>)
+    ; GFX8: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0
+    ; GFX8: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT2]](<4 x s16>)
+    ; GFX8: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>)
+    ; GFX8: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32)
+    ; GFX8: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV11]](<2 x s16>)
+    ; GFX8: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32)
+    ; GFX8: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV6]](<3 x s16>), 0
+    ; GFX8: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT3]](<4 x s16>)
+    ; GFX8: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>)
+    ; GFX8: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C]](s32)
+    ; GFX8: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>)
+    ; GFX8: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST9]], [[C]](s32)
+    ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST6]](s32)
+    ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]]
+    ; GFX8: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32)
+    ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]]
+    ; GFX8: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; GFX8: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL2]]
+    ; GFX8: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; GFX8: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST7]](s32)
+    ; GFX8: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]]
+    ; GFX8: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST8]](s32)
+    ; GFX8: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C2]]
+    ; GFX8: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+    ; GFX8: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL3]]
+    ; GFX8: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+    ; GFX8: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32)
+    ; GFX8: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C2]]
+    ; GFX8: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST9]](s32)
+    ; GFX8: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C2]]
+    ; GFX8: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; GFX8: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL4]]
+    ; GFX8: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+    ; GFX8: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST10]](<2 x s16>), [[BITCAST11]](<2 x s16>), [[BITCAST12]](<2 x s16>)
     ; GFX8: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS2]](<6 x s16>)
     ; GFX9-LABEL: name: uaddsat_v3s16
     ; GFX9: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
@@ -507,7 +574,28 @@ body: |
     ; GFX9: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
     ; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF3]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>)
     ; GFX9: [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>), [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
-    ; GFX9: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[EXTRACT]](<3 x s16>), [[UV6]](<3 x s16>)
+    ; GFX9: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0
+    ; GFX9: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT2]](<4 x s16>)
+    ; GFX9: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>)
+    ; GFX9: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
+    ; GFX9: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV11]](<2 x s16>)
+    ; GFX9: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32)
+    ; GFX9: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV6]](<3 x s16>), 0
+    ; GFX9: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT3]](<4 x s16>)
+    ; GFX9: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>)
+    ; GFX9: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32)
+    ; GFX9: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>)
+    ; GFX9: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32)
+    ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32)
+    ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32)
+    ; GFX9: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY7]](s32), [[COPY8]](s32)
+    ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32)
+    ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[BITCAST6]](s32)
+    ; GFX9: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY9]](s32), [[COPY10]](s32)
+    ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32)
+    ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST7]](s32)
+    ; GFX9: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY11]](s32), [[COPY12]](s32)
+    ; GFX9: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC4]](<2 x s16>), [[BUILD_VECTOR_TRUNC5]](<2 x s16>), [[BUILD_VECTOR_TRUNC6]](<2 x s16>)
     ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS2]](<6 x s16>)
     %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
     %1:_(<3 x s16>), %2:_(<3 x s16>) = G_UNMERGE_VALUES %0

diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ushlsat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ushlsat.mir
index 687c6dfe001c..e7369c02c8c2 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ushlsat.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ushlsat.mir
@@ -452,7 +452,40 @@ body: |
     ; GFX6: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
     ; GFX6: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF1]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>)
     ; GFX6: [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>), [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
-    ; GFX6: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[EXTRACT]](<3 x s16>), [[UV6]](<3 x s16>)
+    ; GFX6: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0
+    ; GFX6: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT2]](<4 x s16>)
+    ; GFX6: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>)
+    ; GFX6: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32)
+    ; GFX6: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV11]](<2 x s16>)
+    ; GFX6: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32)
+    ; GFX6: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV6]](<3 x s16>), 0
+    ; GFX6: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT3]](<4 x s16>)
+    ; GFX6: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>)
+    ; GFX6: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C]](s32)
+    ; GFX6: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>)
+    ; GFX6: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST9]], [[C]](s32)
+    ; GFX6: [[COPY10:%[0-9]+]]:_(s32) = COPY [[BITCAST6]](s32)
+    ; GFX6: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C1]]
+    ; GFX6: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR10]](s32)
+    ; GFX6: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]]
+    ; GFX6: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32)
+    ; GFX6: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL8]]
+    ; GFX6: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; GFX6: [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST7]](s32)
+    ; GFX6: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C1]]
+    ; GFX6: [[COPY13:%[0-9]+]]:_(s32) = COPY [[BITCAST8]](s32)
+    ; GFX6: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C1]]
+    ; GFX6: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32)
+    ; GFX6: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL9]]
+    ; GFX6: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+    ; GFX6: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR12]](s32)
+    ; GFX6: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C1]]
+    ; GFX6: [[COPY15:%[0-9]+]]:_(s32) = COPY [[BITCAST9]](s32)
+    ; GFX6: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C1]]
+    ; GFX6: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C]](s32)
+    ; GFX6: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL10]]
+    ; GFX6: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+    ; GFX6: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST10]](<2 x s16>), [[BITCAST11]](<2 x s16>), [[BITCAST12]](<2 x s16>)
     ; GFX6: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS2]](<6 x s16>)
     ; GFX8-LABEL: name: ushlsat_v3s16
     ; GFX8: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2

diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubsat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubsat.mir
index 0fef31a029bf..2bea00220a53 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubsat.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubsat.mir
@@ -410,7 +410,40 @@ body: |
     ; GFX6: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
     ; GFX6: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF1]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>)
     ; GFX6: [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>), [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
-    ; GFX6: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[EXTRACT]](<3 x s16>), [[UV6]](<3 x s16>)
+    ; GFX6: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0
+    ; GFX6: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT2]](<4 x s16>)
+    ; GFX6: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>)
+    ; GFX6: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32)
+    ; GFX6: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV11]](<2 x s16>)
+    ; GFX6: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32)
+    ; GFX6: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV6]](<3 x s16>), 0
+    ; GFX6: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT3]](<4 x s16>)
+    ; GFX6: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>)
+    ; GFX6: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C]](s32)
+    ; GFX6: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>)
+    ; GFX6: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST9]], [[C]](s32)
+    ; GFX6: [[COPY10:%[0-9]+]]:_(s32) = COPY [[BITCAST6]](s32)
+    ; GFX6: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C1]]
+    ; GFX6: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR7]](s32)
+    ; GFX6: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]]
+    ; GFX6: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32)
+    ; GFX6: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL8]]
+    ; GFX6: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; GFX6: [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST7]](s32)
+    ; GFX6: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C1]]
+    ; GFX6: [[COPY13:%[0-9]+]]:_(s32) = COPY [[BITCAST8]](s32)
+    ; GFX6: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C1]]
+    ; GFX6: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C]](s32)
+    ; GFX6: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL9]]
+    ; GFX6: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+    ; GFX6: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR9]](s32)
+    ; GFX6: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C1]]
+    ; GFX6: [[COPY15:%[0-9]+]]:_(s32) = COPY [[BITCAST9]](s32)
+    ; GFX6: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C1]]
+    ; GFX6: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C]](s32)
+    ; GFX6: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL10]]
+    ; GFX6: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+    ; GFX6: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST10]](<2 x s16>), [[BITCAST11]](<2 x s16>), [[BITCAST12]](<2 x s16>)
     ; GFX6: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS2]](<6 x s16>)
     ; GFX8-LABEL: name: usubsat_v3s16
     ; GFX8: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
@@ -453,7 +486,41 @@ body: |
     ; GFX8: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
     ; GFX8: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF1]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>)
     ; GFX8: [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>), [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
-    ; GFX8: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[EXTRACT]](<3 x s16>), [[UV6]](<3 x s16>)
+    ; GFX8: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0
+    ; GFX8: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT2]](<4 x s16>)
+    ; GFX8: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>)
+    ; GFX8: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32)
+    ; GFX8: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV11]](<2 x s16>)
+    ; GFX8: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32)
+    ; GFX8: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV6]](<3 x s16>), 0
+    ; GFX8: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT3]](<4 x s16>)
+    ; GFX8: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>)
+    ; GFX8: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C]](s32)
+    ; GFX8: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>)
+    ; GFX8: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST9]], [[C]](s32)
+    ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST6]](s32)
+    ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]]
+    ; GFX8: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32)
+    ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]]
+    ; GFX8: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; GFX8: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL2]]
+    ; GFX8: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; GFX8: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST7]](s32)
+    ; GFX8: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]]
+    ; GFX8: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST8]](s32)
+    ; GFX8: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C2]]
+    ; GFX8: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+    ; GFX8: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL3]]
+    ; GFX8: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+    ; GFX8: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32)
+    ; GFX8: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C2]]
+    ; GFX8: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST9]](s32)
+    ; GFX8: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C2]]
+    ; GFX8: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; GFX8: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL4]]
+    ; GFX8: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+    ; GFX8: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST10]](<2 x s16>), [[BITCAST11]](<2 x s16>), [[BITCAST12]](<2 x s16>)
     ; GFX8: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS2]](<6 x s16>)
     ; GFX9-LABEL: name: usubsat_v3s16
     ; GFX9: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
@@ -491,7 +558,28 @@ body: |
     ; GFX9: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
     ; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF3]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>)
     ; GFX9: [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>), [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
-    ; GFX9: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[EXTRACT]](<3 x s16>), [[UV6]](<3 x s16>)
+    ; GFX9: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0
+    ; GFX9: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT2]](<4 x s16>)
+    ; GFX9: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>)
+    ; GFX9: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
+    ; GFX9: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV11]](<2 x s16>)
+    ; GFX9: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32)
+    ; GFX9: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV6]](<3 x s16>), 0
+    ; GFX9: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT3]](<4 x s16>)
+    ; GFX9: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>)
+    ; GFX9: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32)
+    ; GFX9: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>)
+    ; GFX9: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32)
+    ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32)
+    ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32)
+    ; GFX9: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY7]](s32), [[COPY8]](s32)
+    ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32)
+    ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[BITCAST6]](s32)
+    ; GFX9: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY9]](s32), [[COPY10]](s32)
+    ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32)
+    ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST7]](s32)
+    ; GFX9: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY11]](s32), [[COPY12]](s32)
+    ; GFX9: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC4]](<2 x s16>), [[BUILD_VECTOR_TRUNC5]](<2 x s16>), [[BUILD_VECTOR_TRUNC6]](<2 x s16>)
     ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS2]](<6 x s16>)
     %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
     %1:_(<3 x s16>), %2:_(<3 x s16>) = G_UNMERGE_VALUES %0

diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-xor.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-xor.mir
index 538ac6f8f796..e4be432f3dab 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-xor.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-xor.mir
@@ -504,12 +504,47 @@ body: |
     ; CHECK: [[XOR1:%[0-9]+]]:_(<4 x s16>) = G_XOR [[INSERT4]], [[INSERT5]]
     ; CHECK: [[CONCAT_VECTORS3:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[XOR1]](<4 x s16>), [[DEF3]](<4 x s16>), [[DEF3]](<4 x s16>)
     ; CHECK: [[UV20:%[0-9]+]]:_(<3 x s16>), [[UV21:%[0-9]+]]:_(<3 x s16>), [[UV22:%[0-9]+]]:_(<3 x s16>), [[UV23:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<12 x s16>)
-    ; CHECK: [[CONCAT_VECTORS4:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV16]](<3 x s16>), [[UV20]](<3 x s16>)
+    ; CHECK: [[INSERT6:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV16]](<3 x s16>), 0
+    ; CHECK: [[UV24:%[0-9]+]]:_(<2 x s16>), [[UV25:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT6]](<4 x s16>)
+    ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV24]](<2 x s16>)
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+    ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV25]](<2 x s16>)
+    ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+    ; CHECK: [[INSERT7:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV20]](<3 x s16>), 0
+    ; CHECK: [[UV26:%[0-9]+]]:_(<2 x s16>), [[UV27:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT7]](<4 x s16>)
+    ; CHECK: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV26]](<2 x s16>)
+    ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+    ; CHECK: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV27]](<2 x s16>)
+    ; CHECK: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]]
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]]
+    ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CHECK: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]]
+    ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+    ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]]
+    ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+    ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CHECK: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
+    ; CHECK: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]]
+    ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+    ; CHECK: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]]
+    ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; CHECK: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; CHECK: [[CONCAT_VECTORS4:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
     ; CHECK: [[CONCAT_VECTORS5:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[CONCAT_VECTORS4]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>)
-    ; CHECK: [[UV24:%[0-9]+]]:_(<5 x s16>), [[UV25:%[0-9]+]]:_(<5 x s16>), [[UV26:%[0-9]+]]:_(<5 x s16>), [[UV27:%[0-9]+]]:_(<5 x s16>), [[UV28:%[0-9]+]]:_(<5 x s16>), [[UV29:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS5]](<30 x s16>)
+    ; CHECK: [[UV28:%[0-9]+]]:_(<5 x s16>), [[UV29:%[0-9]+]]:_(<5 x s16>), [[UV30:%[0-9]+]]:_(<5 x s16>), [[UV31:%[0-9]+]]:_(<5 x s16>), [[UV32:%[0-9]+]]:_(<5 x s16>), [[UV33:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS5]](<30 x s16>)
     ; CHECK: [[DEF4:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
-    ; CHECK: [[INSERT6:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[DEF4]], [[UV24]](<5 x s16>), 0
-    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT6]](<8 x s16>)
+    ; CHECK: [[INSERT8:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[DEF4]], [[UV28]](<5 x s16>), 0
+    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT8]](<8 x s16>)
     %0:_(<5 x s16>) = G_IMPLICIT_DEF
     %1:_(<5 x s16>) = G_IMPLICIT_DEF
     %2:_(<5 x s16>) = G_XOR %0, %1
