[llvm] 31a9659 - GlobalISel: Avoid use of G_INSERT in insertParts

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Tue Jun 8 11:44:34 PDT 2021


Author: Matt Arsenault
Date: 2021-06-08T14:44:24-04:00
New Revision: 31a9659de550b25b6bc0ad5cab73d133095c351f

URL: https://github.com/llvm/llvm-project/commit/31a9659de550b25b6bc0ad5cab73d133095c351f
DIFF: https://github.com/llvm/llvm-project/commit/31a9659de550b25b6bc0ad5cab73d133095c351f.diff

LOG: GlobalISel: Avoid use of G_INSERT in insertParts

G_INSERT legalization is incomplete and doesn't work very
well. Instead, try to use sequences of G_MERGE_VALUES/G_UNMERGE_VALUES,
padding with undef values (although this can get pretty large).

For the case of load/store narrowing, this is still performing the
load/stores in irregularly sized pieces. It might be cleaner to split
this down into equal sized pieces, and rely on load/store merging to
optimize it.

Added: 
    

Modified: 
    llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
    llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll
    llvm/test/CodeGen/AArch64/GlobalISel/legalize-add.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/frem.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ashr.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-constant.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-lshr.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-or.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-select.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shl.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-xor.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zext.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/load-constant.96.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/load-local.96.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/load-unaligned.ll
    llvm/test/CodeGen/AMDGPU/ds-alignment.ll
    llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 448e80960cc4..f9de8201b628 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -220,29 +220,16 @@ void LegalizerHelper::insertParts(Register DstReg,
     return;
   }
 
-  unsigned PartSize = PartTy.getSizeInBits();
-  unsigned LeftoverPartSize = LeftoverTy.getSizeInBits();
+  SmallVector<Register> GCDRegs;
+  LLT GCDTy;
+  for (Register PartReg : PartRegs)
+    GCDTy = extractGCDType(GCDRegs, ResultTy, LeftoverTy, PartReg);
 
-  Register CurResultReg = MRI.createGenericVirtualRegister(ResultTy);
-  MIRBuilder.buildUndef(CurResultReg);
+  for (Register PartReg : LeftoverRegs)
+    extractGCDType(GCDRegs, ResultTy, LeftoverTy, PartReg);
 
-  unsigned Offset = 0;
-  for (Register PartReg : PartRegs) {
-    Register NewResultReg = MRI.createGenericVirtualRegister(ResultTy);
-    MIRBuilder.buildInsert(NewResultReg, CurResultReg, PartReg, Offset);
-    CurResultReg = NewResultReg;
-    Offset += PartSize;
-  }
-
-  for (unsigned I = 0, E = LeftoverRegs.size(); I != E; ++I) {
-    // Use the original output register for the final insert to avoid a copy.
-    Register NewResultReg = (I + 1 == E) ?
-      DstReg : MRI.createGenericVirtualRegister(ResultTy);
-
-    MIRBuilder.buildInsert(NewResultReg, CurResultReg, LeftoverRegs[I], Offset);
-    CurResultReg = NewResultReg;
-    Offset += LeftoverPartSize;
-  }
+  LLT ResultLCMTy = buildLCMMergePieces(ResultTy, LeftoverTy, GCDTy, GCDRegs);
+  buildWidenedRemergeToDst(DstReg, ResultLCMTy, GCDRegs);
 }
 
 /// Append the result registers of G_UNMERGE_VALUES \p MI to \p Regs.

diff  --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll
index 6332fa4a5c8e..0cc39df24752 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll
@@ -80,7 +80,7 @@ end:
   br label %block
 }
 
-; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: cannot select: %{{[0-9]+}}:gpr32(s32), %{{[0-9]+}}:gpr(s1) = G_UADDE %{{[0-9]+}}:gpr, %{{[0-9]+}}:gpr, %{{[0-9]+}}:gpr (in function: nonpow2_add_narrowing)
+; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: cannot select: %{{[0-9]+}}:gpr(s32), %{{[0-9]+}}:gpr(s32) = G_UNMERGE_VALUES %{{[0-9]+}}:gpr(s64) (in function: nonpow2_add_narrowing)
 ; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for nonpow2_add_narrowing
 ; FALLBACK-WITH-REPORT-OUT-LABEL: nonpow2_add_narrowing:
 define void @nonpow2_add_narrowing(i128 %x, i128 %y) {

diff  --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-add.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-add.mir
index 65c52e1ceb80..49becbb545a4 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-add.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-add.mir
@@ -89,16 +89,12 @@ body:             |
     ; CHECK: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](s64), 0
     ; CHECK: [[UADDO:%[0-9]+]]:_(s64), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[COPY4]], [[COPY5]]
     ; CHECK: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[EXTRACT]], [[EXTRACT1]], [[UADDO1]]
-    ; CHECK: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
-    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](s64)
-    ; CHECK: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[DEF]](s64), 0
-    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[EXTRACT2]](s32)
-    ; CHECK: [[COPY6:%[0-9]+]]:_(s64) = COPY [[UADDO]](s64)
-    ; CHECK: [[EXTRACT3:%[0-9]+]]:_(s32) = G_EXTRACT [[ANYEXT]](s64), 0
-    ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[EXTRACT3]](s32)
-    ; CHECK: [[INSERT:%[0-9]+]]:_(s64) = G_INSERT [[ANYEXT1]], [[UADDE]](s32), 0
-    ; CHECK: $x0 = COPY [[COPY6]](s64)
-    ; CHECK: $x1 = COPY [[INSERT]](s64)
+    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UADDO]](s64)
+    ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32)
+    ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE]](s32), [[DEF]](s32)
+    ; CHECK: $x0 = COPY [[MV]](s64)
+    ; CHECK: $x1 = COPY [[MV1]](s64)
     %0:_(s64) = COPY $x0
     %1:_(s64) = COPY $x1
     %2:_(s64) = COPY $x2
@@ -131,17 +127,669 @@ body:             |
     ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[EXTRACT]](s1)
     ; CHECK: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[EXTRACT1]](s1)
     ; CHECK: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[ZEXT]], [[ZEXT1]], [[UADDO1]]
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[UADDE]](s32)
-    ; CHECK: [[COPY6:%[0-9]+]]:_(s1) = COPY [[TRUNC]](s1)
-    ; CHECK: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
-    ; CHECK: [[EXTRACT2:%[0-9]+]]:_(s1) = G_EXTRACT [[DEF]](s64), 0
-    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[EXTRACT2]](s1)
-    ; CHECK: [[COPY7:%[0-9]+]]:_(s64) = COPY [[UADDO]](s64)
-    ; CHECK: [[EXTRACT3:%[0-9]+]]:_(s1) = G_EXTRACT [[ANYEXT]](s64), 0
-    ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[EXTRACT3]](s1)
-    ; CHECK: [[INSERT:%[0-9]+]]:_(s64) = G_INSERT [[ANYEXT1]], [[COPY6]](s1), 0
-    ; CHECK: $x0 = COPY [[COPY7]](s64)
-    ; CHECK: $x1 = COPY [[INSERT]](s64)
+    ; CHECK: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8), [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[UADDO]](s64)
+    ; CHECK: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 1
+    ; CHECK: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UV]](s8)
+    ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT2]], [[C1]](s64)
+    ; CHECK: [[C2:%[0-9]+]]:_(s8) = G_CONSTANT i8 2
+    ; CHECK: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT2]], [[C3]](s64)
+    ; CHECK: [[C4:%[0-9]+]]:_(s8) = G_CONSTANT i8 3
+    ; CHECK: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT2]], [[C5]](s64)
+    ; CHECK: [[C6:%[0-9]+]]:_(s8) = G_CONSTANT i8 4
+    ; CHECK: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CHECK: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT2]], [[C7]](s64)
+    ; CHECK: [[C8:%[0-9]+]]:_(s8) = G_CONSTANT i8 5
+    ; CHECK: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
+    ; CHECK: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT2]], [[C9]](s64)
+    ; CHECK: [[C10:%[0-9]+]]:_(s8) = G_CONSTANT i8 6
+    ; CHECK: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+    ; CHECK: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT2]], [[C11]](s64)
+    ; CHECK: [[C12:%[0-9]+]]:_(s8) = G_CONSTANT i8 7
+    ; CHECK: [[C13:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
+    ; CHECK: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT2]], [[C13]](s64)
+    ; CHECK: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UV1]](s8)
+    ; CHECK: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT3]], [[C1]](s64)
+    ; CHECK: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT3]], [[C3]](s64)
+    ; CHECK: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT3]], [[C5]](s64)
+    ; CHECK: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT3]], [[C7]](s64)
+    ; CHECK: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT3]], [[C9]](s64)
+    ; CHECK: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT3]], [[C11]](s64)
+    ; CHECK: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT3]], [[C13]](s64)
+    ; CHECK: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UV2]](s8)
+    ; CHECK: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT4]], [[C1]](s64)
+    ; CHECK: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT4]], [[C3]](s64)
+    ; CHECK: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT4]], [[C5]](s64)
+    ; CHECK: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT4]], [[C7]](s64)
+    ; CHECK: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT4]], [[C9]](s64)
+    ; CHECK: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT4]], [[C11]](s64)
+    ; CHECK: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT4]], [[C13]](s64)
+    ; CHECK: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UV3]](s8)
+    ; CHECK: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT5]], [[C1]](s64)
+    ; CHECK: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT5]], [[C3]](s64)
+    ; CHECK: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT5]], [[C5]](s64)
+    ; CHECK: [[LSHR24:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT5]], [[C7]](s64)
+    ; CHECK: [[LSHR25:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT5]], [[C9]](s64)
+    ; CHECK: [[LSHR26:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT5]], [[C11]](s64)
+    ; CHECK: [[LSHR27:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT5]], [[C13]](s64)
+    ; CHECK: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UV4]](s8)
+    ; CHECK: [[LSHR28:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT6]], [[C1]](s64)
+    ; CHECK: [[LSHR29:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT6]], [[C3]](s64)
+    ; CHECK: [[LSHR30:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT6]], [[C5]](s64)
+    ; CHECK: [[LSHR31:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT6]], [[C7]](s64)
+    ; CHECK: [[LSHR32:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT6]], [[C9]](s64)
+    ; CHECK: [[LSHR33:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT6]], [[C11]](s64)
+    ; CHECK: [[LSHR34:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT6]], [[C13]](s64)
+    ; CHECK: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UV5]](s8)
+    ; CHECK: [[LSHR35:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT7]], [[C1]](s64)
+    ; CHECK: [[LSHR36:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT7]], [[C3]](s64)
+    ; CHECK: [[LSHR37:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT7]], [[C5]](s64)
+    ; CHECK: [[LSHR38:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT7]], [[C7]](s64)
+    ; CHECK: [[LSHR39:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT7]], [[C9]](s64)
+    ; CHECK: [[LSHR40:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT7]], [[C11]](s64)
+    ; CHECK: [[LSHR41:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT7]], [[C13]](s64)
+    ; CHECK: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UV6]](s8)
+    ; CHECK: [[LSHR42:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT8]], [[C1]](s64)
+    ; CHECK: [[LSHR43:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT8]], [[C3]](s64)
+    ; CHECK: [[LSHR44:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT8]], [[C5]](s64)
+    ; CHECK: [[LSHR45:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT8]], [[C7]](s64)
+    ; CHECK: [[LSHR46:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT8]], [[C9]](s64)
+    ; CHECK: [[LSHR47:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT8]], [[C11]](s64)
+    ; CHECK: [[LSHR48:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT8]], [[C13]](s64)
+    ; CHECK: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UV7]](s8)
+    ; CHECK: [[LSHR49:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT9]], [[C1]](s64)
+    ; CHECK: [[LSHR50:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT9]], [[C3]](s64)
+    ; CHECK: [[LSHR51:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT9]], [[C5]](s64)
+    ; CHECK: [[LSHR52:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT9]], [[C7]](s64)
+    ; CHECK: [[LSHR53:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT9]], [[C9]](s64)
+    ; CHECK: [[LSHR54:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT9]], [[C11]](s64)
+    ; CHECK: [[LSHR55:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT9]], [[C13]](s64)
+    ; CHECK: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C14]]
+    ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C1]](s64)
+    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s8)
+    ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C14]]
+    ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SHL]](s32)
+    ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[COPY7]]
+    ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C14]]
+    ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C3]](s64)
+    ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY [[OR]](s32)
+    ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY [[SHL1]](s32)
+    ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[COPY9]], [[COPY10]]
+    ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
+    ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C14]]
+    ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C5]](s64)
+    ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY [[OR1]](s32)
+    ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY [[SHL2]](s32)
+    ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[COPY12]], [[COPY13]]
+    ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32)
+    ; CHECK: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C14]]
+    ; CHECK: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C7]](s64)
+    ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY [[OR2]](s32)
+    ; CHECK: [[COPY16:%[0-9]+]]:_(s32) = COPY [[SHL3]](s32)
+    ; CHECK: [[OR3:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[COPY16]]
+    ; CHECK: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32)
+    ; CHECK: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY17]], [[C14]]
+    ; CHECK: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C9]](s64)
+    ; CHECK: [[COPY18:%[0-9]+]]:_(s32) = COPY [[OR3]](s32)
+    ; CHECK: [[COPY19:%[0-9]+]]:_(s32) = COPY [[SHL4]](s32)
+    ; CHECK: [[OR4:%[0-9]+]]:_(s32) = G_OR [[COPY18]], [[COPY19]]
+    ; CHECK: [[COPY20:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32)
+    ; CHECK: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY20]], [[C14]]
+    ; CHECK: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C11]](s64)
+    ; CHECK: [[COPY21:%[0-9]+]]:_(s32) = COPY [[OR4]](s32)
+    ; CHECK: [[COPY22:%[0-9]+]]:_(s32) = COPY [[SHL5]](s32)
+    ; CHECK: [[OR5:%[0-9]+]]:_(s32) = G_OR [[COPY21]], [[COPY22]]
+    ; CHECK: [[COPY23:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32)
+    ; CHECK: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY23]], [[C14]]
+    ; CHECK: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C13]](s64)
+    ; CHECK: [[COPY24:%[0-9]+]]:_(s32) = COPY [[OR5]](s32)
+    ; CHECK: [[COPY25:%[0-9]+]]:_(s32) = COPY [[SHL6]](s32)
+    ; CHECK: [[OR6:%[0-9]+]]:_(s32) = G_OR [[COPY24]], [[COPY25]]
+    ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[OR6]](s32)
+    ; CHECK: [[COPY26:%[0-9]+]]:_(s32) = COPY [[LSHR7]](s32)
+    ; CHECK: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY26]], [[C14]]
+    ; CHECK: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C1]](s64)
+    ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s8)
+    ; CHECK: [[AND9:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C14]]
+    ; CHECK: [[COPY27:%[0-9]+]]:_(s32) = COPY [[SHL7]](s32)
+    ; CHECK: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND9]], [[COPY27]]
+    ; CHECK: [[COPY28:%[0-9]+]]:_(s32) = COPY [[LSHR8]](s32)
+    ; CHECK: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY28]], [[C14]]
+    ; CHECK: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C3]](s64)
+    ; CHECK: [[COPY29:%[0-9]+]]:_(s32) = COPY [[OR7]](s32)
+    ; CHECK: [[COPY30:%[0-9]+]]:_(s32) = COPY [[SHL8]](s32)
+    ; CHECK: [[OR8:%[0-9]+]]:_(s32) = G_OR [[COPY29]], [[COPY30]]
+    ; CHECK: [[COPY31:%[0-9]+]]:_(s32) = COPY [[LSHR9]](s32)
+    ; CHECK: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY31]], [[C14]]
+    ; CHECK: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C5]](s64)
+    ; CHECK: [[COPY32:%[0-9]+]]:_(s32) = COPY [[OR8]](s32)
+    ; CHECK: [[COPY33:%[0-9]+]]:_(s32) = COPY [[SHL9]](s32)
+    ; CHECK: [[OR9:%[0-9]+]]:_(s32) = G_OR [[COPY32]], [[COPY33]]
+    ; CHECK: [[COPY34:%[0-9]+]]:_(s32) = COPY [[LSHR10]](s32)
+    ; CHECK: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY34]], [[C14]]
+    ; CHECK: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND12]], [[C7]](s64)
+    ; CHECK: [[COPY35:%[0-9]+]]:_(s32) = COPY [[OR9]](s32)
+    ; CHECK: [[COPY36:%[0-9]+]]:_(s32) = COPY [[SHL10]](s32)
+    ; CHECK: [[OR10:%[0-9]+]]:_(s32) = G_OR [[COPY35]], [[COPY36]]
+    ; CHECK: [[COPY37:%[0-9]+]]:_(s32) = COPY [[LSHR11]](s32)
+    ; CHECK: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY37]], [[C14]]
+    ; CHECK: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C9]](s64)
+    ; CHECK: [[COPY38:%[0-9]+]]:_(s32) = COPY [[OR10]](s32)
+    ; CHECK: [[COPY39:%[0-9]+]]:_(s32) = COPY [[SHL11]](s32)
+    ; CHECK: [[OR11:%[0-9]+]]:_(s32) = G_OR [[COPY38]], [[COPY39]]
+    ; CHECK: [[COPY40:%[0-9]+]]:_(s32) = COPY [[LSHR12]](s32)
+    ; CHECK: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY40]], [[C14]]
+    ; CHECK: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C11]](s64)
+    ; CHECK: [[COPY41:%[0-9]+]]:_(s32) = COPY [[OR11]](s32)
+    ; CHECK: [[COPY42:%[0-9]+]]:_(s32) = COPY [[SHL12]](s32)
+    ; CHECK: [[OR12:%[0-9]+]]:_(s32) = G_OR [[COPY41]], [[COPY42]]
+    ; CHECK: [[COPY43:%[0-9]+]]:_(s32) = COPY [[LSHR13]](s32)
+    ; CHECK: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY43]], [[C14]]
+    ; CHECK: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C13]](s64)
+    ; CHECK: [[COPY44:%[0-9]+]]:_(s32) = COPY [[OR12]](s32)
+    ; CHECK: [[COPY45:%[0-9]+]]:_(s32) = COPY [[SHL13]](s32)
+    ; CHECK: [[OR13:%[0-9]+]]:_(s32) = G_OR [[COPY44]], [[COPY45]]
+    ; CHECK: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[OR13]](s32)
+    ; CHECK: [[COPY46:%[0-9]+]]:_(s32) = COPY [[LSHR14]](s32)
+    ; CHECK: [[AND16:%[0-9]+]]:_(s32) = G_AND [[COPY46]], [[C14]]
+    ; CHECK: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND16]], [[C1]](s64)
+    ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s8)
+    ; CHECK: [[AND17:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C14]]
+    ; CHECK: [[COPY47:%[0-9]+]]:_(s32) = COPY [[SHL14]](s32)
+    ; CHECK: [[OR14:%[0-9]+]]:_(s32) = G_OR [[AND17]], [[COPY47]]
+    ; CHECK: [[COPY48:%[0-9]+]]:_(s32) = COPY [[LSHR15]](s32)
+    ; CHECK: [[AND18:%[0-9]+]]:_(s32) = G_AND [[COPY48]], [[C14]]
+    ; CHECK: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND18]], [[C3]](s64)
+    ; CHECK: [[COPY49:%[0-9]+]]:_(s32) = COPY [[OR14]](s32)
+    ; CHECK: [[COPY50:%[0-9]+]]:_(s32) = COPY [[SHL15]](s32)
+    ; CHECK: [[OR15:%[0-9]+]]:_(s32) = G_OR [[COPY49]], [[COPY50]]
+    ; CHECK: [[COPY51:%[0-9]+]]:_(s32) = COPY [[LSHR16]](s32)
+    ; CHECK: [[AND19:%[0-9]+]]:_(s32) = G_AND [[COPY51]], [[C14]]
+    ; CHECK: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[C5]](s64)
+    ; CHECK: [[COPY52:%[0-9]+]]:_(s32) = COPY [[OR15]](s32)
+    ; CHECK: [[COPY53:%[0-9]+]]:_(s32) = COPY [[SHL16]](s32)
+    ; CHECK: [[OR16:%[0-9]+]]:_(s32) = G_OR [[COPY52]], [[COPY53]]
+    ; CHECK: [[COPY54:%[0-9]+]]:_(s32) = COPY [[LSHR17]](s32)
+    ; CHECK: [[AND20:%[0-9]+]]:_(s32) = G_AND [[COPY54]], [[C14]]
+    ; CHECK: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[AND20]], [[C7]](s64)
+    ; CHECK: [[COPY55:%[0-9]+]]:_(s32) = COPY [[OR16]](s32)
+    ; CHECK: [[COPY56:%[0-9]+]]:_(s32) = COPY [[SHL17]](s32)
+    ; CHECK: [[OR17:%[0-9]+]]:_(s32) = G_OR [[COPY55]], [[COPY56]]
+    ; CHECK: [[COPY57:%[0-9]+]]:_(s32) = COPY [[LSHR18]](s32)
+    ; CHECK: [[AND21:%[0-9]+]]:_(s32) = G_AND [[COPY57]], [[C14]]
+    ; CHECK: [[SHL18:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[C9]](s64)
+    ; CHECK: [[COPY58:%[0-9]+]]:_(s32) = COPY [[OR17]](s32)
+    ; CHECK: [[COPY59:%[0-9]+]]:_(s32) = COPY [[SHL18]](s32)
+    ; CHECK: [[OR18:%[0-9]+]]:_(s32) = G_OR [[COPY58]], [[COPY59]]
+    ; CHECK: [[COPY60:%[0-9]+]]:_(s32) = COPY [[LSHR19]](s32)
+    ; CHECK: [[AND22:%[0-9]+]]:_(s32) = G_AND [[COPY60]], [[C14]]
+    ; CHECK: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[AND22]], [[C11]](s64)
+    ; CHECK: [[COPY61:%[0-9]+]]:_(s32) = COPY [[OR18]](s32)
+    ; CHECK: [[COPY62:%[0-9]+]]:_(s32) = COPY [[SHL19]](s32)
+    ; CHECK: [[OR19:%[0-9]+]]:_(s32) = G_OR [[COPY61]], [[COPY62]]
+    ; CHECK: [[COPY63:%[0-9]+]]:_(s32) = COPY [[LSHR20]](s32)
+    ; CHECK: [[AND23:%[0-9]+]]:_(s32) = G_AND [[COPY63]], [[C14]]
+    ; CHECK: [[SHL20:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[C13]](s64)
+    ; CHECK: [[COPY64:%[0-9]+]]:_(s32) = COPY [[OR19]](s32)
+    ; CHECK: [[COPY65:%[0-9]+]]:_(s32) = COPY [[SHL20]](s32)
+    ; CHECK: [[OR20:%[0-9]+]]:_(s32) = G_OR [[COPY64]], [[COPY65]]
+    ; CHECK: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[OR20]](s32)
+    ; CHECK: [[COPY66:%[0-9]+]]:_(s32) = COPY [[LSHR21]](s32)
+    ; CHECK: [[AND24:%[0-9]+]]:_(s32) = G_AND [[COPY66]], [[C14]]
+    ; CHECK: [[SHL21:%[0-9]+]]:_(s32) = G_SHL [[AND24]], [[C1]](s64)
+    ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s8)
+    ; CHECK: [[AND25:%[0-9]+]]:_(s32) = G_AND [[ANYEXT3]], [[C14]]
+    ; CHECK: [[COPY67:%[0-9]+]]:_(s32) = COPY [[SHL21]](s32)
+    ; CHECK: [[OR21:%[0-9]+]]:_(s32) = G_OR [[AND25]], [[COPY67]]
+    ; CHECK: [[COPY68:%[0-9]+]]:_(s32) = COPY [[LSHR22]](s32)
+    ; CHECK: [[AND26:%[0-9]+]]:_(s32) = G_AND [[COPY68]], [[C14]]
+    ; CHECK: [[SHL22:%[0-9]+]]:_(s32) = G_SHL [[AND26]], [[C3]](s64)
+    ; CHECK: [[COPY69:%[0-9]+]]:_(s32) = COPY [[OR21]](s32)
+    ; CHECK: [[COPY70:%[0-9]+]]:_(s32) = COPY [[SHL22]](s32)
+    ; CHECK: [[OR22:%[0-9]+]]:_(s32) = G_OR [[COPY69]], [[COPY70]]
+    ; CHECK: [[COPY71:%[0-9]+]]:_(s32) = COPY [[LSHR23]](s32)
+    ; CHECK: [[AND27:%[0-9]+]]:_(s32) = G_AND [[COPY71]], [[C14]]
+    ; CHECK: [[SHL23:%[0-9]+]]:_(s32) = G_SHL [[AND27]], [[C5]](s64)
+    ; CHECK: [[COPY72:%[0-9]+]]:_(s32) = COPY [[OR22]](s32)
+    ; CHECK: [[COPY73:%[0-9]+]]:_(s32) = COPY [[SHL23]](s32)
+    ; CHECK: [[OR23:%[0-9]+]]:_(s32) = G_OR [[COPY72]], [[COPY73]]
+    ; CHECK: [[COPY74:%[0-9]+]]:_(s32) = COPY [[LSHR24]](s32)
+    ; CHECK: [[AND28:%[0-9]+]]:_(s32) = G_AND [[COPY74]], [[C14]]
+    ; CHECK: [[SHL24:%[0-9]+]]:_(s32) = G_SHL [[AND28]], [[C7]](s64)
+    ; CHECK: [[COPY75:%[0-9]+]]:_(s32) = COPY [[OR23]](s32)
+    ; CHECK: [[COPY76:%[0-9]+]]:_(s32) = COPY [[SHL24]](s32)
+    ; CHECK: [[OR24:%[0-9]+]]:_(s32) = G_OR [[COPY75]], [[COPY76]]
+    ; CHECK: [[COPY77:%[0-9]+]]:_(s32) = COPY [[LSHR25]](s32)
+    ; CHECK: [[AND29:%[0-9]+]]:_(s32) = G_AND [[COPY77]], [[C14]]
+    ; CHECK: [[SHL25:%[0-9]+]]:_(s32) = G_SHL [[AND29]], [[C9]](s64)
+    ; CHECK: [[COPY78:%[0-9]+]]:_(s32) = COPY [[OR24]](s32)
+    ; CHECK: [[COPY79:%[0-9]+]]:_(s32) = COPY [[SHL25]](s32)
+    ; CHECK: [[OR25:%[0-9]+]]:_(s32) = G_OR [[COPY78]], [[COPY79]]
+    ; CHECK: [[COPY80:%[0-9]+]]:_(s32) = COPY [[LSHR26]](s32)
+    ; CHECK: [[AND30:%[0-9]+]]:_(s32) = G_AND [[COPY80]], [[C14]]
+    ; CHECK: [[SHL26:%[0-9]+]]:_(s32) = G_SHL [[AND30]], [[C11]](s64)
+    ; CHECK: [[COPY81:%[0-9]+]]:_(s32) = COPY [[OR25]](s32)
+    ; CHECK: [[COPY82:%[0-9]+]]:_(s32) = COPY [[SHL26]](s32)
+    ; CHECK: [[OR26:%[0-9]+]]:_(s32) = G_OR [[COPY81]], [[COPY82]]
+    ; CHECK: [[COPY83:%[0-9]+]]:_(s32) = COPY [[LSHR27]](s32)
+    ; CHECK: [[AND31:%[0-9]+]]:_(s32) = G_AND [[COPY83]], [[C14]]
+    ; CHECK: [[SHL27:%[0-9]+]]:_(s32) = G_SHL [[AND31]], [[C13]](s64)
+    ; CHECK: [[COPY84:%[0-9]+]]:_(s32) = COPY [[OR26]](s32)
+    ; CHECK: [[COPY85:%[0-9]+]]:_(s32) = COPY [[SHL27]](s32)
+    ; CHECK: [[OR27:%[0-9]+]]:_(s32) = G_OR [[COPY84]], [[COPY85]]
+    ; CHECK: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[OR27]](s32)
+    ; CHECK: [[COPY86:%[0-9]+]]:_(s32) = COPY [[LSHR28]](s32)
+    ; CHECK: [[AND32:%[0-9]+]]:_(s32) = G_AND [[COPY86]], [[C14]]
+    ; CHECK: [[SHL28:%[0-9]+]]:_(s32) = G_SHL [[AND32]], [[C1]](s64)
+    ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[UV4]](s8)
+    ; CHECK: [[AND33:%[0-9]+]]:_(s32) = G_AND [[ANYEXT4]], [[C14]]
+    ; CHECK: [[COPY87:%[0-9]+]]:_(s32) = COPY [[SHL28]](s32)
+    ; CHECK: [[OR28:%[0-9]+]]:_(s32) = G_OR [[AND33]], [[COPY87]]
+    ; CHECK: [[COPY88:%[0-9]+]]:_(s32) = COPY [[LSHR29]](s32)
+    ; CHECK: [[AND34:%[0-9]+]]:_(s32) = G_AND [[COPY88]], [[C14]]
+    ; CHECK: [[SHL29:%[0-9]+]]:_(s32) = G_SHL [[AND34]], [[C3]](s64)
+    ; CHECK: [[COPY89:%[0-9]+]]:_(s32) = COPY [[OR28]](s32)
+    ; CHECK: [[COPY90:%[0-9]+]]:_(s32) = COPY [[SHL29]](s32)
+    ; CHECK: [[OR29:%[0-9]+]]:_(s32) = G_OR [[COPY89]], [[COPY90]]
+    ; CHECK: [[COPY91:%[0-9]+]]:_(s32) = COPY [[LSHR30]](s32)
+    ; CHECK: [[AND35:%[0-9]+]]:_(s32) = G_AND [[COPY91]], [[C14]]
+    ; CHECK: [[SHL30:%[0-9]+]]:_(s32) = G_SHL [[AND35]], [[C5]](s64)
+    ; CHECK: [[COPY92:%[0-9]+]]:_(s32) = COPY [[OR29]](s32)
+    ; CHECK: [[COPY93:%[0-9]+]]:_(s32) = COPY [[SHL30]](s32)
+    ; CHECK: [[OR30:%[0-9]+]]:_(s32) = G_OR [[COPY92]], [[COPY93]]
+    ; CHECK: [[COPY94:%[0-9]+]]:_(s32) = COPY [[LSHR31]](s32)
+    ; CHECK: [[AND36:%[0-9]+]]:_(s32) = G_AND [[COPY94]], [[C14]]
+    ; CHECK: [[SHL31:%[0-9]+]]:_(s32) = G_SHL [[AND36]], [[C7]](s64)
+    ; CHECK: [[COPY95:%[0-9]+]]:_(s32) = COPY [[OR30]](s32)
+    ; CHECK: [[COPY96:%[0-9]+]]:_(s32) = COPY [[SHL31]](s32)
+    ; CHECK: [[OR31:%[0-9]+]]:_(s32) = G_OR [[COPY95]], [[COPY96]]
+    ; CHECK: [[COPY97:%[0-9]+]]:_(s32) = COPY [[LSHR32]](s32)
+    ; CHECK: [[AND37:%[0-9]+]]:_(s32) = G_AND [[COPY97]], [[C14]]
+    ; CHECK: [[SHL32:%[0-9]+]]:_(s32) = G_SHL [[AND37]], [[C9]](s64)
+    ; CHECK: [[COPY98:%[0-9]+]]:_(s32) = COPY [[OR31]](s32)
+    ; CHECK: [[COPY99:%[0-9]+]]:_(s32) = COPY [[SHL32]](s32)
+    ; CHECK: [[OR32:%[0-9]+]]:_(s32) = G_OR [[COPY98]], [[COPY99]]
+    ; CHECK: [[COPY100:%[0-9]+]]:_(s32) = COPY [[LSHR33]](s32)
+    ; CHECK: [[AND38:%[0-9]+]]:_(s32) = G_AND [[COPY100]], [[C14]]
+    ; CHECK: [[SHL33:%[0-9]+]]:_(s32) = G_SHL [[AND38]], [[C11]](s64)
+    ; CHECK: [[COPY101:%[0-9]+]]:_(s32) = COPY [[OR32]](s32)
+    ; CHECK: [[COPY102:%[0-9]+]]:_(s32) = COPY [[SHL33]](s32)
+    ; CHECK: [[OR33:%[0-9]+]]:_(s32) = G_OR [[COPY101]], [[COPY102]]
+    ; CHECK: [[COPY103:%[0-9]+]]:_(s32) = COPY [[LSHR34]](s32)
+    ; CHECK: [[AND39:%[0-9]+]]:_(s32) = G_AND [[COPY103]], [[C14]]
+    ; CHECK: [[SHL34:%[0-9]+]]:_(s32) = G_SHL [[AND39]], [[C13]](s64)
+    ; CHECK: [[COPY104:%[0-9]+]]:_(s32) = COPY [[OR33]](s32)
+    ; CHECK: [[COPY105:%[0-9]+]]:_(s32) = COPY [[SHL34]](s32)
+    ; CHECK: [[OR34:%[0-9]+]]:_(s32) = G_OR [[COPY104]], [[COPY105]]
+    ; CHECK: [[TRUNC4:%[0-9]+]]:_(s8) = G_TRUNC [[OR34]](s32)
+    ; CHECK: [[COPY106:%[0-9]+]]:_(s32) = COPY [[LSHR35]](s32)
+    ; CHECK: [[AND40:%[0-9]+]]:_(s32) = G_AND [[COPY106]], [[C14]]
+    ; CHECK: [[SHL35:%[0-9]+]]:_(s32) = G_SHL [[AND40]], [[C1]](s64)
+    ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[UV5]](s8)
+    ; CHECK: [[AND41:%[0-9]+]]:_(s32) = G_AND [[ANYEXT5]], [[C14]]
+    ; CHECK: [[COPY107:%[0-9]+]]:_(s32) = COPY [[SHL35]](s32)
+    ; CHECK: [[OR35:%[0-9]+]]:_(s32) = G_OR [[AND41]], [[COPY107]]
+    ; CHECK: [[COPY108:%[0-9]+]]:_(s32) = COPY [[LSHR36]](s32)
+    ; CHECK: [[AND42:%[0-9]+]]:_(s32) = G_AND [[COPY108]], [[C14]]
+    ; CHECK: [[SHL36:%[0-9]+]]:_(s32) = G_SHL [[AND42]], [[C3]](s64)
+    ; CHECK: [[COPY109:%[0-9]+]]:_(s32) = COPY [[OR35]](s32)
+    ; CHECK: [[COPY110:%[0-9]+]]:_(s32) = COPY [[SHL36]](s32)
+    ; CHECK: [[OR36:%[0-9]+]]:_(s32) = G_OR [[COPY109]], [[COPY110]]
+    ; CHECK: [[COPY111:%[0-9]+]]:_(s32) = COPY [[LSHR37]](s32)
+    ; CHECK: [[AND43:%[0-9]+]]:_(s32) = G_AND [[COPY111]], [[C14]]
+    ; CHECK: [[SHL37:%[0-9]+]]:_(s32) = G_SHL [[AND43]], [[C5]](s64)
+    ; CHECK: [[COPY112:%[0-9]+]]:_(s32) = COPY [[OR36]](s32)
+    ; CHECK: [[COPY113:%[0-9]+]]:_(s32) = COPY [[SHL37]](s32)
+    ; CHECK: [[OR37:%[0-9]+]]:_(s32) = G_OR [[COPY112]], [[COPY113]]
+    ; CHECK: [[COPY114:%[0-9]+]]:_(s32) = COPY [[LSHR38]](s32)
+    ; CHECK: [[AND44:%[0-9]+]]:_(s32) = G_AND [[COPY114]], [[C14]]
+    ; CHECK: [[SHL38:%[0-9]+]]:_(s32) = G_SHL [[AND44]], [[C7]](s64)
+    ; CHECK: [[COPY115:%[0-9]+]]:_(s32) = COPY [[OR37]](s32)
+    ; CHECK: [[COPY116:%[0-9]+]]:_(s32) = COPY [[SHL38]](s32)
+    ; CHECK: [[OR38:%[0-9]+]]:_(s32) = G_OR [[COPY115]], [[COPY116]]
+    ; CHECK: [[COPY117:%[0-9]+]]:_(s32) = COPY [[LSHR39]](s32)
+    ; CHECK: [[AND45:%[0-9]+]]:_(s32) = G_AND [[COPY117]], [[C14]]
+    ; CHECK: [[SHL39:%[0-9]+]]:_(s32) = G_SHL [[AND45]], [[C9]](s64)
+    ; CHECK: [[COPY118:%[0-9]+]]:_(s32) = COPY [[OR38]](s32)
+    ; CHECK: [[COPY119:%[0-9]+]]:_(s32) = COPY [[SHL39]](s32)
+    ; CHECK: [[OR39:%[0-9]+]]:_(s32) = G_OR [[COPY118]], [[COPY119]]
+    ; CHECK: [[COPY120:%[0-9]+]]:_(s32) = COPY [[LSHR40]](s32)
+    ; CHECK: [[AND46:%[0-9]+]]:_(s32) = G_AND [[COPY120]], [[C14]]
+    ; CHECK: [[SHL40:%[0-9]+]]:_(s32) = G_SHL [[AND46]], [[C11]](s64)
+    ; CHECK: [[COPY121:%[0-9]+]]:_(s32) = COPY [[OR39]](s32)
+    ; CHECK: [[COPY122:%[0-9]+]]:_(s32) = COPY [[SHL40]](s32)
+    ; CHECK: [[OR40:%[0-9]+]]:_(s32) = G_OR [[COPY121]], [[COPY122]]
+    ; CHECK: [[COPY123:%[0-9]+]]:_(s32) = COPY [[LSHR41]](s32)
+    ; CHECK: [[AND47:%[0-9]+]]:_(s32) = G_AND [[COPY123]], [[C14]]
+    ; CHECK: [[SHL41:%[0-9]+]]:_(s32) = G_SHL [[AND47]], [[C13]](s64)
+    ; CHECK: [[COPY124:%[0-9]+]]:_(s32) = COPY [[OR40]](s32)
+    ; CHECK: [[COPY125:%[0-9]+]]:_(s32) = COPY [[SHL41]](s32)
+    ; CHECK: [[OR41:%[0-9]+]]:_(s32) = G_OR [[COPY124]], [[COPY125]]
+    ; CHECK: [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[OR41]](s32)
+    ; CHECK: [[COPY126:%[0-9]+]]:_(s32) = COPY [[LSHR42]](s32)
+    ; CHECK: [[AND48:%[0-9]+]]:_(s32) = G_AND [[COPY126]], [[C14]]
+    ; CHECK: [[SHL42:%[0-9]+]]:_(s32) = G_SHL [[AND48]], [[C1]](s64)
+    ; CHECK: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[UV6]](s8)
+    ; CHECK: [[AND49:%[0-9]+]]:_(s32) = G_AND [[ANYEXT6]], [[C14]]
+    ; CHECK: [[COPY127:%[0-9]+]]:_(s32) = COPY [[SHL42]](s32)
+    ; CHECK: [[OR42:%[0-9]+]]:_(s32) = G_OR [[AND49]], [[COPY127]]
+    ; CHECK: [[COPY128:%[0-9]+]]:_(s32) = COPY [[LSHR43]](s32)
+    ; CHECK: [[AND50:%[0-9]+]]:_(s32) = G_AND [[COPY128]], [[C14]]
+    ; CHECK: [[SHL43:%[0-9]+]]:_(s32) = G_SHL [[AND50]], [[C3]](s64)
+    ; CHECK: [[COPY129:%[0-9]+]]:_(s32) = COPY [[OR42]](s32)
+    ; CHECK: [[COPY130:%[0-9]+]]:_(s32) = COPY [[SHL43]](s32)
+    ; CHECK: [[OR43:%[0-9]+]]:_(s32) = G_OR [[COPY129]], [[COPY130]]
+    ; CHECK: [[COPY131:%[0-9]+]]:_(s32) = COPY [[LSHR44]](s32)
+    ; CHECK: [[AND51:%[0-9]+]]:_(s32) = G_AND [[COPY131]], [[C14]]
+    ; CHECK: [[SHL44:%[0-9]+]]:_(s32) = G_SHL [[AND51]], [[C5]](s64)
+    ; CHECK: [[COPY132:%[0-9]+]]:_(s32) = COPY [[OR43]](s32)
+    ; CHECK: [[COPY133:%[0-9]+]]:_(s32) = COPY [[SHL44]](s32)
+    ; CHECK: [[OR44:%[0-9]+]]:_(s32) = G_OR [[COPY132]], [[COPY133]]
+    ; CHECK: [[COPY134:%[0-9]+]]:_(s32) = COPY [[LSHR45]](s32)
+    ; CHECK: [[AND52:%[0-9]+]]:_(s32) = G_AND [[COPY134]], [[C14]]
+    ; CHECK: [[SHL45:%[0-9]+]]:_(s32) = G_SHL [[AND52]], [[C7]](s64)
+    ; CHECK: [[COPY135:%[0-9]+]]:_(s32) = COPY [[OR44]](s32)
+    ; CHECK: [[COPY136:%[0-9]+]]:_(s32) = COPY [[SHL45]](s32)
+    ; CHECK: [[OR45:%[0-9]+]]:_(s32) = G_OR [[COPY135]], [[COPY136]]
+    ; CHECK: [[COPY137:%[0-9]+]]:_(s32) = COPY [[LSHR46]](s32)
+    ; CHECK: [[AND53:%[0-9]+]]:_(s32) = G_AND [[COPY137]], [[C14]]
+    ; CHECK: [[SHL46:%[0-9]+]]:_(s32) = G_SHL [[AND53]], [[C9]](s64)
+    ; CHECK: [[COPY138:%[0-9]+]]:_(s32) = COPY [[OR45]](s32)
+    ; CHECK: [[COPY139:%[0-9]+]]:_(s32) = COPY [[SHL46]](s32)
+    ; CHECK: [[OR46:%[0-9]+]]:_(s32) = G_OR [[COPY138]], [[COPY139]]
+    ; CHECK: [[COPY140:%[0-9]+]]:_(s32) = COPY [[LSHR47]](s32)
+    ; CHECK: [[AND54:%[0-9]+]]:_(s32) = G_AND [[COPY140]], [[C14]]
+    ; CHECK: [[SHL47:%[0-9]+]]:_(s32) = G_SHL [[AND54]], [[C11]](s64)
+    ; CHECK: [[COPY141:%[0-9]+]]:_(s32) = COPY [[OR46]](s32)
+    ; CHECK: [[COPY142:%[0-9]+]]:_(s32) = COPY [[SHL47]](s32)
+    ; CHECK: [[OR47:%[0-9]+]]:_(s32) = G_OR [[COPY141]], [[COPY142]]
+    ; CHECK: [[COPY143:%[0-9]+]]:_(s32) = COPY [[LSHR48]](s32)
+    ; CHECK: [[AND55:%[0-9]+]]:_(s32) = G_AND [[COPY143]], [[C14]]
+    ; CHECK: [[SHL48:%[0-9]+]]:_(s32) = G_SHL [[AND55]], [[C13]](s64)
+    ; CHECK: [[COPY144:%[0-9]+]]:_(s32) = COPY [[OR47]](s32)
+    ; CHECK: [[COPY145:%[0-9]+]]:_(s32) = COPY [[SHL48]](s32)
+    ; CHECK: [[OR48:%[0-9]+]]:_(s32) = G_OR [[COPY144]], [[COPY145]]
+    ; CHECK: [[TRUNC6:%[0-9]+]]:_(s8) = G_TRUNC [[OR48]](s32)
+    ; CHECK: [[COPY146:%[0-9]+]]:_(s32) = COPY [[LSHR49]](s32)
+    ; CHECK: [[AND56:%[0-9]+]]:_(s32) = G_AND [[COPY146]], [[C14]]
+    ; CHECK: [[SHL49:%[0-9]+]]:_(s32) = G_SHL [[AND56]], [[C1]](s64)
+    ; CHECK: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[UV7]](s8)
+    ; CHECK: [[AND57:%[0-9]+]]:_(s32) = G_AND [[ANYEXT7]], [[C14]]
+    ; CHECK: [[COPY147:%[0-9]+]]:_(s32) = COPY [[SHL49]](s32)
+    ; CHECK: [[OR49:%[0-9]+]]:_(s32) = G_OR [[AND57]], [[COPY147]]
+    ; CHECK: [[COPY148:%[0-9]+]]:_(s32) = COPY [[LSHR50]](s32)
+    ; CHECK: [[AND58:%[0-9]+]]:_(s32) = G_AND [[COPY148]], [[C14]]
+    ; CHECK: [[SHL50:%[0-9]+]]:_(s32) = G_SHL [[AND58]], [[C3]](s64)
+    ; CHECK: [[COPY149:%[0-9]+]]:_(s32) = COPY [[OR49]](s32)
+    ; CHECK: [[COPY150:%[0-9]+]]:_(s32) = COPY [[SHL50]](s32)
+    ; CHECK: [[OR50:%[0-9]+]]:_(s32) = G_OR [[COPY149]], [[COPY150]]
+    ; CHECK: [[COPY151:%[0-9]+]]:_(s32) = COPY [[LSHR51]](s32)
+    ; CHECK: [[AND59:%[0-9]+]]:_(s32) = G_AND [[COPY151]], [[C14]]
+    ; CHECK: [[SHL51:%[0-9]+]]:_(s32) = G_SHL [[AND59]], [[C5]](s64)
+    ; CHECK: [[COPY152:%[0-9]+]]:_(s32) = COPY [[OR50]](s32)
+    ; CHECK: [[COPY153:%[0-9]+]]:_(s32) = COPY [[SHL51]](s32)
+    ; CHECK: [[OR51:%[0-9]+]]:_(s32) = G_OR [[COPY152]], [[COPY153]]
+    ; CHECK: [[COPY154:%[0-9]+]]:_(s32) = COPY [[LSHR52]](s32)
+    ; CHECK: [[AND60:%[0-9]+]]:_(s32) = G_AND [[COPY154]], [[C14]]
+    ; CHECK: [[SHL52:%[0-9]+]]:_(s32) = G_SHL [[AND60]], [[C7]](s64)
+    ; CHECK: [[COPY155:%[0-9]+]]:_(s32) = COPY [[OR51]](s32)
+    ; CHECK: [[COPY156:%[0-9]+]]:_(s32) = COPY [[SHL52]](s32)
+    ; CHECK: [[OR52:%[0-9]+]]:_(s32) = G_OR [[COPY155]], [[COPY156]]
+    ; CHECK: [[COPY157:%[0-9]+]]:_(s32) = COPY [[LSHR53]](s32)
+    ; CHECK: [[AND61:%[0-9]+]]:_(s32) = G_AND [[COPY157]], [[C14]]
+    ; CHECK: [[SHL53:%[0-9]+]]:_(s32) = G_SHL [[AND61]], [[C9]](s64)
+    ; CHECK: [[COPY158:%[0-9]+]]:_(s32) = COPY [[OR52]](s32)
+    ; CHECK: [[COPY159:%[0-9]+]]:_(s32) = COPY [[SHL53]](s32)
+    ; CHECK: [[OR53:%[0-9]+]]:_(s32) = G_OR [[COPY158]], [[COPY159]]
+    ; CHECK: [[COPY160:%[0-9]+]]:_(s32) = COPY [[LSHR54]](s32)
+    ; CHECK: [[AND62:%[0-9]+]]:_(s32) = G_AND [[COPY160]], [[C14]]
+    ; CHECK: [[SHL54:%[0-9]+]]:_(s32) = G_SHL [[AND62]], [[C11]](s64)
+    ; CHECK: [[COPY161:%[0-9]+]]:_(s32) = COPY [[OR53]](s32)
+    ; CHECK: [[COPY162:%[0-9]+]]:_(s32) = COPY [[SHL54]](s32)
+    ; CHECK: [[OR54:%[0-9]+]]:_(s32) = G_OR [[COPY161]], [[COPY162]]
+    ; CHECK: [[COPY163:%[0-9]+]]:_(s32) = COPY [[LSHR55]](s32)
+    ; CHECK: [[AND63:%[0-9]+]]:_(s32) = G_AND [[COPY163]], [[C14]]
+    ; CHECK: [[SHL55:%[0-9]+]]:_(s32) = G_SHL [[AND63]], [[C13]](s64)
+    ; CHECK: [[COPY164:%[0-9]+]]:_(s32) = COPY [[OR54]](s32)
+    ; CHECK: [[COPY165:%[0-9]+]]:_(s32) = COPY [[SHL55]](s32)
+    ; CHECK: [[OR55:%[0-9]+]]:_(s32) = G_OR [[COPY164]], [[COPY165]]
+    ; CHECK: [[TRUNC7:%[0-9]+]]:_(s8) = G_TRUNC [[OR55]](s32)
+    ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[TRUNC]](s8), [[TRUNC1]](s8), [[TRUNC2]](s8), [[TRUNC3]](s8), [[TRUNC4]](s8), [[TRUNC5]](s8), [[TRUNC6]](s8), [[TRUNC7]](s8)
+    ; CHECK: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK: [[SHL56:%[0-9]+]]:_(s32) = G_SHL [[C15]], [[C1]](s64)
+    ; CHECK: [[COPY166:%[0-9]+]]:_(s32) = COPY [[UADDE]](s32)
+    ; CHECK: [[AND64:%[0-9]+]]:_(s32) = G_AND [[COPY166]], [[C14]]
+    ; CHECK: [[COPY167:%[0-9]+]]:_(s32) = COPY [[SHL56]](s32)
+    ; CHECK: [[OR56:%[0-9]+]]:_(s32) = G_OR [[AND64]], [[COPY167]]
+    ; CHECK: [[SHL57:%[0-9]+]]:_(s32) = G_SHL [[C15]], [[C3]](s64)
+    ; CHECK: [[COPY168:%[0-9]+]]:_(s32) = COPY [[OR56]](s32)
+    ; CHECK: [[COPY169:%[0-9]+]]:_(s32) = COPY [[SHL57]](s32)
+    ; CHECK: [[OR57:%[0-9]+]]:_(s32) = G_OR [[COPY168]], [[COPY169]]
+    ; CHECK: [[SHL58:%[0-9]+]]:_(s32) = G_SHL [[C15]], [[C5]](s64)
+    ; CHECK: [[COPY170:%[0-9]+]]:_(s32) = COPY [[OR57]](s32)
+    ; CHECK: [[COPY171:%[0-9]+]]:_(s32) = COPY [[SHL58]](s32)
+    ; CHECK: [[OR58:%[0-9]+]]:_(s32) = G_OR [[COPY170]], [[COPY171]]
+    ; CHECK: [[SHL59:%[0-9]+]]:_(s32) = G_SHL [[C15]], [[C7]](s64)
+    ; CHECK: [[COPY172:%[0-9]+]]:_(s32) = COPY [[OR58]](s32)
+    ; CHECK: [[COPY173:%[0-9]+]]:_(s32) = COPY [[SHL59]](s32)
+    ; CHECK: [[OR59:%[0-9]+]]:_(s32) = G_OR [[COPY172]], [[COPY173]]
+    ; CHECK: [[SHL60:%[0-9]+]]:_(s32) = G_SHL [[C15]], [[C9]](s64)
+    ; CHECK: [[COPY174:%[0-9]+]]:_(s32) = COPY [[OR59]](s32)
+    ; CHECK: [[COPY175:%[0-9]+]]:_(s32) = COPY [[SHL60]](s32)
+    ; CHECK: [[OR60:%[0-9]+]]:_(s32) = G_OR [[COPY174]], [[COPY175]]
+    ; CHECK: [[SHL61:%[0-9]+]]:_(s32) = G_SHL [[C15]], [[C11]](s64)
+    ; CHECK: [[COPY176:%[0-9]+]]:_(s32) = COPY [[OR60]](s32)
+    ; CHECK: [[COPY177:%[0-9]+]]:_(s32) = COPY [[SHL61]](s32)
+    ; CHECK: [[OR61:%[0-9]+]]:_(s32) = G_OR [[COPY176]], [[COPY177]]
+    ; CHECK: [[SHL62:%[0-9]+]]:_(s32) = G_SHL [[C15]], [[C13]](s64)
+    ; CHECK: [[COPY178:%[0-9]+]]:_(s32) = COPY [[OR61]](s32)
+    ; CHECK: [[COPY179:%[0-9]+]]:_(s32) = COPY [[SHL62]](s32)
+    ; CHECK: [[OR62:%[0-9]+]]:_(s32) = G_OR [[COPY178]], [[COPY179]]
+    ; CHECK: [[TRUNC8:%[0-9]+]]:_(s8) = G_TRUNC [[OR62]](s32)
+    ; CHECK: [[SHL63:%[0-9]+]]:_(s32) = G_SHL [[C15]], [[C1]](s64)
+    ; CHECK: [[COPY180:%[0-9]+]]:_(s32) = COPY [[SHL63]](s32)
+    ; CHECK: [[OR63:%[0-9]+]]:_(s32) = G_OR [[C15]], [[COPY180]]
+    ; CHECK: [[SHL64:%[0-9]+]]:_(s32) = G_SHL [[C15]], [[C3]](s64)
+    ; CHECK: [[COPY181:%[0-9]+]]:_(s32) = COPY [[OR63]](s32)
+    ; CHECK: [[COPY182:%[0-9]+]]:_(s32) = COPY [[SHL64]](s32)
+    ; CHECK: [[OR64:%[0-9]+]]:_(s32) = G_OR [[COPY181]], [[COPY182]]
+    ; CHECK: [[SHL65:%[0-9]+]]:_(s32) = G_SHL [[C15]], [[C5]](s64)
+    ; CHECK: [[COPY183:%[0-9]+]]:_(s32) = COPY [[OR64]](s32)
+    ; CHECK: [[COPY184:%[0-9]+]]:_(s32) = COPY [[SHL65]](s32)
+    ; CHECK: [[OR65:%[0-9]+]]:_(s32) = G_OR [[COPY183]], [[COPY184]]
+    ; CHECK: [[SHL66:%[0-9]+]]:_(s32) = G_SHL [[C15]], [[C7]](s64)
+    ; CHECK: [[COPY185:%[0-9]+]]:_(s32) = COPY [[OR65]](s32)
+    ; CHECK: [[COPY186:%[0-9]+]]:_(s32) = COPY [[SHL66]](s32)
+    ; CHECK: [[OR66:%[0-9]+]]:_(s32) = G_OR [[COPY185]], [[COPY186]]
+    ; CHECK: [[SHL67:%[0-9]+]]:_(s32) = G_SHL [[C15]], [[C9]](s64)
+    ; CHECK: [[COPY187:%[0-9]+]]:_(s32) = COPY [[OR66]](s32)
+    ; CHECK: [[COPY188:%[0-9]+]]:_(s32) = COPY [[SHL67]](s32)
+    ; CHECK: [[OR67:%[0-9]+]]:_(s32) = G_OR [[COPY187]], [[COPY188]]
+    ; CHECK: [[SHL68:%[0-9]+]]:_(s32) = G_SHL [[C15]], [[C11]](s64)
+    ; CHECK: [[COPY189:%[0-9]+]]:_(s32) = COPY [[OR67]](s32)
+    ; CHECK: [[COPY190:%[0-9]+]]:_(s32) = COPY [[SHL68]](s32)
+    ; CHECK: [[OR68:%[0-9]+]]:_(s32) = G_OR [[COPY189]], [[COPY190]]
+    ; CHECK: [[SHL69:%[0-9]+]]:_(s32) = G_SHL [[C15]], [[C13]](s64)
+    ; CHECK: [[COPY191:%[0-9]+]]:_(s32) = COPY [[OR68]](s32)
+    ; CHECK: [[COPY192:%[0-9]+]]:_(s32) = COPY [[SHL69]](s32)
+    ; CHECK: [[OR69:%[0-9]+]]:_(s32) = G_OR [[COPY191]], [[COPY192]]
+    ; CHECK: [[TRUNC9:%[0-9]+]]:_(s8) = G_TRUNC [[OR69]](s32)
+    ; CHECK: [[SHL70:%[0-9]+]]:_(s32) = G_SHL [[C15]], [[C1]](s64)
+    ; CHECK: [[COPY193:%[0-9]+]]:_(s32) = COPY [[SHL70]](s32)
+    ; CHECK: [[OR70:%[0-9]+]]:_(s32) = G_OR [[C15]], [[COPY193]]
+    ; CHECK: [[SHL71:%[0-9]+]]:_(s32) = G_SHL [[C15]], [[C3]](s64)
+    ; CHECK: [[COPY194:%[0-9]+]]:_(s32) = COPY [[OR70]](s32)
+    ; CHECK: [[COPY195:%[0-9]+]]:_(s32) = COPY [[SHL71]](s32)
+    ; CHECK: [[OR71:%[0-9]+]]:_(s32) = G_OR [[COPY194]], [[COPY195]]
+    ; CHECK: [[SHL72:%[0-9]+]]:_(s32) = G_SHL [[C15]], [[C5]](s64)
+    ; CHECK: [[COPY196:%[0-9]+]]:_(s32) = COPY [[OR71]](s32)
+    ; CHECK: [[COPY197:%[0-9]+]]:_(s32) = COPY [[SHL72]](s32)
+    ; CHECK: [[OR72:%[0-9]+]]:_(s32) = G_OR [[COPY196]], [[COPY197]]
+    ; CHECK: [[SHL73:%[0-9]+]]:_(s32) = G_SHL [[C15]], [[C7]](s64)
+    ; CHECK: [[COPY198:%[0-9]+]]:_(s32) = COPY [[OR72]](s32)
+    ; CHECK: [[COPY199:%[0-9]+]]:_(s32) = COPY [[SHL73]](s32)
+    ; CHECK: [[OR73:%[0-9]+]]:_(s32) = G_OR [[COPY198]], [[COPY199]]
+    ; CHECK: [[SHL74:%[0-9]+]]:_(s32) = G_SHL [[C15]], [[C9]](s64)
+    ; CHECK: [[COPY200:%[0-9]+]]:_(s32) = COPY [[OR73]](s32)
+    ; CHECK: [[COPY201:%[0-9]+]]:_(s32) = COPY [[SHL74]](s32)
+    ; CHECK: [[OR74:%[0-9]+]]:_(s32) = G_OR [[COPY200]], [[COPY201]]
+    ; CHECK: [[SHL75:%[0-9]+]]:_(s32) = G_SHL [[C15]], [[C11]](s64)
+    ; CHECK: [[COPY202:%[0-9]+]]:_(s32) = COPY [[OR74]](s32)
+    ; CHECK: [[COPY203:%[0-9]+]]:_(s32) = COPY [[SHL75]](s32)
+    ; CHECK: [[OR75:%[0-9]+]]:_(s32) = G_OR [[COPY202]], [[COPY203]]
+    ; CHECK: [[SHL76:%[0-9]+]]:_(s32) = G_SHL [[C15]], [[C13]](s64)
+    ; CHECK: [[COPY204:%[0-9]+]]:_(s32) = COPY [[OR75]](s32)
+    ; CHECK: [[COPY205:%[0-9]+]]:_(s32) = COPY [[SHL76]](s32)
+    ; CHECK: [[OR76:%[0-9]+]]:_(s32) = G_OR [[COPY204]], [[COPY205]]
+    ; CHECK: [[TRUNC10:%[0-9]+]]:_(s8) = G_TRUNC [[OR76]](s32)
+    ; CHECK: [[SHL77:%[0-9]+]]:_(s32) = G_SHL [[C15]], [[C1]](s64)
+    ; CHECK: [[COPY206:%[0-9]+]]:_(s32) = COPY [[SHL77]](s32)
+    ; CHECK: [[OR77:%[0-9]+]]:_(s32) = G_OR [[C15]], [[COPY206]]
+    ; CHECK: [[SHL78:%[0-9]+]]:_(s32) = G_SHL [[C15]], [[C3]](s64)
+    ; CHECK: [[COPY207:%[0-9]+]]:_(s32) = COPY [[OR77]](s32)
+    ; CHECK: [[COPY208:%[0-9]+]]:_(s32) = COPY [[SHL78]](s32)
+    ; CHECK: [[OR78:%[0-9]+]]:_(s32) = G_OR [[COPY207]], [[COPY208]]
+    ; CHECK: [[SHL79:%[0-9]+]]:_(s32) = G_SHL [[C15]], [[C5]](s64)
+    ; CHECK: [[COPY209:%[0-9]+]]:_(s32) = COPY [[OR78]](s32)
+    ; CHECK: [[COPY210:%[0-9]+]]:_(s32) = COPY [[SHL79]](s32)
+    ; CHECK: [[OR79:%[0-9]+]]:_(s32) = G_OR [[COPY209]], [[COPY210]]
+    ; CHECK: [[SHL80:%[0-9]+]]:_(s32) = G_SHL [[C15]], [[C7]](s64)
+    ; CHECK: [[COPY211:%[0-9]+]]:_(s32) = COPY [[OR79]](s32)
+    ; CHECK: [[COPY212:%[0-9]+]]:_(s32) = COPY [[SHL80]](s32)
+    ; CHECK: [[OR80:%[0-9]+]]:_(s32) = G_OR [[COPY211]], [[COPY212]]
+    ; CHECK: [[SHL81:%[0-9]+]]:_(s32) = G_SHL [[C15]], [[C9]](s64)
+    ; CHECK: [[COPY213:%[0-9]+]]:_(s32) = COPY [[OR80]](s32)
+    ; CHECK: [[COPY214:%[0-9]+]]:_(s32) = COPY [[SHL81]](s32)
+    ; CHECK: [[OR81:%[0-9]+]]:_(s32) = G_OR [[COPY213]], [[COPY214]]
+    ; CHECK: [[SHL82:%[0-9]+]]:_(s32) = G_SHL [[C15]], [[C11]](s64)
+    ; CHECK: [[COPY215:%[0-9]+]]:_(s32) = COPY [[OR81]](s32)
+    ; CHECK: [[COPY216:%[0-9]+]]:_(s32) = COPY [[SHL82]](s32)
+    ; CHECK: [[OR82:%[0-9]+]]:_(s32) = G_OR [[COPY215]], [[COPY216]]
+    ; CHECK: [[SHL83:%[0-9]+]]:_(s32) = G_SHL [[C15]], [[C13]](s64)
+    ; CHECK: [[COPY217:%[0-9]+]]:_(s32) = COPY [[OR82]](s32)
+    ; CHECK: [[COPY218:%[0-9]+]]:_(s32) = COPY [[SHL83]](s32)
+    ; CHECK: [[OR83:%[0-9]+]]:_(s32) = G_OR [[COPY217]], [[COPY218]]
+    ; CHECK: [[TRUNC11:%[0-9]+]]:_(s8) = G_TRUNC [[OR83]](s32)
+    ; CHECK: [[SHL84:%[0-9]+]]:_(s32) = G_SHL [[C15]], [[C1]](s64)
+    ; CHECK: [[COPY219:%[0-9]+]]:_(s32) = COPY [[SHL84]](s32)
+    ; CHECK: [[OR84:%[0-9]+]]:_(s32) = G_OR [[C15]], [[COPY219]]
+    ; CHECK: [[SHL85:%[0-9]+]]:_(s32) = G_SHL [[C15]], [[C3]](s64)
+    ; CHECK: [[COPY220:%[0-9]+]]:_(s32) = COPY [[OR84]](s32)
+    ; CHECK: [[COPY221:%[0-9]+]]:_(s32) = COPY [[SHL85]](s32)
+    ; CHECK: [[OR85:%[0-9]+]]:_(s32) = G_OR [[COPY220]], [[COPY221]]
+    ; CHECK: [[SHL86:%[0-9]+]]:_(s32) = G_SHL [[C15]], [[C5]](s64)
+    ; CHECK: [[COPY222:%[0-9]+]]:_(s32) = COPY [[OR85]](s32)
+    ; CHECK: [[COPY223:%[0-9]+]]:_(s32) = COPY [[SHL86]](s32)
+    ; CHECK: [[OR86:%[0-9]+]]:_(s32) = G_OR [[COPY222]], [[COPY223]]
+    ; CHECK: [[SHL87:%[0-9]+]]:_(s32) = G_SHL [[C15]], [[C7]](s64)
+    ; CHECK: [[COPY224:%[0-9]+]]:_(s32) = COPY [[OR86]](s32)
+    ; CHECK: [[COPY225:%[0-9]+]]:_(s32) = COPY [[SHL87]](s32)
+    ; CHECK: [[OR87:%[0-9]+]]:_(s32) = G_OR [[COPY224]], [[COPY225]]
+    ; CHECK: [[SHL88:%[0-9]+]]:_(s32) = G_SHL [[C15]], [[C9]](s64)
+    ; CHECK: [[COPY226:%[0-9]+]]:_(s32) = COPY [[OR87]](s32)
+    ; CHECK: [[COPY227:%[0-9]+]]:_(s32) = COPY [[SHL88]](s32)
+    ; CHECK: [[OR88:%[0-9]+]]:_(s32) = G_OR [[COPY226]], [[COPY227]]
+    ; CHECK: [[SHL89:%[0-9]+]]:_(s32) = G_SHL [[C15]], [[C11]](s64)
+    ; CHECK: [[COPY228:%[0-9]+]]:_(s32) = COPY [[OR88]](s32)
+    ; CHECK: [[COPY229:%[0-9]+]]:_(s32) = COPY [[SHL89]](s32)
+    ; CHECK: [[OR89:%[0-9]+]]:_(s32) = G_OR [[COPY228]], [[COPY229]]
+    ; CHECK: [[SHL90:%[0-9]+]]:_(s32) = G_SHL [[C15]], [[C13]](s64)
+    ; CHECK: [[COPY230:%[0-9]+]]:_(s32) = COPY [[OR89]](s32)
+    ; CHECK: [[COPY231:%[0-9]+]]:_(s32) = COPY [[SHL90]](s32)
+    ; CHECK: [[OR90:%[0-9]+]]:_(s32) = G_OR [[COPY230]], [[COPY231]]
+    ; CHECK: [[TRUNC12:%[0-9]+]]:_(s8) = G_TRUNC [[OR90]](s32)
+    ; CHECK: [[SHL91:%[0-9]+]]:_(s32) = G_SHL [[C15]], [[C1]](s64)
+    ; CHECK: [[COPY232:%[0-9]+]]:_(s32) = COPY [[SHL91]](s32)
+    ; CHECK: [[OR91:%[0-9]+]]:_(s32) = G_OR [[C15]], [[COPY232]]
+    ; CHECK: [[SHL92:%[0-9]+]]:_(s32) = G_SHL [[C15]], [[C3]](s64)
+    ; CHECK: [[COPY233:%[0-9]+]]:_(s32) = COPY [[OR91]](s32)
+    ; CHECK: [[COPY234:%[0-9]+]]:_(s32) = COPY [[SHL92]](s32)
+    ; CHECK: [[OR92:%[0-9]+]]:_(s32) = G_OR [[COPY233]], [[COPY234]]
+    ; CHECK: [[SHL93:%[0-9]+]]:_(s32) = G_SHL [[C15]], [[C5]](s64)
+    ; CHECK: [[COPY235:%[0-9]+]]:_(s32) = COPY [[OR92]](s32)
+    ; CHECK: [[COPY236:%[0-9]+]]:_(s32) = COPY [[SHL93]](s32)
+    ; CHECK: [[OR93:%[0-9]+]]:_(s32) = G_OR [[COPY235]], [[COPY236]]
+    ; CHECK: [[SHL94:%[0-9]+]]:_(s32) = G_SHL [[C15]], [[C7]](s64)
+    ; CHECK: [[COPY237:%[0-9]+]]:_(s32) = COPY [[OR93]](s32)
+    ; CHECK: [[COPY238:%[0-9]+]]:_(s32) = COPY [[SHL94]](s32)
+    ; CHECK: [[OR94:%[0-9]+]]:_(s32) = G_OR [[COPY237]], [[COPY238]]
+    ; CHECK: [[SHL95:%[0-9]+]]:_(s32) = G_SHL [[C15]], [[C9]](s64)
+    ; CHECK: [[COPY239:%[0-9]+]]:_(s32) = COPY [[OR94]](s32)
+    ; CHECK: [[COPY240:%[0-9]+]]:_(s32) = COPY [[SHL95]](s32)
+    ; CHECK: [[OR95:%[0-9]+]]:_(s32) = G_OR [[COPY239]], [[COPY240]]
+    ; CHECK: [[SHL96:%[0-9]+]]:_(s32) = G_SHL [[C15]], [[C11]](s64)
+    ; CHECK: [[COPY241:%[0-9]+]]:_(s32) = COPY [[OR95]](s32)
+    ; CHECK: [[COPY242:%[0-9]+]]:_(s32) = COPY [[SHL96]](s32)
+    ; CHECK: [[OR96:%[0-9]+]]:_(s32) = G_OR [[COPY241]], [[COPY242]]
+    ; CHECK: [[SHL97:%[0-9]+]]:_(s32) = G_SHL [[C15]], [[C13]](s64)
+    ; CHECK: [[COPY243:%[0-9]+]]:_(s32) = COPY [[OR96]](s32)
+    ; CHECK: [[COPY244:%[0-9]+]]:_(s32) = COPY [[SHL97]](s32)
+    ; CHECK: [[OR97:%[0-9]+]]:_(s32) = G_OR [[COPY243]], [[COPY244]]
+    ; CHECK: [[TRUNC13:%[0-9]+]]:_(s8) = G_TRUNC [[OR97]](s32)
+    ; CHECK: [[SHL98:%[0-9]+]]:_(s32) = G_SHL [[C15]], [[C1]](s64)
+    ; CHECK: [[COPY245:%[0-9]+]]:_(s32) = COPY [[SHL98]](s32)
+    ; CHECK: [[OR98:%[0-9]+]]:_(s32) = G_OR [[C15]], [[COPY245]]
+    ; CHECK: [[SHL99:%[0-9]+]]:_(s32) = G_SHL [[C15]], [[C3]](s64)
+    ; CHECK: [[COPY246:%[0-9]+]]:_(s32) = COPY [[OR98]](s32)
+    ; CHECK: [[COPY247:%[0-9]+]]:_(s32) = COPY [[SHL99]](s32)
+    ; CHECK: [[OR99:%[0-9]+]]:_(s32) = G_OR [[COPY246]], [[COPY247]]
+    ; CHECK: [[SHL100:%[0-9]+]]:_(s32) = G_SHL [[C15]], [[C5]](s64)
+    ; CHECK: [[COPY248:%[0-9]+]]:_(s32) = COPY [[OR99]](s32)
+    ; CHECK: [[COPY249:%[0-9]+]]:_(s32) = COPY [[SHL100]](s32)
+    ; CHECK: [[OR100:%[0-9]+]]:_(s32) = G_OR [[COPY248]], [[COPY249]]
+    ; CHECK: [[SHL101:%[0-9]+]]:_(s32) = G_SHL [[C15]], [[C7]](s64)
+    ; CHECK: [[COPY250:%[0-9]+]]:_(s32) = COPY [[OR100]](s32)
+    ; CHECK: [[COPY251:%[0-9]+]]:_(s32) = COPY [[SHL101]](s32)
+    ; CHECK: [[OR101:%[0-9]+]]:_(s32) = G_OR [[COPY250]], [[COPY251]]
+    ; CHECK: [[SHL102:%[0-9]+]]:_(s32) = G_SHL [[C15]], [[C9]](s64)
+    ; CHECK: [[COPY252:%[0-9]+]]:_(s32) = COPY [[OR101]](s32)
+    ; CHECK: [[COPY253:%[0-9]+]]:_(s32) = COPY [[SHL102]](s32)
+    ; CHECK: [[OR102:%[0-9]+]]:_(s32) = G_OR [[COPY252]], [[COPY253]]
+    ; CHECK: [[SHL103:%[0-9]+]]:_(s32) = G_SHL [[C15]], [[C11]](s64)
+    ; CHECK: [[COPY254:%[0-9]+]]:_(s32) = COPY [[OR102]](s32)
+    ; CHECK: [[COPY255:%[0-9]+]]:_(s32) = COPY [[SHL103]](s32)
+    ; CHECK: [[OR103:%[0-9]+]]:_(s32) = G_OR [[COPY254]], [[COPY255]]
+    ; CHECK: [[SHL104:%[0-9]+]]:_(s32) = G_SHL [[C15]], [[C13]](s64)
+    ; CHECK: [[COPY256:%[0-9]+]]:_(s32) = COPY [[OR103]](s32)
+    ; CHECK: [[COPY257:%[0-9]+]]:_(s32) = COPY [[SHL104]](s32)
+    ; CHECK: [[OR104:%[0-9]+]]:_(s32) = G_OR [[COPY256]], [[COPY257]]
+    ; CHECK: [[TRUNC14:%[0-9]+]]:_(s8) = G_TRUNC [[OR104]](s32)
+    ; CHECK: [[SHL105:%[0-9]+]]:_(s32) = G_SHL [[C15]], [[C1]](s64)
+    ; CHECK: [[COPY258:%[0-9]+]]:_(s32) = COPY [[SHL105]](s32)
+    ; CHECK: [[OR105:%[0-9]+]]:_(s32) = G_OR [[C15]], [[COPY258]]
+    ; CHECK: [[SHL106:%[0-9]+]]:_(s32) = G_SHL [[C15]], [[C3]](s64)
+    ; CHECK: [[COPY259:%[0-9]+]]:_(s32) = COPY [[OR105]](s32)
+    ; CHECK: [[COPY260:%[0-9]+]]:_(s32) = COPY [[SHL106]](s32)
+    ; CHECK: [[OR106:%[0-9]+]]:_(s32) = G_OR [[COPY259]], [[COPY260]]
+    ; CHECK: [[SHL107:%[0-9]+]]:_(s32) = G_SHL [[C15]], [[C5]](s64)
+    ; CHECK: [[COPY261:%[0-9]+]]:_(s32) = COPY [[OR106]](s32)
+    ; CHECK: [[COPY262:%[0-9]+]]:_(s32) = COPY [[SHL107]](s32)
+    ; CHECK: [[OR107:%[0-9]+]]:_(s32) = G_OR [[COPY261]], [[COPY262]]
+    ; CHECK: [[SHL108:%[0-9]+]]:_(s32) = G_SHL [[C15]], [[C7]](s64)
+    ; CHECK: [[COPY263:%[0-9]+]]:_(s32) = COPY [[OR107]](s32)
+    ; CHECK: [[COPY264:%[0-9]+]]:_(s32) = COPY [[SHL108]](s32)
+    ; CHECK: [[OR108:%[0-9]+]]:_(s32) = G_OR [[COPY263]], [[COPY264]]
+    ; CHECK: [[SHL109:%[0-9]+]]:_(s32) = G_SHL [[C15]], [[C9]](s64)
+    ; CHECK: [[COPY265:%[0-9]+]]:_(s32) = COPY [[OR108]](s32)
+    ; CHECK: [[COPY266:%[0-9]+]]:_(s32) = COPY [[SHL109]](s32)
+    ; CHECK: [[OR109:%[0-9]+]]:_(s32) = G_OR [[COPY265]], [[COPY266]]
+    ; CHECK: [[SHL110:%[0-9]+]]:_(s32) = G_SHL [[C15]], [[C11]](s64)
+    ; CHECK: [[COPY267:%[0-9]+]]:_(s32) = COPY [[OR109]](s32)
+    ; CHECK: [[COPY268:%[0-9]+]]:_(s32) = COPY [[SHL110]](s32)
+    ; CHECK: [[OR110:%[0-9]+]]:_(s32) = G_OR [[COPY267]], [[COPY268]]
+    ; CHECK: [[SHL111:%[0-9]+]]:_(s32) = G_SHL [[C15]], [[C13]](s64)
+    ; CHECK: [[COPY269:%[0-9]+]]:_(s32) = COPY [[OR110]](s32)
+    ; CHECK: [[COPY270:%[0-9]+]]:_(s32) = COPY [[SHL111]](s32)
+    ; CHECK: [[OR111:%[0-9]+]]:_(s32) = G_OR [[COPY269]], [[COPY270]]
+    ; CHECK: [[TRUNC15:%[0-9]+]]:_(s8) = G_TRUNC [[OR111]](s32)
+    ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[TRUNC8]](s8), [[TRUNC9]](s8), [[TRUNC10]](s8), [[TRUNC11]](s8), [[TRUNC12]](s8), [[TRUNC13]](s8), [[TRUNC14]](s8), [[TRUNC15]](s8)
+    ; CHECK: $x0 = COPY [[MV]](s64)
+    ; CHECK: $x1 = COPY [[MV1]](s64)
     %0:_(s64) = COPY $x0
     %1:_(s64) = COPY $x1
     %2:_(s64) = COPY $x2

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/frem.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/frem.ll
index 1e10b2390ed9..1eaae2cfff9d 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/frem.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/frem.ll
@@ -6,13 +6,13 @@ define amdgpu_kernel void @frem_f16(half addrspace(1)* %out, half addrspace(1)*
 ; CI-LABEL: frem_f16:
 ; CI:       ; %bb.0:
 ; CI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
-; CI-NEXT:    s_load_dwordx2 s[8:9], s[0:1], 0xd
+; CI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xd
 ; CI-NEXT:    s_waitcnt lgkmcnt(0)
-; CI-NEXT:    s_load_dword s0, s[6:7], 0x0
-; CI-NEXT:    s_load_dword s1, s[8:9], 0x2
+; CI-NEXT:    s_load_dword s2, s[6:7], 0x0
+; CI-NEXT:    s_load_dword s0, s[0:1], 0x2
 ; CI-NEXT:    s_waitcnt lgkmcnt(0)
-; CI-NEXT:    v_cvt_f32_f16_e32 v0, s0
-; CI-NEXT:    v_cvt_f32_f16_e32 v1, s1
+; CI-NEXT:    v_cvt_f32_f16_e32 v0, s2
+; CI-NEXT:    v_cvt_f32_f16_e32 v1, s0
 ; CI-NEXT:    v_div_scale_f32 v2, s[0:1], v1, v1, v0
 ; CI-NEXT:    v_div_scale_f32 v3, vcc, v0, v1, v0
 ; CI-NEXT:    v_rcp_f32_e32 v4, v2
@@ -38,20 +38,20 @@ define amdgpu_kernel void @frem_f16(half addrspace(1)* %out, half addrspace(1)*
 ; VI-LABEL: frem_f16:
 ; VI:       ; %bb.0:
 ; VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
-; VI-NEXT:    s_load_dwordx2 s[8:9], s[0:1], 0x34
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x34
 ; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    s_load_dword s0, s[6:7], 0x0
-; VI-NEXT:    s_load_dword s1, s[8:9], 0x8
+; VI-NEXT:    s_load_dword s2, s[6:7], 0x0
+; VI-NEXT:    s_load_dword s0, s[0:1], 0x8
 ; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    v_cvt_f32_f16_e32 v0, s0
-; VI-NEXT:    v_cvt_f32_f16_e32 v2, s1
-; VI-NEXT:    v_mov_b32_e32 v1, s1
+; VI-NEXT:    v_cvt_f32_f16_e32 v0, s2
+; VI-NEXT:    v_cvt_f32_f16_e32 v2, s0
+; VI-NEXT:    v_mov_b32_e32 v1, s0
 ; VI-NEXT:    v_rcp_f32_e32 v2, v2
 ; VI-NEXT:    v_mul_f32_e32 v0, v0, v2
 ; VI-NEXT:    v_cvt_f16_f32_e32 v0, v0
-; VI-NEXT:    v_div_fixup_f16 v0, v0, v1, s0
+; VI-NEXT:    v_div_fixup_f16 v0, v0, v1, s2
 ; VI-NEXT:    v_trunc_f16_e32 v0, v0
-; VI-NEXT:    v_fma_f16 v2, -v0, v1, s0
+; VI-NEXT:    v_fma_f16 v2, -v0, v1, s2
 ; VI-NEXT:    v_mov_b32_e32 v0, s4
 ; VI-NEXT:    v_mov_b32_e32 v1, s5
 ; VI-NEXT:    flat_store_short v[0:1], v2
@@ -68,15 +68,15 @@ define amdgpu_kernel void @fast_frem_f16(half addrspace(1)* %out, half addrspace
 ; CI-LABEL: fast_frem_f16:
 ; CI:       ; %bb.0:
 ; CI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
-; CI-NEXT:    s_load_dwordx2 s[8:9], s[0:1], 0xd
+; CI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xd
 ; CI-NEXT:    s_waitcnt lgkmcnt(0)
-; CI-NEXT:    s_load_dword s0, s[6:7], 0x0
-; CI-NEXT:    s_load_dword s1, s[8:9], 0x2
+; CI-NEXT:    s_load_dword s2, s[6:7], 0x0
+; CI-NEXT:    s_load_dword s0, s[0:1], 0x2
 ; CI-NEXT:    s_mov_b32 s6, -1
 ; CI-NEXT:    s_mov_b32 s7, 0xf000
 ; CI-NEXT:    s_waitcnt lgkmcnt(0)
-; CI-NEXT:    v_cvt_f32_f16_e32 v0, s0
-; CI-NEXT:    v_cvt_f32_f16_e32 v1, s1
+; CI-NEXT:    v_cvt_f32_f16_e32 v0, s2
+; CI-NEXT:    v_cvt_f32_f16_e32 v1, s0
 ; CI-NEXT:    v_rcp_f32_e32 v2, v1
 ; CI-NEXT:    v_mul_f32_e32 v2, v0, v2
 ; CI-NEXT:    v_trunc_f32_e32 v2, v2
@@ -88,16 +88,16 @@ define amdgpu_kernel void @fast_frem_f16(half addrspace(1)* %out, half addrspace
 ; VI-LABEL: fast_frem_f16:
 ; VI:       ; %bb.0:
 ; VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
-; VI-NEXT:    s_load_dwordx2 s[8:9], s[0:1], 0x34
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x34
 ; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    s_load_dword s0, s[6:7], 0x0
-; VI-NEXT:    s_load_dword s1, s[8:9], 0x8
+; VI-NEXT:    s_load_dword s2, s[6:7], 0x0
+; VI-NEXT:    s_load_dword s0, s[0:1], 0x8
 ; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    v_mov_b32_e32 v1, s0
-; VI-NEXT:    v_rcp_f16_e32 v0, s1
-; VI-NEXT:    v_mul_f16_e32 v0, s0, v0
+; VI-NEXT:    v_mov_b32_e32 v1, s2
+; VI-NEXT:    v_rcp_f16_e32 v0, s0
+; VI-NEXT:    v_mul_f16_e32 v0, s2, v0
 ; VI-NEXT:    v_trunc_f16_e32 v0, v0
-; VI-NEXT:    v_fma_f16 v2, -v0, s1, v1
+; VI-NEXT:    v_fma_f16 v2, -v0, s0, v1
 ; VI-NEXT:    v_mov_b32_e32 v0, s4
 ; VI-NEXT:    v_mov_b32_e32 v1, s5
 ; VI-NEXT:    flat_store_short v[0:1], v2
@@ -114,15 +114,15 @@ define amdgpu_kernel void @unsafe_frem_f16(half addrspace(1)* %out, half addrspa
 ; CI-LABEL: unsafe_frem_f16:
 ; CI:       ; %bb.0:
 ; CI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
-; CI-NEXT:    s_load_dwordx2 s[8:9], s[0:1], 0xd
+; CI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xd
 ; CI-NEXT:    s_waitcnt lgkmcnt(0)
-; CI-NEXT:    s_load_dword s0, s[6:7], 0x0
-; CI-NEXT:    s_load_dword s1, s[8:9], 0x2
+; CI-NEXT:    s_load_dword s2, s[6:7], 0x0
+; CI-NEXT:    s_load_dword s0, s[0:1], 0x2
 ; CI-NEXT:    s_mov_b32 s6, -1
 ; CI-NEXT:    s_mov_b32 s7, 0xf000
 ; CI-NEXT:    s_waitcnt lgkmcnt(0)
-; CI-NEXT:    v_cvt_f32_f16_e32 v0, s0
-; CI-NEXT:    v_cvt_f32_f16_e32 v1, s1
+; CI-NEXT:    v_cvt_f32_f16_e32 v0, s2
+; CI-NEXT:    v_cvt_f32_f16_e32 v1, s0
 ; CI-NEXT:    v_rcp_f32_e32 v2, v1
 ; CI-NEXT:    v_mul_f32_e32 v2, v0, v2
 ; CI-NEXT:    v_trunc_f32_e32 v2, v2
@@ -134,16 +134,16 @@ define amdgpu_kernel void @unsafe_frem_f16(half addrspace(1)* %out, half addrspa
 ; VI-LABEL: unsafe_frem_f16:
 ; VI:       ; %bb.0:
 ; VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
-; VI-NEXT:    s_load_dwordx2 s[8:9], s[0:1], 0x34
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x34
 ; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    s_load_dword s0, s[6:7], 0x0
-; VI-NEXT:    s_load_dword s1, s[8:9], 0x8
+; VI-NEXT:    s_load_dword s2, s[6:7], 0x0
+; VI-NEXT:    s_load_dword s0, s[0:1], 0x8
 ; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    v_mov_b32_e32 v1, s0
-; VI-NEXT:    v_rcp_f16_e32 v0, s1
-; VI-NEXT:    v_mul_f16_e32 v0, s0, v0
+; VI-NEXT:    v_mov_b32_e32 v1, s2
+; VI-NEXT:    v_rcp_f16_e32 v0, s0
+; VI-NEXT:    v_mul_f16_e32 v0, s2, v0
 ; VI-NEXT:    v_trunc_f16_e32 v0, v0
-; VI-NEXT:    v_fma_f16 v2, -v0, s1, v1
+; VI-NEXT:    v_fma_f16 v2, -v0, s0, v1
 ; VI-NEXT:    v_mov_b32_e32 v0, s4
 ; VI-NEXT:    v_mov_b32_e32 v1, s5
 ; VI-NEXT:    flat_store_short v[0:1], v2
@@ -160,10 +160,10 @@ define amdgpu_kernel void @frem_f32(float addrspace(1)* %out, float addrspace(1)
 ; CI-LABEL: frem_f32:
 ; CI:       ; %bb.0:
 ; CI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
-; CI-NEXT:    s_load_dwordx2 s[8:9], s[0:1], 0xd
+; CI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xd
 ; CI-NEXT:    s_waitcnt lgkmcnt(0)
 ; CI-NEXT:    s_load_dword s2, s[6:7], 0x0
-; CI-NEXT:    s_load_dword s0, s[8:9], 0x4
+; CI-NEXT:    s_load_dword s0, s[0:1], 0x4
 ; CI-NEXT:    s_waitcnt lgkmcnt(0)
 ; CI-NEXT:    v_mov_b32_e32 v0, s0
 ; CI-NEXT:    v_div_scale_f32 v1, s[0:1], v0, v0, s2
@@ -189,10 +189,10 @@ define amdgpu_kernel void @frem_f32(float addrspace(1)* %out, float addrspace(1)
 ; VI-LABEL: frem_f32:
 ; VI:       ; %bb.0:
 ; VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
-; VI-NEXT:    s_load_dwordx2 s[8:9], s[0:1], 0x34
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x34
 ; VI-NEXT:    s_waitcnt lgkmcnt(0)
 ; VI-NEXT:    s_load_dword s2, s[6:7], 0x0
-; VI-NEXT:    s_load_dword s0, s[8:9], 0x10
+; VI-NEXT:    s_load_dword s0, s[0:1], 0x10
 ; VI-NEXT:    s_waitcnt lgkmcnt(0)
 ; VI-NEXT:    v_mov_b32_e32 v0, s0
 ; VI-NEXT:    v_div_scale_f32 v1, s[0:1], v0, v0, s2
@@ -226,34 +226,34 @@ define amdgpu_kernel void @fast_frem_f32(float addrspace(1)* %out, float addrspa
 ; CI-LABEL: fast_frem_f32:
 ; CI:       ; %bb.0:
 ; CI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
-; CI-NEXT:    s_load_dwordx2 s[8:9], s[0:1], 0xd
+; CI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xd
 ; CI-NEXT:    s_waitcnt lgkmcnt(0)
-; CI-NEXT:    s_load_dword s0, s[6:7], 0x0
-; CI-NEXT:    s_load_dword s1, s[8:9], 0x4
+; CI-NEXT:    s_load_dword s2, s[6:7], 0x0
+; CI-NEXT:    s_load_dword s0, s[0:1], 0x4
 ; CI-NEXT:    s_mov_b32 s6, -1
 ; CI-NEXT:    s_mov_b32 s7, 0xf000
 ; CI-NEXT:    s_waitcnt lgkmcnt(0)
-; CI-NEXT:    v_mov_b32_e32 v1, s0
-; CI-NEXT:    v_rcp_f32_e32 v0, s1
-; CI-NEXT:    v_mul_f32_e32 v0, s0, v0
+; CI-NEXT:    v_mov_b32_e32 v1, s2
+; CI-NEXT:    v_rcp_f32_e32 v0, s0
+; CI-NEXT:    v_mul_f32_e32 v0, s2, v0
 ; CI-NEXT:    v_trunc_f32_e32 v0, v0
-; CI-NEXT:    v_fma_f32 v0, -v0, s1, v1
+; CI-NEXT:    v_fma_f32 v0, -v0, s0, v1
 ; CI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
 ; CI-NEXT:    s_endpgm
 ;
 ; VI-LABEL: fast_frem_f32:
 ; VI:       ; %bb.0:
 ; VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
-; VI-NEXT:    s_load_dwordx2 s[8:9], s[0:1], 0x34
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x34
 ; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    s_load_dword s0, s[6:7], 0x0
-; VI-NEXT:    s_load_dword s1, s[8:9], 0x10
+; VI-NEXT:    s_load_dword s2, s[6:7], 0x0
+; VI-NEXT:    s_load_dword s0, s[0:1], 0x10
 ; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    v_mov_b32_e32 v1, s0
-; VI-NEXT:    v_rcp_f32_e32 v0, s1
-; VI-NEXT:    v_mul_f32_e32 v0, s0, v0
+; VI-NEXT:    v_mov_b32_e32 v1, s2
+; VI-NEXT:    v_rcp_f32_e32 v0, s0
+; VI-NEXT:    v_mul_f32_e32 v0, s2, v0
 ; VI-NEXT:    v_trunc_f32_e32 v0, v0
-; VI-NEXT:    v_fma_f32 v2, -v0, s1, v1
+; VI-NEXT:    v_fma_f32 v2, -v0, s0, v1
 ; VI-NEXT:    v_mov_b32_e32 v0, s4
 ; VI-NEXT:    v_mov_b32_e32 v1, s5
 ; VI-NEXT:    flat_store_dword v[0:1], v2
@@ -270,34 +270,34 @@ define amdgpu_kernel void @unsafe_frem_f32(float addrspace(1)* %out, float addrs
 ; CI-LABEL: unsafe_frem_f32:
 ; CI:       ; %bb.0:
 ; CI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
-; CI-NEXT:    s_load_dwordx2 s[8:9], s[0:1], 0xd
+; CI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xd
 ; CI-NEXT:    s_waitcnt lgkmcnt(0)
-; CI-NEXT:    s_load_dword s0, s[6:7], 0x0
-; CI-NEXT:    s_load_dword s1, s[8:9], 0x4
+; CI-NEXT:    s_load_dword s2, s[6:7], 0x0
+; CI-NEXT:    s_load_dword s0, s[0:1], 0x4
 ; CI-NEXT:    s_mov_b32 s6, -1
 ; CI-NEXT:    s_mov_b32 s7, 0xf000
 ; CI-NEXT:    s_waitcnt lgkmcnt(0)
-; CI-NEXT:    v_mov_b32_e32 v1, s0
-; CI-NEXT:    v_rcp_f32_e32 v0, s1
-; CI-NEXT:    v_mul_f32_e32 v0, s0, v0
+; CI-NEXT:    v_mov_b32_e32 v1, s2
+; CI-NEXT:    v_rcp_f32_e32 v0, s0
+; CI-NEXT:    v_mul_f32_e32 v0, s2, v0
 ; CI-NEXT:    v_trunc_f32_e32 v0, v0
-; CI-NEXT:    v_fma_f32 v0, -v0, s1, v1
+; CI-NEXT:    v_fma_f32 v0, -v0, s0, v1
 ; CI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
 ; CI-NEXT:    s_endpgm
 ;
 ; VI-LABEL: unsafe_frem_f32:
 ; VI:       ; %bb.0:
 ; VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
-; VI-NEXT:    s_load_dwordx2 s[8:9], s[0:1], 0x34
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x34
 ; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    s_load_dword s0, s[6:7], 0x0
-; VI-NEXT:    s_load_dword s1, s[8:9], 0x10
+; VI-NEXT:    s_load_dword s2, s[6:7], 0x0
+; VI-NEXT:    s_load_dword s0, s[0:1], 0x10
 ; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    v_mov_b32_e32 v1, s0
-; VI-NEXT:    v_rcp_f32_e32 v0, s1
-; VI-NEXT:    v_mul_f32_e32 v0, s0, v0
+; VI-NEXT:    v_mov_b32_e32 v1, s2
+; VI-NEXT:    v_rcp_f32_e32 v0, s0
+; VI-NEXT:    v_mul_f32_e32 v0, s2, v0
 ; VI-NEXT:    v_trunc_f32_e32 v0, v0
-; VI-NEXT:    v_fma_f32 v2, -v0, s1, v1
+; VI-NEXT:    v_fma_f32 v2, -v0, s0, v1
 ; VI-NEXT:    v_mov_b32_e32 v0, s4
 ; VI-NEXT:    v_mov_b32_e32 v1, s5
 ; VI-NEXT:    flat_store_dword v[0:1], v2
@@ -314,15 +314,15 @@ define amdgpu_kernel void @frem_f64(double addrspace(1)* %out, double addrspace(
 ; CI-LABEL: frem_f64:
 ; CI:       ; %bb.0:
 ; CI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
-; CI-NEXT:    s_load_dwordx2 s[8:9], s[0:1], 0xd
+; CI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xd
 ; CI-NEXT:    s_waitcnt lgkmcnt(0)
-; CI-NEXT:    s_load_dwordx2 s[0:1], s[6:7], 0x0
-; CI-NEXT:    s_load_dwordx2 s[2:3], s[8:9], 0x0
+; CI-NEXT:    s_load_dwordx2 s[2:3], s[6:7], 0x0
+; CI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; CI-NEXT:    s_waitcnt lgkmcnt(0)
-; CI-NEXT:    v_mov_b32_e32 v0, s2
-; CI-NEXT:    v_mov_b32_e32 v1, s3
-; CI-NEXT:    v_div_scale_f64 v[2:3], s[2:3], v[0:1], v[0:1], s[0:1]
-; CI-NEXT:    v_div_scale_f64 v[8:9], vcc, s[0:1], v[0:1], s[0:1]
+; CI-NEXT:    v_mov_b32_e32 v0, s0
+; CI-NEXT:    v_mov_b32_e32 v1, s1
+; CI-NEXT:    v_div_scale_f64 v[2:3], s[0:1], v[0:1], v[0:1], s[2:3]
+; CI-NEXT:    v_div_scale_f64 v[8:9], vcc, s[2:3], v[0:1], s[2:3]
 ; CI-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
 ; CI-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
 ; CI-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
@@ -331,9 +331,9 @@ define amdgpu_kernel void @frem_f64(double addrspace(1)* %out, double addrspace(
 ; CI-NEXT:    v_mul_f64 v[6:7], v[8:9], v[4:5]
 ; CI-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[6:7], v[8:9]
 ; CI-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[6:7]
-; CI-NEXT:    v_div_fixup_f64 v[2:3], v[2:3], v[0:1], s[0:1]
+; CI-NEXT:    v_div_fixup_f64 v[2:3], v[2:3], v[0:1], s[2:3]
 ; CI-NEXT:    v_trunc_f64_e32 v[2:3], v[2:3]
-; CI-NEXT:    v_fma_f64 v[0:1], -v[2:3], v[0:1], s[0:1]
+; CI-NEXT:    v_fma_f64 v[0:1], -v[2:3], v[0:1], s[2:3]
 ; CI-NEXT:    v_mov_b32_e32 v2, s4
 ; CI-NEXT:    v_mov_b32_e32 v3, s5
 ; CI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
@@ -342,15 +342,15 @@ define amdgpu_kernel void @frem_f64(double addrspace(1)* %out, double addrspace(
 ; VI-LABEL: frem_f64:
 ; VI:       ; %bb.0:
 ; VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
-; VI-NEXT:    s_load_dwordx2 s[8:9], s[0:1], 0x34
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x34
 ; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    s_load_dwordx2 s[0:1], s[6:7], 0x0
-; VI-NEXT:    s_load_dwordx2 s[2:3], s[8:9], 0x0
+; VI-NEXT:    s_load_dwordx2 s[2:3], s[6:7], 0x0
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    v_mov_b32_e32 v0, s2
-; VI-NEXT:    v_mov_b32_e32 v1, s3
-; VI-NEXT:    v_div_scale_f64 v[2:3], s[2:3], v[0:1], v[0:1], s[0:1]
-; VI-NEXT:    v_div_scale_f64 v[8:9], vcc, s[0:1], v[0:1], s[0:1]
+; VI-NEXT:    v_mov_b32_e32 v0, s0
+; VI-NEXT:    v_mov_b32_e32 v1, s1
+; VI-NEXT:    v_div_scale_f64 v[2:3], s[0:1], v[0:1], v[0:1], s[2:3]
+; VI-NEXT:    v_div_scale_f64 v[8:9], vcc, s[2:3], v[0:1], s[2:3]
 ; VI-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
 ; VI-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
 ; VI-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
@@ -359,9 +359,9 @@ define amdgpu_kernel void @frem_f64(double addrspace(1)* %out, double addrspace(
 ; VI-NEXT:    v_mul_f64 v[6:7], v[8:9], v[4:5]
 ; VI-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[6:7], v[8:9]
 ; VI-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[6:7]
-; VI-NEXT:    v_div_fixup_f64 v[2:3], v[2:3], v[0:1], s[0:1]
+; VI-NEXT:    v_div_fixup_f64 v[2:3], v[2:3], v[0:1], s[2:3]
 ; VI-NEXT:    v_trunc_f64_e32 v[2:3], v[2:3]
-; VI-NEXT:    v_fma_f64 v[0:1], -v[2:3], v[0:1], s[0:1]
+; VI-NEXT:    v_fma_f64 v[0:1], -v[2:3], v[0:1], s[2:3]
 ; VI-NEXT:    v_mov_b32_e32 v2, s4
 ; VI-NEXT:    v_mov_b32_e32 v3, s5
 ; VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
@@ -377,23 +377,23 @@ define amdgpu_kernel void @fast_frem_f64(double addrspace(1)* %out, double addrs
 ; CI-LABEL: fast_frem_f64:
 ; CI:       ; %bb.0:
 ; CI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
-; CI-NEXT:    s_load_dwordx2 s[8:9], s[0:1], 0xd
+; CI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xd
 ; CI-NEXT:    s_waitcnt lgkmcnt(0)
-; CI-NEXT:    s_load_dwordx2 s[0:1], s[6:7], 0x0
-; CI-NEXT:    s_load_dwordx2 s[2:3], s[8:9], 0x0
+; CI-NEXT:    s_load_dwordx2 s[2:3], s[6:7], 0x0
+; CI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; CI-NEXT:    s_waitcnt lgkmcnt(0)
-; CI-NEXT:    v_rcp_f64_e32 v[0:1], s[2:3]
-; CI-NEXT:    v_fma_f64 v[2:3], -s[2:3], v[0:1], 1.0
+; CI-NEXT:    v_rcp_f64_e32 v[0:1], s[0:1]
+; CI-NEXT:    v_fma_f64 v[2:3], -s[0:1], v[0:1], 1.0
 ; CI-NEXT:    v_fma_f64 v[0:1], v[2:3], v[0:1], v[0:1]
-; CI-NEXT:    v_fma_f64 v[2:3], -s[2:3], v[0:1], 1.0
+; CI-NEXT:    v_fma_f64 v[2:3], -s[0:1], v[0:1], 1.0
 ; CI-NEXT:    v_fma_f64 v[0:1], v[2:3], v[0:1], v[0:1]
-; CI-NEXT:    v_mov_b32_e32 v3, s1
-; CI-NEXT:    v_mov_b32_e32 v2, s0
-; CI-NEXT:    v_mul_f64 v[4:5], s[0:1], v[0:1]
-; CI-NEXT:    v_fma_f64 v[6:7], -s[2:3], v[4:5], v[2:3]
+; CI-NEXT:    v_mov_b32_e32 v2, s2
+; CI-NEXT:    v_mov_b32_e32 v3, s3
+; CI-NEXT:    v_mul_f64 v[4:5], s[2:3], v[0:1]
+; CI-NEXT:    v_fma_f64 v[6:7], -s[0:1], v[4:5], v[2:3]
 ; CI-NEXT:    v_fma_f64 v[0:1], v[6:7], v[0:1], v[4:5]
 ; CI-NEXT:    v_trunc_f64_e32 v[0:1], v[0:1]
-; CI-NEXT:    v_fma_f64 v[0:1], -v[0:1], s[2:3], v[2:3]
+; CI-NEXT:    v_fma_f64 v[0:1], -v[0:1], s[0:1], v[2:3]
 ; CI-NEXT:    v_mov_b32_e32 v2, s4
 ; CI-NEXT:    v_mov_b32_e32 v3, s5
 ; CI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
@@ -402,23 +402,23 @@ define amdgpu_kernel void @fast_frem_f64(double addrspace(1)* %out, double addrs
 ; VI-LABEL: fast_frem_f64:
 ; VI:       ; %bb.0:
 ; VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
-; VI-NEXT:    s_load_dwordx2 s[8:9], s[0:1], 0x34
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x34
 ; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    s_load_dwordx2 s[0:1], s[6:7], 0x0
-; VI-NEXT:    s_load_dwordx2 s[2:3], s[8:9], 0x0
+; VI-NEXT:    s_load_dwordx2 s[2:3], s[6:7], 0x0
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    v_rcp_f64_e32 v[0:1], s[2:3]
-; VI-NEXT:    v_fma_f64 v[2:3], -s[2:3], v[0:1], 1.0
+; VI-NEXT:    v_rcp_f64_e32 v[0:1], s[0:1]
+; VI-NEXT:    v_fma_f64 v[2:3], -s[0:1], v[0:1], 1.0
 ; VI-NEXT:    v_fma_f64 v[0:1], v[2:3], v[0:1], v[0:1]
-; VI-NEXT:    v_fma_f64 v[2:3], -s[2:3], v[0:1], 1.0
+; VI-NEXT:    v_fma_f64 v[2:3], -s[0:1], v[0:1], 1.0
 ; VI-NEXT:    v_fma_f64 v[0:1], v[2:3], v[0:1], v[0:1]
-; VI-NEXT:    v_mov_b32_e32 v3, s1
-; VI-NEXT:    v_mov_b32_e32 v2, s0
-; VI-NEXT:    v_mul_f64 v[4:5], s[0:1], v[0:1]
-; VI-NEXT:    v_fma_f64 v[6:7], -s[2:3], v[4:5], v[2:3]
+; VI-NEXT:    v_mov_b32_e32 v2, s2
+; VI-NEXT:    v_mov_b32_e32 v3, s3
+; VI-NEXT:    v_mul_f64 v[4:5], s[2:3], v[0:1]
+; VI-NEXT:    v_fma_f64 v[6:7], -s[0:1], v[4:5], v[2:3]
 ; VI-NEXT:    v_fma_f64 v[0:1], v[6:7], v[0:1], v[4:5]
 ; VI-NEXT:    v_trunc_f64_e32 v[0:1], v[0:1]
-; VI-NEXT:    v_fma_f64 v[0:1], -v[0:1], s[2:3], v[2:3]
+; VI-NEXT:    v_fma_f64 v[0:1], -v[0:1], s[0:1], v[2:3]
 ; VI-NEXT:    v_mov_b32_e32 v2, s4
 ; VI-NEXT:    v_mov_b32_e32 v3, s5
 ; VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
@@ -434,23 +434,23 @@ define amdgpu_kernel void @unsafe_frem_f64(double addrspace(1)* %out, double add
 ; CI-LABEL: unsafe_frem_f64:
 ; CI:       ; %bb.0:
 ; CI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
-; CI-NEXT:    s_load_dwordx2 s[8:9], s[0:1], 0xd
+; CI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xd
 ; CI-NEXT:    s_waitcnt lgkmcnt(0)
-; CI-NEXT:    s_load_dwordx2 s[0:1], s[6:7], 0x0
-; CI-NEXT:    s_load_dwordx2 s[2:3], s[8:9], 0x0
+; CI-NEXT:    s_load_dwordx2 s[2:3], s[6:7], 0x0
+; CI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; CI-NEXT:    s_waitcnt lgkmcnt(0)
-; CI-NEXT:    v_rcp_f64_e32 v[0:1], s[2:3]
-; CI-NEXT:    v_fma_f64 v[2:3], -s[2:3], v[0:1], 1.0
+; CI-NEXT:    v_rcp_f64_e32 v[0:1], s[0:1]
+; CI-NEXT:    v_fma_f64 v[2:3], -s[0:1], v[0:1], 1.0
 ; CI-NEXT:    v_fma_f64 v[0:1], v[2:3], v[0:1], v[0:1]
-; CI-NEXT:    v_fma_f64 v[2:3], -s[2:3], v[0:1], 1.0
+; CI-NEXT:    v_fma_f64 v[2:3], -s[0:1], v[0:1], 1.0
 ; CI-NEXT:    v_fma_f64 v[0:1], v[2:3], v[0:1], v[0:1]
-; CI-NEXT:    v_mov_b32_e32 v3, s1
-; CI-NEXT:    v_mov_b32_e32 v2, s0
-; CI-NEXT:    v_mul_f64 v[4:5], s[0:1], v[0:1]
-; CI-NEXT:    v_fma_f64 v[6:7], -s[2:3], v[4:5], v[2:3]
+; CI-NEXT:    v_mov_b32_e32 v2, s2
+; CI-NEXT:    v_mov_b32_e32 v3, s3
+; CI-NEXT:    v_mul_f64 v[4:5], s[2:3], v[0:1]
+; CI-NEXT:    v_fma_f64 v[6:7], -s[0:1], v[4:5], v[2:3]
 ; CI-NEXT:    v_fma_f64 v[0:1], v[6:7], v[0:1], v[4:5]
 ; CI-NEXT:    v_trunc_f64_e32 v[0:1], v[0:1]
-; CI-NEXT:    v_fma_f64 v[0:1], -v[0:1], s[2:3], v[2:3]
+; CI-NEXT:    v_fma_f64 v[0:1], -v[0:1], s[0:1], v[2:3]
 ; CI-NEXT:    v_mov_b32_e32 v2, s4
 ; CI-NEXT:    v_mov_b32_e32 v3, s5
 ; CI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
@@ -459,23 +459,23 @@ define amdgpu_kernel void @unsafe_frem_f64(double addrspace(1)* %out, double add
 ; VI-LABEL: unsafe_frem_f64:
 ; VI:       ; %bb.0:
 ; VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
-; VI-NEXT:    s_load_dwordx2 s[8:9], s[0:1], 0x34
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x34
 ; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    s_load_dwordx2 s[0:1], s[6:7], 0x0
-; VI-NEXT:    s_load_dwordx2 s[2:3], s[8:9], 0x0
+; VI-NEXT:    s_load_dwordx2 s[2:3], s[6:7], 0x0
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    v_rcp_f64_e32 v[0:1], s[2:3]
-; VI-NEXT:    v_fma_f64 v[2:3], -s[2:3], v[0:1], 1.0
+; VI-NEXT:    v_rcp_f64_e32 v[0:1], s[0:1]
+; VI-NEXT:    v_fma_f64 v[2:3], -s[0:1], v[0:1], 1.0
 ; VI-NEXT:    v_fma_f64 v[0:1], v[2:3], v[0:1], v[0:1]
-; VI-NEXT:    v_fma_f64 v[2:3], -s[2:3], v[0:1], 1.0
+; VI-NEXT:    v_fma_f64 v[2:3], -s[0:1], v[0:1], 1.0
 ; VI-NEXT:    v_fma_f64 v[0:1], v[2:3], v[0:1], v[0:1]
-; VI-NEXT:    v_mov_b32_e32 v3, s1
-; VI-NEXT:    v_mov_b32_e32 v2, s0
-; VI-NEXT:    v_mul_f64 v[4:5], s[0:1], v[0:1]
-; VI-NEXT:    v_fma_f64 v[6:7], -s[2:3], v[4:5], v[2:3]
+; VI-NEXT:    v_mov_b32_e32 v2, s2
+; VI-NEXT:    v_mov_b32_e32 v3, s3
+; VI-NEXT:    v_mul_f64 v[4:5], s[2:3], v[0:1]
+; VI-NEXT:    v_fma_f64 v[6:7], -s[0:1], v[4:5], v[2:3]
 ; VI-NEXT:    v_fma_f64 v[0:1], v[6:7], v[0:1], v[4:5]
 ; VI-NEXT:    v_trunc_f64_e32 v[0:1], v[0:1]
-; VI-NEXT:    v_fma_f64 v[0:1], -v[0:1], s[2:3], v[2:3]
+; VI-NEXT:    v_fma_f64 v[0:1], -v[0:1], s[0:1], v[2:3]
 ; VI-NEXT:    v_mov_b32_e32 v2, s4
 ; VI-NEXT:    v_mov_b32_e32 v3, s5
 ; VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
@@ -492,15 +492,15 @@ define amdgpu_kernel void @frem_v2f16(<2 x half> addrspace(1)* %out, <2 x half>
 ; CI-LABEL: frem_v2f16:
 ; CI:       ; %bb.0:
 ; CI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
-; CI-NEXT:    s_load_dwordx2 s[8:9], s[0:1], 0xd
+; CI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xd
 ; CI-NEXT:    s_waitcnt lgkmcnt(0)
-; CI-NEXT:    s_load_dword s0, s[6:7], 0x0
-; CI-NEXT:    s_load_dword s1, s[8:9], 0x4
+; CI-NEXT:    s_load_dword s2, s[6:7], 0x0
+; CI-NEXT:    s_load_dword s0, s[0:1], 0x4
 ; CI-NEXT:    s_waitcnt lgkmcnt(0)
-; CI-NEXT:    v_cvt_f32_f16_e32 v0, s0
-; CI-NEXT:    v_cvt_f32_f16_e32 v1, s1
-; CI-NEXT:    s_lshr_b32 s2, s0, 16
-; CI-NEXT:    s_lshr_b32 s3, s1, 16
+; CI-NEXT:    v_cvt_f32_f16_e32 v0, s2
+; CI-NEXT:    v_cvt_f32_f16_e32 v1, s0
+; CI-NEXT:    s_lshr_b32 s6, s0, 16
+; CI-NEXT:    s_lshr_b32 s3, s2, 16
 ; CI-NEXT:    v_div_scale_f32 v2, s[0:1], v1, v1, v0
 ; CI-NEXT:    v_div_scale_f32 v3, vcc, v0, v1, v0
 ; CI-NEXT:    v_rcp_f32_e32 v4, v2
@@ -516,8 +516,8 @@ define amdgpu_kernel void @frem_v2f16(<2 x half> addrspace(1)* %out, <2 x half>
 ; CI-NEXT:    v_div_fixup_f32 v2, v2, v1, v0
 ; CI-NEXT:    v_trunc_f32_e32 v2, v2
 ; CI-NEXT:    v_fma_f32 v0, -v2, v1, v0
-; CI-NEXT:    v_cvt_f32_f16_e32 v1, s2
-; CI-NEXT:    v_cvt_f32_f16_e32 v2, s3
+; CI-NEXT:    v_cvt_f32_f16_e32 v1, s3
+; CI-NEXT:    v_cvt_f32_f16_e32 v2, s6
 ; CI-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0
 ; CI-NEXT:    v_cvt_f16_f32_e32 v0, v0
 ; CI-NEXT:    v_div_scale_f32 v3, s[0:1], v2, v2, v1
@@ -548,31 +548,31 @@ define amdgpu_kernel void @frem_v2f16(<2 x half> addrspace(1)* %out, <2 x half>
 ; VI-LABEL: frem_v2f16:
 ; VI:       ; %bb.0:
 ; VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
-; VI-NEXT:    s_load_dwordx2 s[8:9], s[0:1], 0x34
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x34
 ; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    s_load_dword s0, s[6:7], 0x0
-; VI-NEXT:    s_load_dword s1, s[8:9], 0x10
+; VI-NEXT:    s_load_dword s2, s[6:7], 0x0
+; VI-NEXT:    s_load_dword s0, s[0:1], 0x10
 ; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    v_cvt_f32_f16_e32 v0, s0
-; VI-NEXT:    v_cvt_f32_f16_e32 v2, s1
-; VI-NEXT:    s_lshr_b32 s3, s1, 16
+; VI-NEXT:    v_cvt_f32_f16_e32 v0, s2
+; VI-NEXT:    v_cvt_f32_f16_e32 v2, s0
+; VI-NEXT:    s_lshr_b32 s3, s0, 16
 ; VI-NEXT:    v_cvt_f32_f16_e32 v3, s3
-; VI-NEXT:    v_mov_b32_e32 v1, s1
+; VI-NEXT:    v_mov_b32_e32 v1, s0
 ; VI-NEXT:    v_rcp_f32_e32 v2, v2
-; VI-NEXT:    s_lshr_b32 s2, s0, 16
+; VI-NEXT:    s_lshr_b32 s1, s2, 16
 ; VI-NEXT:    v_rcp_f32_e32 v3, v3
 ; VI-NEXT:    v_mul_f32_e32 v0, v0, v2
 ; VI-NEXT:    v_cvt_f16_f32_e32 v0, v0
 ; VI-NEXT:    v_mov_b32_e32 v2, s3
-; VI-NEXT:    v_div_fixup_f16 v0, v0, v1, s0
+; VI-NEXT:    v_div_fixup_f16 v0, v0, v1, s2
 ; VI-NEXT:    v_trunc_f16_e32 v0, v0
-; VI-NEXT:    v_fma_f16 v0, -v0, v1, s0
-; VI-NEXT:    v_cvt_f32_f16_e32 v1, s2
+; VI-NEXT:    v_fma_f16 v0, -v0, v1, s2
+; VI-NEXT:    v_cvt_f32_f16_e32 v1, s1
 ; VI-NEXT:    v_mul_f32_e32 v1, v1, v3
 ; VI-NEXT:    v_cvt_f16_f32_e32 v1, v1
-; VI-NEXT:    v_div_fixup_f16 v1, v1, v2, s2
+; VI-NEXT:    v_div_fixup_f16 v1, v1, v2, s1
 ; VI-NEXT:    v_trunc_f16_e32 v1, v1
-; VI-NEXT:    v_fma_f16 v1, -v1, v2, s2
+; VI-NEXT:    v_fma_f16 v1, -v1, v2, s1
 ; VI-NEXT:    v_mov_b32_e32 v2, 16
 ; VI-NEXT:    v_lshlrev_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
 ; VI-NEXT:    v_or_b32_sdwa v2, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
@@ -592,18 +592,18 @@ define amdgpu_kernel void @frem_v4f16(<4 x half> addrspace(1)* %out, <4 x half>
 ; CI-LABEL: frem_v4f16:
 ; CI:       ; %bb.0:
 ; CI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
-; CI-NEXT:    s_load_dwordx2 s[8:9], s[0:1], 0xd
+; CI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xd
 ; CI-NEXT:    s_waitcnt lgkmcnt(0)
-; CI-NEXT:    s_load_dwordx2 s[0:1], s[6:7], 0x0
-; CI-NEXT:    s_load_dwordx2 s[2:3], s[8:9], 0x8
+; CI-NEXT:    s_load_dwordx2 s[2:3], s[6:7], 0x0
+; CI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x8
 ; CI-NEXT:    s_waitcnt lgkmcnt(0)
-; CI-NEXT:    v_cvt_f32_f16_e32 v0, s0
-; CI-NEXT:    v_cvt_f32_f16_e32 v1, s2
-; CI-NEXT:    s_lshr_b32 s8, s0, 16
-; CI-NEXT:    s_lshr_b32 s9, s1, 16
-; CI-NEXT:    s_lshr_b32 s10, s2, 16
+; CI-NEXT:    v_cvt_f32_f16_e32 v0, s2
+; CI-NEXT:    v_cvt_f32_f16_e32 v1, s0
+; CI-NEXT:    s_lshr_b32 s8, s2, 16
+; CI-NEXT:    s_lshr_b32 s9, s3, 16
+; CI-NEXT:    s_lshr_b32 s10, s0, 16
 ; CI-NEXT:    v_div_scale_f32 v2, s[6:7], v1, v1, v0
-; CI-NEXT:    s_lshr_b32 s11, s3, 16
+; CI-NEXT:    s_lshr_b32 s11, s1, 16
 ; CI-NEXT:    v_div_scale_f32 v3, vcc, v0, v1, v0
 ; CI-NEXT:    v_rcp_f32_e32 v4, v2
 ; CI-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
@@ -637,8 +637,8 @@ define amdgpu_kernel void @frem_v4f16(<4 x half> addrspace(1)* %out, <4 x half>
 ; CI-NEXT:    v_div_fixup_f32 v3, v3, v2, v1
 ; CI-NEXT:    v_trunc_f32_e32 v3, v3
 ; CI-NEXT:    v_fma_f32 v1, -v3, v2, v1
-; CI-NEXT:    v_cvt_f32_f16_e32 v2, s1
-; CI-NEXT:    v_cvt_f32_f16_e32 v3, s3
+; CI-NEXT:    v_cvt_f32_f16_e32 v2, s3
+; CI-NEXT:    v_cvt_f32_f16_e32 v3, s1
 ; CI-NEXT:    v_cvt_f16_f32_e32 v1, v1
 ; CI-NEXT:    v_div_scale_f32 v4, s[0:1], v3, v3, v2
 ; CI-NEXT:    v_div_scale_f32 v5, vcc, v2, v3, v2
@@ -690,45 +690,45 @@ define amdgpu_kernel void @frem_v4f16(<4 x half> addrspace(1)* %out, <4 x half>
 ; VI-LABEL: frem_v4f16:
 ; VI:       ; %bb.0:
 ; VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
-; VI-NEXT:    s_load_dwordx2 s[8:9], s[0:1], 0x34
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x34
 ; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    s_load_dwordx2 s[0:1], s[6:7], 0x0
-; VI-NEXT:    s_load_dwordx2 s[2:3], s[8:9], 0x20
+; VI-NEXT:    s_load_dwordx2 s[2:3], s[6:7], 0x0
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x20
 ; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    v_cvt_f32_f16_e32 v0, s0
-; VI-NEXT:    v_cvt_f32_f16_e32 v2, s2
-; VI-NEXT:    s_lshr_b32 s8, s2, 16
+; VI-NEXT:    v_cvt_f32_f16_e32 v0, s2
+; VI-NEXT:    v_cvt_f32_f16_e32 v2, s0
+; VI-NEXT:    s_lshr_b32 s8, s0, 16
 ; VI-NEXT:    v_cvt_f32_f16_e32 v3, s8
-; VI-NEXT:    v_mov_b32_e32 v1, s2
+; VI-NEXT:    v_mov_b32_e32 v1, s0
 ; VI-NEXT:    v_rcp_f32_e32 v2, v2
-; VI-NEXT:    s_lshr_b32 s6, s0, 16
+; VI-NEXT:    s_lshr_b32 s6, s2, 16
 ; VI-NEXT:    v_rcp_f32_e32 v3, v3
-; VI-NEXT:    v_cvt_f32_f16_e32 v4, s3
+; VI-NEXT:    v_cvt_f32_f16_e32 v4, s1
 ; VI-NEXT:    v_mul_f32_e32 v0, v0, v2
 ; VI-NEXT:    v_cvt_f16_f32_e32 v0, v0
 ; VI-NEXT:    v_mov_b32_e32 v2, s8
 ; VI-NEXT:    v_rcp_f32_e32 v4, v4
-; VI-NEXT:    s_lshr_b32 s9, s3, 16
-; VI-NEXT:    v_div_fixup_f16 v0, v0, v1, s0
+; VI-NEXT:    s_lshr_b32 s9, s1, 16
+; VI-NEXT:    v_div_fixup_f16 v0, v0, v1, s2
 ; VI-NEXT:    v_trunc_f16_e32 v0, v0
-; VI-NEXT:    v_fma_f16 v0, -v0, v1, s0
+; VI-NEXT:    v_fma_f16 v0, -v0, v1, s2
 ; VI-NEXT:    v_cvt_f32_f16_e32 v1, s6
 ; VI-NEXT:    v_cvt_f32_f16_e32 v5, s9
-; VI-NEXT:    s_lshr_b32 s7, s1, 16
+; VI-NEXT:    s_lshr_b32 s7, s3, 16
 ; VI-NEXT:    v_mul_f32_e32 v1, v1, v3
 ; VI-NEXT:    v_cvt_f16_f32_e32 v1, v1
-; VI-NEXT:    v_mov_b32_e32 v3, s3
+; VI-NEXT:    v_mov_b32_e32 v3, s1
 ; VI-NEXT:    v_rcp_f32_e32 v5, v5
 ; VI-NEXT:    v_div_fixup_f16 v1, v1, v2, s6
 ; VI-NEXT:    v_trunc_f16_e32 v1, v1
 ; VI-NEXT:    v_fma_f16 v1, -v1, v2, s6
-; VI-NEXT:    v_cvt_f32_f16_e32 v2, s1
+; VI-NEXT:    v_cvt_f32_f16_e32 v2, s3
 ; VI-NEXT:    v_mul_f32_e32 v2, v2, v4
 ; VI-NEXT:    v_cvt_f16_f32_e32 v2, v2
 ; VI-NEXT:    v_mov_b32_e32 v4, s9
-; VI-NEXT:    v_div_fixup_f16 v2, v2, v3, s1
+; VI-NEXT:    v_div_fixup_f16 v2, v2, v3, s3
 ; VI-NEXT:    v_trunc_f16_e32 v2, v2
-; VI-NEXT:    v_fma_f16 v2, -v2, v3, s1
+; VI-NEXT:    v_fma_f16 v2, -v2, v3, s3
 ; VI-NEXT:    v_cvt_f32_f16_e32 v3, s7
 ; VI-NEXT:    v_mul_f32_e32 v3, v3, v5
 ; VI-NEXT:    v_cvt_f16_f32_e32 v3, v3
@@ -756,14 +756,14 @@ define amdgpu_kernel void @frem_v2f32(<2 x float> addrspace(1)* %out, <2 x float
 ; CI-LABEL: frem_v2f32:
 ; CI:       ; %bb.0:
 ; CI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
-; CI-NEXT:    s_load_dwordx2 s[8:9], s[0:1], 0xd
+; CI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xd
 ; CI-NEXT:    s_waitcnt lgkmcnt(0)
-; CI-NEXT:    s_load_dwordx2 s[0:1], s[6:7], 0x0
-; CI-NEXT:    s_load_dwordx2 s[2:3], s[8:9], 0x8
+; CI-NEXT:    s_load_dwordx2 s[2:3], s[6:7], 0x0
+; CI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x8
 ; CI-NEXT:    s_waitcnt lgkmcnt(0)
-; CI-NEXT:    v_mov_b32_e32 v0, s2
-; CI-NEXT:    v_div_scale_f32 v1, s[6:7], v0, v0, s0
-; CI-NEXT:    v_div_scale_f32 v2, vcc, s0, v0, s0
+; CI-NEXT:    v_mov_b32_e32 v0, s0
+; CI-NEXT:    v_div_scale_f32 v1, s[6:7], v0, v0, s2
+; CI-NEXT:    v_div_scale_f32 v2, vcc, s2, v0, s2
 ; CI-NEXT:    v_rcp_f32_e32 v3, v1
 ; CI-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
 ; CI-NEXT:    v_fma_f32 v4, -v1, v3, 1.0
@@ -774,12 +774,12 @@ define amdgpu_kernel void @frem_v2f32(<2 x float> addrspace(1)* %out, <2 x float
 ; CI-NEXT:    v_fma_f32 v1, -v1, v4, v2
 ; CI-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
 ; CI-NEXT:    v_div_fmas_f32 v1, v1, v3, v4
-; CI-NEXT:    v_div_fixup_f32 v1, v1, v0, s0
+; CI-NEXT:    v_div_fixup_f32 v1, v1, v0, s2
 ; CI-NEXT:    v_trunc_f32_e32 v1, v1
-; CI-NEXT:    v_fma_f32 v0, -v1, v0, s0
-; CI-NEXT:    v_mov_b32_e32 v1, s3
-; CI-NEXT:    v_div_scale_f32 v2, s[2:3], v1, v1, s1
-; CI-NEXT:    v_div_scale_f32 v3, vcc, s1, v1, s1
+; CI-NEXT:    v_fma_f32 v0, -v1, v0, s2
+; CI-NEXT:    v_mov_b32_e32 v1, s1
+; CI-NEXT:    v_div_scale_f32 v2, s[0:1], v1, v1, s3
+; CI-NEXT:    v_div_scale_f32 v3, vcc, s3, v1, s3
 ; CI-NEXT:    v_rcp_f32_e32 v4, v2
 ; CI-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
 ; CI-NEXT:    v_fma_f32 v5, -v2, v4, 1.0
@@ -792,23 +792,23 @@ define amdgpu_kernel void @frem_v2f32(<2 x float> addrspace(1)* %out, <2 x float
 ; CI-NEXT:    v_div_fmas_f32 v2, v2, v4, v5
 ; CI-NEXT:    s_mov_b32 s6, -1
 ; CI-NEXT:    s_mov_b32 s7, 0xf000
-; CI-NEXT:    v_div_fixup_f32 v2, v2, v1, s1
+; CI-NEXT:    v_div_fixup_f32 v2, v2, v1, s3
 ; CI-NEXT:    v_trunc_f32_e32 v2, v2
-; CI-NEXT:    v_fma_f32 v1, -v2, v1, s1
+; CI-NEXT:    v_fma_f32 v1, -v2, v1, s3
 ; CI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
 ; CI-NEXT:    s_endpgm
 ;
 ; VI-LABEL: frem_v2f32:
 ; VI:       ; %bb.0:
 ; VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
-; VI-NEXT:    s_load_dwordx2 s[8:9], s[0:1], 0x34
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x34
 ; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    s_load_dwordx2 s[0:1], s[6:7], 0x0
-; VI-NEXT:    s_load_dwordx2 s[2:3], s[8:9], 0x20
+; VI-NEXT:    s_load_dwordx2 s[2:3], s[6:7], 0x0
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x20
 ; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    v_mov_b32_e32 v0, s2
-; VI-NEXT:    v_div_scale_f32 v1, s[6:7], v0, v0, s0
-; VI-NEXT:    v_div_scale_f32 v2, vcc, s0, v0, s0
+; VI-NEXT:    v_mov_b32_e32 v0, s0
+; VI-NEXT:    v_div_scale_f32 v1, s[6:7], v0, v0, s2
+; VI-NEXT:    v_div_scale_f32 v2, vcc, s2, v0, s2
 ; VI-NEXT:    v_rcp_f32_e32 v3, v1
 ; VI-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
 ; VI-NEXT:    v_fma_f32 v4, -v1, v3, 1.0
@@ -819,12 +819,12 @@ define amdgpu_kernel void @frem_v2f32(<2 x float> addrspace(1)* %out, <2 x float
 ; VI-NEXT:    v_fma_f32 v1, -v1, v4, v2
 ; VI-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
 ; VI-NEXT:    v_div_fmas_f32 v1, v1, v3, v4
-; VI-NEXT:    v_div_fixup_f32 v1, v1, v0, s0
+; VI-NEXT:    v_div_fixup_f32 v1, v1, v0, s2
 ; VI-NEXT:    v_trunc_f32_e32 v1, v1
-; VI-NEXT:    v_fma_f32 v0, -v1, v0, s0
-; VI-NEXT:    v_mov_b32_e32 v1, s3
-; VI-NEXT:    v_div_scale_f32 v2, s[2:3], v1, v1, s1
-; VI-NEXT:    v_div_scale_f32 v3, vcc, s1, v1, s1
+; VI-NEXT:    v_fma_f32 v0, -v1, v0, s2
+; VI-NEXT:    v_mov_b32_e32 v1, s1
+; VI-NEXT:    v_div_scale_f32 v2, s[0:1], v1, v1, s3
+; VI-NEXT:    v_div_scale_f32 v3, vcc, s3, v1, s3
 ; VI-NEXT:    v_rcp_f32_e32 v4, v2
 ; VI-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
 ; VI-NEXT:    v_fma_f32 v5, -v2, v4, 1.0
@@ -835,9 +835,9 @@ define amdgpu_kernel void @frem_v2f32(<2 x float> addrspace(1)* %out, <2 x float
 ; VI-NEXT:    v_fma_f32 v2, -v2, v5, v3
 ; VI-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
 ; VI-NEXT:    v_div_fmas_f32 v2, v2, v4, v5
-; VI-NEXT:    v_div_fixup_f32 v2, v2, v1, s1
+; VI-NEXT:    v_div_fixup_f32 v2, v2, v1, s3
 ; VI-NEXT:    v_trunc_f32_e32 v2, v2
-; VI-NEXT:    v_fma_f32 v1, -v2, v1, s1
+; VI-NEXT:    v_fma_f32 v1, -v2, v1, s3
 ; VI-NEXT:    v_mov_b32_e32 v2, s4
 ; VI-NEXT:    v_mov_b32_e32 v3, s5
 ; VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir
index 4653057298b9..d45ac7730189 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir
@@ -147,10 +147,9 @@ body: |
     ; CHECK: [[EXTRACT3:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](s96), 64
     ; CHECK: [[AND:%[0-9]+]]:_(s64) = G_AND [[EXTRACT]], [[EXTRACT2]]
     ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[EXTRACT1]], [[EXTRACT3]]
-    ; CHECK: [[DEF:%[0-9]+]]:_(s96) = G_IMPLICIT_DEF
-    ; CHECK: [[INSERT:%[0-9]+]]:_(s96) = G_INSERT [[DEF]], [[AND]](s64), 0
-    ; CHECK: [[INSERT1:%[0-9]+]]:_(s96) = G_INSERT [[INSERT]], [[AND1]](s32), 64
-    ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[INSERT1]](s96)
+    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64)
+    ; CHECK: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32), [[AND1]](s32)
+    ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](s96)
     %0:_(s96) = COPY $vgpr0_vgpr1_vgpr2
     %1:_(s96) = COPY $vgpr3_vgpr4_vgpr5
     %2:_(s96) = G_AND %0, %1

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ashr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ashr.mir
index a9664d19a3e0..c0472236bb09 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ashr.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ashr.mir
@@ -883,38 +883,28 @@ body: |
     ; GFX9: [[EXTRACT3:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT3]](<4 x s16>), 32
     ; GFX9: [[ASHR:%[0-9]+]]:_(<2 x s16>) = G_ASHR [[EXTRACT]], [[EXTRACT2]](<2 x s16>)
     ; GFX9: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[EXTRACT1]], [[EXTRACT3]](s16)
-    ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF1]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>)
-    ; GFX9: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
-    ; GFX9: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV4]](<3 x s16>), 0
-    ; GFX9: [[INSERT5:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT4]], [[ASHR]](<2 x s16>), 0
-    ; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT5]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>)
-    ; GFX9: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
-    ; GFX9: [[INSERT6:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV8]](<3 x s16>), 0
-    ; GFX9: [[INSERT7:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT6]], [[ASHR1]](s16), 32
-    ; GFX9: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT7]](<4 x s16>)
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>)
+    ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[ASHR]](<2 x s16>)
     ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
     ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
-    ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>)
+    ; GFX9: [[DEF1:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+    ; GFX9: [[DEF2:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; GFX9: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF3]](<4 x s16>)
+    ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
     ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
-    ; GFX9: [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
-    ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV14]](<2 x s16>)
+    ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>)
     ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
-    ; GFX9: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV15]](<2 x s16>)
-    ; GFX9: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
     ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
     ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
     ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY2]](s32), [[COPY3]](s32)
+    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR1]](s16)
     ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
-    ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY4]](s32), [[COPY5]](s32)
-    ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
-    ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY6]](s32), [[COPY7]](s32)
-    ; GFX9: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
-    ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS2]](<6 x s16>)
+    ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[COPY4]](s32)
+    ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+    ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32)
+    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+    ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
     %1:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5
     %2:_(<3 x s16>), %3:_(<3 x s16>) = G_UNMERGE_VALUES %0

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-constant.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-constant.mir
index c411bac56599..32dff5efe5cf 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-constant.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-constant.mir
@@ -33,10 +33,9 @@ body: |
     ; CHECK-LABEL: name: test_constant_s96
     ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -4780896129847249538
     ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -547834910
-    ; CHECK: [[DEF:%[0-9]+]]:_(s96) = G_IMPLICIT_DEF
-    ; CHECK: [[INSERT:%[0-9]+]]:_(s96) = G_INSERT [[DEF]], [[C]](s64), 0
-    ; CHECK: [[INSERT1:%[0-9]+]]:_(s96) = G_INSERT [[INSERT]], [[C1]](s32), 64
-    ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[INSERT1]](s96)
+    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64)
+    ; CHECK: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32), [[C1]](s32)
+    ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](s96)
     %0:_(s96) = G_CONSTANT i96  -10105770365747857631829412482
     $vgpr0_vgpr1_vgpr2 = COPY %0
 

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir
index 6574ba9cdacc..3b8679e9335d 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir
@@ -485,37 +485,107 @@ body: |
 
     ; CI-LABEL: name: test_load_constant_s24_align2
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4)
+    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4)
     ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
     ; CI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD]](p4) :: (load 1 from unknown-address + 2, align 2, addrspace 4)
-    ; CI: [[DEF:%[0-9]+]]:_(s24) = G_IMPLICIT_DEF
-    ; CI: [[INSERT:%[0-9]+]]:_(s24) = G_INSERT [[DEF]], [[LOAD]](s16), 0
-    ; CI: [[INSERT1:%[0-9]+]]:_(s24) = G_INSERT [[INSERT]], [[LOAD1]](s8), 16
-    ; CI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INSERT1]](s24)
-    ; CI: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1 from unknown-address + 2, align 2, addrspace 4)
+    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
+    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C3]](s32)
+    ; CI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CI: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; CI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]]
+    ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
+    ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C5]]
+    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+    ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; CI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; CI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C4]]
+    ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
+    ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[DEF]](s32)
+    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C5]]
+    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY3]](s32)
+    ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; CI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C2]](s32)
+    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[OR2]](s32)
+    ; CI: $vgpr0 = COPY [[COPY5]](s32)
     ; VI-LABEL: name: test_load_constant_s24_align2
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4)
+    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4)
     ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
     ; VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD]](p4) :: (load 1 from unknown-address + 2, align 2, addrspace 4)
-    ; VI: [[DEF:%[0-9]+]]:_(s24) = G_IMPLICIT_DEF
-    ; VI: [[INSERT:%[0-9]+]]:_(s24) = G_INSERT [[DEF]], [[LOAD]](s16), 0
-    ; VI: [[INSERT1:%[0-9]+]]:_(s24) = G_INSERT [[INSERT]], [[LOAD1]](s8), 16
-    ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INSERT1]](s24)
-    ; VI: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1 from unknown-address + 2, align 2, addrspace 4)
+    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
+    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C3]](s32)
+    ; VI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; VI: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]]
+    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+    ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C4]]
+    ; VI: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C5]](s16)
+    ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; VI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C4]]
+    ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
+    ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C4]]
+    ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C5]](s16)
+    ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C2]](s32)
+    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[OR2]](s32)
+    ; VI: $vgpr0 = COPY [[COPY1]](s32)
     ; GFX9-LABEL: name: test_load_constant_s24_align2
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4)
+    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4)
     ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
     ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD]](p4) :: (load 1 from unknown-address + 2, align 2, addrspace 4)
-    ; GFX9: [[DEF:%[0-9]+]]:_(s24) = G_IMPLICIT_DEF
-    ; GFX9: [[INSERT:%[0-9]+]]:_(s24) = G_INSERT [[DEF]], [[LOAD]](s16), 0
-    ; GFX9: [[INSERT1:%[0-9]+]]:_(s24) = G_INSERT [[INSERT]], [[LOAD1]](s8), 16
-    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INSERT1]](s24)
-    ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1 from unknown-address + 2, align 2, addrspace 4)
+    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
+    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C3]](s32)
+    ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; GFX9: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]]
+    ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+    ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C4]]
+    ; GFX9: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C5]](s16)
+    ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; GFX9: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C4]]
+    ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
+    ; GFX9: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C4]]
+    ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C5]](s16)
+    ; GFX9: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C2]](s32)
+    ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[OR2]](s32)
+    ; GFX9: $vgpr0 = COPY [[COPY1]](s32)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(s24) = G_LOAD %0 :: (load 3, align 2, addrspace 4)
     %2:_(s32) = G_ANYEXT %1
@@ -530,37 +600,101 @@ body: |
 
     ; CI-LABEL: name: test_load_constant_s24_align1
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[COPY]](p4) :: (load 2, align 1, addrspace 4)
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4)
+    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
     ; CI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD]](p4) :: (load 1 from unknown-address + 2, addrspace 4)
-    ; CI: [[DEF:%[0-9]+]]:_(s24) = G_IMPLICIT_DEF
-    ; CI: [[INSERT:%[0-9]+]]:_(s24) = G_INSERT [[DEF]], [[LOAD]](s16), 0
-    ; CI: [[INSERT1:%[0-9]+]]:_(s24) = G_INSERT [[INSERT]], [[LOAD1]](s8), 16
-    ; CI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INSERT1]](s24)
-    ; CI: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1 from unknown-address + 1, addrspace 4)
+    ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 1 from unknown-address + 2, addrspace 4)
+    ; CI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; CI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
+    ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C4]]
+    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+    ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; CI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; CI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[DEF]](s32)
+    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C4]]
+    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32)
+    ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; CI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
+    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[OR2]](s32)
+    ; CI: $vgpr0 = COPY [[COPY4]](s32)
     ; VI-LABEL: name: test_load_constant_s24_align1
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[COPY]](p4) :: (load 2, align 1, addrspace 4)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4)
+    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
     ; VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD]](p4) :: (load 1 from unknown-address + 2, addrspace 4)
-    ; VI: [[DEF:%[0-9]+]]:_(s24) = G_IMPLICIT_DEF
-    ; VI: [[INSERT:%[0-9]+]]:_(s24) = G_INSERT [[DEF]], [[LOAD]](s16), 0
-    ; VI: [[INSERT1:%[0-9]+]]:_(s24) = G_INSERT [[INSERT]], [[LOAD1]](s8), 16
-    ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INSERT1]](s24)
-    ; VI: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1 from unknown-address + 1, addrspace 4)
+    ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 1 from unknown-address + 2, addrspace 4)
+    ; VI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; VI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
+    ; VI: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
+    ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; VI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
+    ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]]
+    ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
+    ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
+    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[OR2]](s32)
+    ; VI: $vgpr0 = COPY [[COPY1]](s32)
     ; GFX9-LABEL: name: test_load_constant_s24_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[COPY]](p4) :: (load 2, align 1, addrspace 4)
-    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4)
+    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
     ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD]](p4) :: (load 1 from unknown-address + 2, addrspace 4)
-    ; GFX9: [[DEF:%[0-9]+]]:_(s24) = G_IMPLICIT_DEF
-    ; GFX9: [[INSERT:%[0-9]+]]:_(s24) = G_INSERT [[DEF]], [[LOAD]](s16), 0
-    ; GFX9: [[INSERT1:%[0-9]+]]:_(s24) = G_INSERT [[INSERT]], [[LOAD1]](s8), 16
-    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INSERT1]](s24)
-    ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1 from unknown-address + 1, addrspace 4)
+    ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 1 from unknown-address + 2, addrspace 4)
+    ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; GFX9: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
+    ; GFX9: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
+    ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; GFX9: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
+    ; GFX9: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]]
+    ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
+    ; GFX9: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
+    ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[OR2]](s32)
+    ; GFX9: $vgpr0 = COPY [[COPY1]](s32)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(s24) = G_LOAD %0 :: (load 3, align 1, addrspace 4)
     %2:_(s32) = G_ANYEXT %1
@@ -1052,7 +1186,6 @@ body: |
     ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]]
     ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
     ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32)
     ; CI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
     ; CI: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64)
     ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load 2 from unknown-address + 8, addrspace 4)
@@ -1064,10 +1197,8 @@ body: |
     ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]]
     ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
     ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; CI: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; CI: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<2 x s32>), 0
-    ; CI: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[OR2]](s32), 64
-    ; CI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT1]](<3 x s32>)
+    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
+    ; CI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
     ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; VI-LABEL: name: test_load_constant_s96_align2
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
@@ -1094,7 +1225,6 @@ body: |
     ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]]
     ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
     ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32)
     ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
     ; VI: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64)
     ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load 2 from unknown-address + 8, addrspace 4)
@@ -1106,10 +1236,8 @@ body: |
     ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]]
     ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
     ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; VI: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; VI: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<2 x s32>), 0
-    ; VI: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[OR2]](s32), 64
-    ; VI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT1]](<3 x s32>)
+    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
+    ; VI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
     ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; GFX9-LABEL: name: test_load_constant_s96_align2
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
@@ -1136,7 +1264,6 @@ body: |
     ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]]
     ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
     ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32)
     ; GFX9: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
     ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64)
     ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load 2 from unknown-address + 8, addrspace 4)
@@ -1148,10 +1275,8 @@ body: |
     ; GFX9: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]]
     ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
     ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; GFX9: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; GFX9: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<2 x s32>), 0
-    ; GFX9: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[OR2]](s32), 64
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT1]](<3 x s32>)
+    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
+    ; GFX9: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
     ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(s96) = G_LOAD %0 :: (load 12, align 2, addrspace 4)
@@ -1217,7 +1342,6 @@ body: |
     ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]]
     ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
     ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
     ; CI: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
     ; CI: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C8]](s64)
     ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p4) :: (load 1 from unknown-address + 8, addrspace 4)
@@ -1241,10 +1365,8 @@ body: |
     ; CI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C3]]
     ; CI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
     ; CI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; CI: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; CI: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<2 x s32>), 0
-    ; CI: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[OR8]](s32), 64
-    ; CI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT1]](<3 x s32>)
+    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
+    ; CI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
     ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; VI-LABEL: name: test_load_constant_s96_align1
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
@@ -1299,7 +1421,6 @@ body: |
     ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]]
     ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
     ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
     ; VI: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
     ; VI: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C8]](s64)
     ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p4) :: (load 1 from unknown-address + 8, addrspace 4)
@@ -1323,10 +1444,8 @@ body: |
     ; VI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C3]]
     ; VI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
     ; VI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; VI: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; VI: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<2 x s32>), 0
-    ; VI: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[OR8]](s32), 64
-    ; VI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT1]](<3 x s32>)
+    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
+    ; VI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
     ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; GFX9-LABEL: name: test_load_constant_s96_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
@@ -1381,7 +1500,6 @@ body: |
     ; GFX9: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]]
     ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
     ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
     ; GFX9: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
     ; GFX9: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C8]](s64)
     ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p4) :: (load 1 from unknown-address + 8, addrspace 4)
@@ -1405,10 +1523,8 @@ body: |
     ; GFX9: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C3]]
     ; GFX9: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
     ; GFX9: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; GFX9: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; GFX9: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<2 x s32>), 0
-    ; GFX9: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[OR8]](s32), 64
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT1]](<3 x s32>)
+    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
+    ; GFX9: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
     ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(s96) = G_LOAD %0 :: (load 12, align 1, addrspace 4)
@@ -1427,10 +1543,9 @@ body: |
     ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
     ; CI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
     ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 4 from unknown-address + 16, addrspace 4)
-    ; CI: [[DEF:%[0-9]+]]:_(<5 x s32>) = G_IMPLICIT_DEF
-    ; CI: [[INSERT:%[0-9]+]]:_(<5 x s32>) = G_INSERT [[DEF]], [[LOAD]](<4 x s32>), 0
-    ; CI: [[INSERT1:%[0-9]+]]:_(<5 x s32>) = G_INSERT [[INSERT]], [[LOAD1]](s32), 128
-    ; CI: [[BITCAST:%[0-9]+]]:_(s160) = G_BITCAST [[INSERT1]](<5 x s32>)
+    ; CI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
+    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[LOAD1]](s32)
+    ; CI: [[BITCAST:%[0-9]+]]:_(s160) = G_BITCAST [[BUILD_VECTOR]](<5 x s32>)
     ; CI: S_NOP 0, implicit [[BITCAST]](s160)
     ; VI-LABEL: name: test_load_constant_s160_align4
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
@@ -1438,10 +1553,9 @@ body: |
     ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
     ; VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
     ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 4 from unknown-address + 16, addrspace 4)
-    ; VI: [[DEF:%[0-9]+]]:_(<5 x s32>) = G_IMPLICIT_DEF
-    ; VI: [[INSERT:%[0-9]+]]:_(<5 x s32>) = G_INSERT [[DEF]], [[LOAD]](<4 x s32>), 0
-    ; VI: [[INSERT1:%[0-9]+]]:_(<5 x s32>) = G_INSERT [[INSERT]], [[LOAD1]](s32), 128
-    ; VI: [[BITCAST:%[0-9]+]]:_(s160) = G_BITCAST [[INSERT1]](<5 x s32>)
+    ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
+    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[LOAD1]](s32)
+    ; VI: [[BITCAST:%[0-9]+]]:_(s160) = G_BITCAST [[BUILD_VECTOR]](<5 x s32>)
     ; VI: S_NOP 0, implicit [[BITCAST]](s160)
     ; GFX9-LABEL: name: test_load_constant_s160_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
@@ -1449,10 +1563,9 @@ body: |
     ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
     ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
     ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 4 from unknown-address + 16, addrspace 4)
-    ; GFX9: [[DEF:%[0-9]+]]:_(<5 x s32>) = G_IMPLICIT_DEF
-    ; GFX9: [[INSERT:%[0-9]+]]:_(<5 x s32>) = G_INSERT [[DEF]], [[LOAD]](<4 x s32>), 0
-    ; GFX9: [[INSERT1:%[0-9]+]]:_(<5 x s32>) = G_INSERT [[INSERT]], [[LOAD1]](s32), 128
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(s160) = G_BITCAST [[INSERT1]](<5 x s32>)
+    ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
+    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[LOAD1]](s32)
+    ; GFX9: [[BITCAST:%[0-9]+]]:_(s160) = G_BITCAST [[BUILD_VECTOR]](<5 x s32>)
     ; GFX9: S_NOP 0, implicit [[BITCAST]](s160)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(s160) = G_LOAD %0 :: (load 20, align 4, addrspace 4)
@@ -1471,39 +1584,57 @@ body: |
     ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
     ; CI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
     ; CI: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p4) :: (load 12 from unknown-address + 16, align 4, addrspace 4)
-    ; CI: [[DEF:%[0-9]+]]:_(<7 x s32>) = G_IMPLICIT_DEF
-    ; CI: [[INSERT:%[0-9]+]]:_(<7 x s32>) = G_INSERT [[DEF]], [[LOAD]](<4 x s32>), 0
-    ; CI: [[INSERT1:%[0-9]+]]:_(<7 x s32>) = G_INSERT [[INSERT]], [[LOAD1]](<3 x s32>), 128
-    ; CI: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[INSERT1]](<7 x s32>)
-    ; CI: [[DEF1:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF
-    ; CI: [[INSERT2:%[0-9]+]]:_(s256) = G_INSERT [[DEF1]], [[BITCAST]](s224), 0
-    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT2]](s256)
+    ; CI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
+    ; CI: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<3 x s32>)
+    ; CI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32)
+    ; CI: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[UV4]](s32), [[UV5]](s32)
+    ; CI: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV6]](s32), [[DEF]](s32), [[DEF]](s32)
+    ; CI: [[DEF1:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
+    ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<21 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<3 x s32>), [[BUILD_VECTOR1]](<3 x s32>), [[BUILD_VECTOR2]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>)
+    ; CI: [[UV7:%[0-9]+]]:_(<7 x s32>), [[UV8:%[0-9]+]]:_(<7 x s32>), [[UV9:%[0-9]+]]:_(<7 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<21 x s32>)
+    ; CI: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[UV7]](<7 x s32>)
+    ; CI: [[DEF2:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF
+    ; CI: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF2]], [[BITCAST]](s224), 0
+    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](s256)
     ; VI-LABEL: name: test_load_constant_s224_align4
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
     ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load 16, align 4, addrspace 4)
     ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
     ; VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
     ; VI: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p4) :: (load 12 from unknown-address + 16, align 4, addrspace 4)
-    ; VI: [[DEF:%[0-9]+]]:_(<7 x s32>) = G_IMPLICIT_DEF
-    ; VI: [[INSERT:%[0-9]+]]:_(<7 x s32>) = G_INSERT [[DEF]], [[LOAD]](<4 x s32>), 0
-    ; VI: [[INSERT1:%[0-9]+]]:_(<7 x s32>) = G_INSERT [[INSERT]], [[LOAD1]](<3 x s32>), 128
-    ; VI: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[INSERT1]](<7 x s32>)
-    ; VI: [[DEF1:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF
-    ; VI: [[INSERT2:%[0-9]+]]:_(s256) = G_INSERT [[DEF1]], [[BITCAST]](s224), 0
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT2]](s256)
+    ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
+    ; VI: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<3 x s32>)
+    ; VI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32)
+    ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[UV4]](s32), [[UV5]](s32)
+    ; VI: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV6]](s32), [[DEF]](s32), [[DEF]](s32)
+    ; VI: [[DEF1:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
+    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<21 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<3 x s32>), [[BUILD_VECTOR1]](<3 x s32>), [[BUILD_VECTOR2]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>)
+    ; VI: [[UV7:%[0-9]+]]:_(<7 x s32>), [[UV8:%[0-9]+]]:_(<7 x s32>), [[UV9:%[0-9]+]]:_(<7 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<21 x s32>)
+    ; VI: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[UV7]](<7 x s32>)
+    ; VI: [[DEF2:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF
+    ; VI: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF2]], [[BITCAST]](s224), 0
+    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](s256)
     ; GFX9-LABEL: name: test_load_constant_s224_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
     ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load 16, align 4, addrspace 4)
     ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
     ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
     ; GFX9: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p4) :: (load 12 from unknown-address + 16, align 4, addrspace 4)
-    ; GFX9: [[DEF:%[0-9]+]]:_(<7 x s32>) = G_IMPLICIT_DEF
-    ; GFX9: [[INSERT:%[0-9]+]]:_(<7 x s32>) = G_INSERT [[DEF]], [[LOAD]](<4 x s32>), 0
-    ; GFX9: [[INSERT1:%[0-9]+]]:_(<7 x s32>) = G_INSERT [[INSERT]], [[LOAD1]](<3 x s32>), 128
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[INSERT1]](<7 x s32>)
-    ; GFX9: [[DEF1:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF
-    ; GFX9: [[INSERT2:%[0-9]+]]:_(s256) = G_INSERT [[DEF1]], [[BITCAST]](s224), 0
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT2]](s256)
+    ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
+    ; GFX9: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<3 x s32>)
+    ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32)
+    ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[UV4]](s32), [[UV5]](s32)
+    ; GFX9: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV6]](s32), [[DEF]](s32), [[DEF]](s32)
+    ; GFX9: [[DEF1:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
+    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<21 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<3 x s32>), [[BUILD_VECTOR1]](<3 x s32>), [[BUILD_VECTOR2]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>)
+    ; GFX9: [[UV7:%[0-9]+]]:_(<7 x s32>), [[UV8:%[0-9]+]]:_(<7 x s32>), [[UV9:%[0-9]+]]:_(<7 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<21 x s32>)
+    ; GFX9: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[UV7]](<7 x s32>)
+    ; GFX9: [[DEF2:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF
+    ; GFX9: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF2]], [[BITCAST]](s224), 0
+    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](s256)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(s224) = G_LOAD %0 :: (load 28, align 4, addrspace 4)
      %2:_(s256) = G_IMPLICIT_DEF
@@ -3974,93 +4105,81 @@ body: |
     ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
     ; CI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
     ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 2 from unknown-address + 2, addrspace 4)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 2 from unknown-address + 4, addrspace 4)
+    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
     ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]]
+    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]]
     ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]]
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]]
+    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32)
     ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
     ; CI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; CI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 2 from unknown-address + 4, addrspace 4)
-    ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; CI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]]
+    ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32)
+    ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; CI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[DEF]](<2 x s16>)
+    ; CI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
     ; CI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; CI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; CI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
-    ; CI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BITCAST]](<2 x s16>), 0
-    ; CI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; CI: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
-    ; CI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0
-    ; CI: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[TRUNC]](s16), 32
-    ; CI: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT3]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; CI: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>)
-    ; CI: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV8]](<3 x s16>), 0
-    ; CI: $vgpr0_vgpr1 = COPY [[INSERT4]](<4 x s16>)
+    ; CI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
     ; VI-LABEL: name: test_load_constant_v3s16_align2
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
     ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4)
     ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
     ; VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
     ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 2 from unknown-address + 2, addrspace 4)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 2 from unknown-address + 4, addrspace 4)
+    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
     ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]]
+    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]]
     ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]]
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]]
+    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32)
     ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
     ; VI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 2 from unknown-address + 4, addrspace 4)
-    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]]
+    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32)
+    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; VI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; VI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[DEF]](<2 x s16>)
+    ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
     ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
-    ; VI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BITCAST]](<2 x s16>), 0
-    ; VI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; VI: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
-    ; VI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0
-    ; VI: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[TRUNC]](s16), 32
-    ; VI: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT3]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; VI: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>)
-    ; VI: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV8]](<3 x s16>), 0
-    ; VI: $vgpr0_vgpr1 = COPY [[INSERT4]](<4 x s16>)
+    ; VI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
     ; GFX9-LABEL: name: test_load_constant_v3s16_align2
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
     ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4)
     ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
     ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
     ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 2 from unknown-address + 2, addrspace 4)
-    ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
-    ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
     ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
     ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
     ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 2 from unknown-address + 4, addrspace 4)
-    ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
-    ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
-    ; GFX9: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BUILD_VECTOR_TRUNC]](<2 x s16>), 0
-    ; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; GFX9: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
-    ; GFX9: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0
-    ; GFX9: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[TRUNC]](s16), 32
-    ; GFX9: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT3]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; GFX9: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>)
-    ; GFX9: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV8]](<3 x s16>), 0
-    ; GFX9: $vgpr0_vgpr1 = COPY [[INSERT4]](<4 x s16>)
+    ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+    ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
+    ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+    ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[DEF]](s32)
+    ; GFX9: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[DEF1]](<2 x s16>)
+    ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
+    ; GFX9: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[UV]](<3 x s16>), 0
+    ; GFX9: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(<3 x s16>) = G_LOAD %0 :: (load 6, align 2, addrspace 4)
     %2:_(<4 x s16>) = G_IMPLICIT_DEF
@@ -4103,14 +4222,8 @@ body: |
     ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32)
     ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
     ; CI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
-    ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
-    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; CI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; CI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; CI: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C6]](s64)
+    ; CI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CI: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](s64)
     ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load 1 from unknown-address + 4, addrspace 4)
     ; CI: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
     ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load 1 from unknown-address + 5, addrspace 4)
@@ -4119,23 +4232,26 @@ body: |
     ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
     ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32)
     ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]]
-    ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32)
-    ; CI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
-    ; CI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
-    ; CI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32)
+    ; CI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
+    ; CI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
+    ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32)
+    ; CI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]]
+    ; CI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+    ; CI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; CI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C7]], [[C6]](s32)
+    ; CI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]]
+    ; CI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+    ; CI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[DEF]](<2 x s16>)
+    ; CI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
     ; CI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; CI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; CI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
-    ; CI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BITCAST]](<2 x s16>), 0
-    ; CI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; CI: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
-    ; CI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0
-    ; CI: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[OR3]](s16), 32
-    ; CI: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT3]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; CI: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>)
-    ; CI: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV8]](<3 x s16>), 0
-    ; CI: $vgpr0_vgpr1 = COPY [[INSERT4]](<4 x s16>)
+    ; CI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
     ; VI-LABEL: name: test_load_constant_v3s16_align1
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
     ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4)
@@ -4161,14 +4277,8 @@ body: |
     ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]]
     ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16)
     ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; VI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64)
     ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load 1 from unknown-address + 4, addrspace 4)
     ; VI: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
     ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load 1 from unknown-address + 5, addrspace 4)
@@ -4176,22 +4286,25 @@ body: |
     ; VI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]]
     ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
     ; VI: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]]
-    ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16)
-    ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL3]]
-    ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; VI: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16)
+    ; VI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
+    ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
+    ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]]
+    ; VI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+    ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[C5]](s32)
+    ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]]
+    ; VI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+    ; VI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[DEF]](<2 x s16>)
+    ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
     ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
-    ; VI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BITCAST]](<2 x s16>), 0
-    ; VI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; VI: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
-    ; VI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0
-    ; VI: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[OR3]](s16), 32
-    ; VI: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT3]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; VI: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>)
-    ; VI: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV8]](<3 x s16>), 0
-    ; VI: $vgpr0_vgpr1 = COPY [[INSERT4]](<4 x s16>)
+    ; VI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
     ; GFX9-LABEL: name: test_load_constant_v3s16_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
     ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4)
@@ -4217,9 +4330,6 @@ body: |
     ; GFX9: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]]
     ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16)
     ; GFX9: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
-    ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16)
-    ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32)
     ; GFX9: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
     ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64)
     ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load 1 from unknown-address + 4, addrspace 4)
@@ -4231,20 +4341,18 @@ body: |
     ; GFX9: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]]
     ; GFX9: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16)
     ; GFX9: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
-    ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
-    ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
-    ; GFX9: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BUILD_VECTOR_TRUNC]](<2 x s16>), 0
-    ; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; GFX9: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
-    ; GFX9: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0
-    ; GFX9: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[OR2]](s16), 32
-    ; GFX9: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT3]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; GFX9: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>)
-    ; GFX9: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV8]](<3 x s16>), 0
-    ; GFX9: $vgpr0_vgpr1 = COPY [[INSERT4]](<4 x s16>)
+    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16)
+    ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32)
+    ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[OR2]](s16)
+    ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[DEF]](s32)
+    ; GFX9: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[DEF1]](<2 x s16>)
+    ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
+    ; GFX9: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[UV]](<3 x s16>), 0
+    ; GFX9: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(<3 x s16>) = G_LOAD %0 :: (load 6, align 1, addrspace 4)
     %2:_(<4 x s16>) = G_IMPLICIT_DEF
@@ -5745,36 +5853,33 @@ body: |
     ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
     ; CI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
     ; CI: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p4) :: (load 8 from unknown-address + 16, addrspace 4)
-    ; CI: [[DEF:%[0-9]+]]:_(<3 x s64>) = G_IMPLICIT_DEF
-    ; CI: [[INSERT:%[0-9]+]]:_(<3 x s64>) = G_INSERT [[DEF]], [[LOAD]](<2 x s64>), 0
-    ; CI: [[INSERT1:%[0-9]+]]:_(<3 x s64>) = G_INSERT [[INSERT]], [[LOAD1]](s64), 128
-    ; CI: [[DEF1:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
-    ; CI: [[INSERT2:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF1]], [[INSERT1]](<3 x s64>), 0
-    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT2]](<4 x s64>)
+    ; CI: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
+    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64)
+    ; CI: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
+    ; CI: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
+    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
     ; VI-LABEL: name: test_load_constant_v3s64_align8
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
     ; VI: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p4) :: (load 16, align 8, addrspace 4)
     ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
     ; VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
     ; VI: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p4) :: (load 8 from unknown-address + 16, addrspace 4)
-    ; VI: [[DEF:%[0-9]+]]:_(<3 x s64>) = G_IMPLICIT_DEF
-    ; VI: [[INSERT:%[0-9]+]]:_(<3 x s64>) = G_INSERT [[DEF]], [[LOAD]](<2 x s64>), 0
-    ; VI: [[INSERT1:%[0-9]+]]:_(<3 x s64>) = G_INSERT [[INSERT]], [[LOAD1]](s64), 128
-    ; VI: [[DEF1:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
-    ; VI: [[INSERT2:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF1]], [[INSERT1]](<3 x s64>), 0
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT2]](<4 x s64>)
+    ; VI: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
+    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64)
+    ; VI: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
+    ; VI: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
+    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
     ; GFX9-LABEL: name: test_load_constant_v3s64_align8
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
     ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p4) :: (load 16, align 8, addrspace 4)
     ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
     ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
     ; GFX9: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p4) :: (load 8 from unknown-address + 16, addrspace 4)
-    ; GFX9: [[DEF:%[0-9]+]]:_(<3 x s64>) = G_IMPLICIT_DEF
-    ; GFX9: [[INSERT:%[0-9]+]]:_(<3 x s64>) = G_INSERT [[DEF]], [[LOAD]](<2 x s64>), 0
-    ; GFX9: [[INSERT1:%[0-9]+]]:_(<3 x s64>) = G_INSERT [[INSERT]], [[LOAD1]](s64), 128
-    ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
-    ; GFX9: [[INSERT2:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF1]], [[INSERT1]](<3 x s64>), 0
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT2]](<4 x s64>)
+    ; GFX9: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
+    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64)
+    ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
+    ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
+    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(<3 x s64>) = G_LOAD %0 :: (load 24, align 8, addrspace 4)
     %2:_(<4 x s64>) = G_IMPLICIT_DEF
@@ -5914,7 +6019,6 @@ body: |
     ; CI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C10]](s32)
     ; CI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]]
     ; CI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32)
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
     ; CI: [[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
     ; CI: [[PTR_ADD15:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C12]](s64)
     ; CI: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p4) :: (load 1 from unknown-address + 16, addrspace 4)
@@ -5973,12 +6077,10 @@ body: |
     ; CI: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXT11]], [[C10]](s32)
     ; CI: [[OR17:%[0-9]+]]:_(s32) = G_OR [[ZEXT10]], [[SHL17]]
     ; CI: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR16]](s32), [[OR17]](s32)
-    ; CI: [[DEF:%[0-9]+]]:_(<3 x s64>) = G_IMPLICIT_DEF
-    ; CI: [[INSERT:%[0-9]+]]:_(<3 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<2 x s64>), 0
-    ; CI: [[INSERT1:%[0-9]+]]:_(<3 x s64>) = G_INSERT [[INSERT]], [[MV2]](s64), 128
-    ; CI: [[DEF1:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
-    ; CI: [[INSERT2:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF1]], [[INSERT1]](<3 x s64>), 0
-    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT2]](<4 x s64>)
+    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64)
+    ; CI: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
+    ; CI: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
+    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
     ; VI-LABEL: name: test_load_constant_v3s64_align1
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
     ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4)
@@ -6089,7 +6191,6 @@ body: |
     ; VI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C9]](s32)
     ; VI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]]
     ; VI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32)
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
     ; VI: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
     ; VI: [[PTR_ADD15:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C11]](s64)
     ; VI: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p4) :: (load 1 from unknown-address + 16, addrspace 4)
@@ -6140,12 +6241,10 @@ body: |
     ; VI: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXT11]], [[C9]](s32)
     ; VI: [[OR17:%[0-9]+]]:_(s32) = G_OR [[ZEXT10]], [[SHL17]]
     ; VI: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR16]](s32), [[OR17]](s32)
-    ; VI: [[DEF:%[0-9]+]]:_(<3 x s64>) = G_IMPLICIT_DEF
-    ; VI: [[INSERT:%[0-9]+]]:_(<3 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<2 x s64>), 0
-    ; VI: [[INSERT1:%[0-9]+]]:_(<3 x s64>) = G_INSERT [[INSERT]], [[MV2]](s64), 128
-    ; VI: [[DEF1:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
-    ; VI: [[INSERT2:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF1]], [[INSERT1]](<3 x s64>), 0
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT2]](<4 x s64>)
+    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64)
+    ; VI: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
+    ; VI: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
+    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
     ; GFX9-LABEL: name: test_load_constant_v3s64_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
     ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4)
@@ -6256,7 +6355,6 @@ body: |
     ; GFX9: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C9]](s32)
     ; GFX9: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]]
     ; GFX9: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32)
-    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
     ; GFX9: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
     ; GFX9: [[PTR_ADD15:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C11]](s64)
     ; GFX9: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p4) :: (load 1 from unknown-address + 16, addrspace 4)
@@ -6307,12 +6405,10 @@ body: |
     ; GFX9: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXT11]], [[C9]](s32)
     ; GFX9: [[OR17:%[0-9]+]]:_(s32) = G_OR [[ZEXT10]], [[SHL17]]
     ; GFX9: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR16]](s32), [[OR17]](s32)
-    ; GFX9: [[DEF:%[0-9]+]]:_(<3 x s64>) = G_IMPLICIT_DEF
-    ; GFX9: [[INSERT:%[0-9]+]]:_(<3 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<2 x s64>), 0
-    ; GFX9: [[INSERT1:%[0-9]+]]:_(<3 x s64>) = G_INSERT [[INSERT]], [[MV2]](s64), 128
-    ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
-    ; GFX9: [[INSERT2:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF1]], [[INSERT1]](<3 x s64>), 0
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT2]](<4 x s64>)
+    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64)
+    ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
+    ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
+    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(<3 x s64>) = G_LOAD %0 :: (load 24, align 1, addrspace 4)
     %2:_(<4 x s64>) = G_IMPLICIT_DEF
@@ -8080,7 +8176,6 @@ body: |
     ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]]
     ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
     ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
     ; CI: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
     ; CI: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C8]](s64)
     ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p4) :: (load 1 from unknown-address + 8, addrspace 1)
@@ -8104,11 +8199,8 @@ body: |
     ; CI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C3]]
     ; CI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
     ; CI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; CI: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; CI: [[COPY13:%[0-9]+]]:_(<3 x s32>) = COPY [[DEF]](<3 x s32>)
-    ; CI: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[COPY13]], [[BUILD_VECTOR]](<2 x s32>), 0
-    ; CI: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[OR8]](s32), 64
-    ; CI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT1]](<3 x s32>)
+    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
+    ; CI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
     ; CI: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
     ; CI: [[PTR_ADD11:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C9]](s64)
     ; CI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p4) :: (load 1 from unknown-address + 12, addrspace 1)
@@ -8118,18 +8210,18 @@ body: |
     ; CI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p4) :: (load 1 from unknown-address + 14, addrspace 1)
     ; CI: [[PTR_ADD14:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64)
     ; CI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p4) :: (load 1 from unknown-address + 15, addrspace 1)
-    ; CI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32)
-    ; CI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C3]]
-    ; CI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32)
-    ; CI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C3]]
+    ; CI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32)
+    ; CI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]]
+    ; CI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32)
+    ; CI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C3]]
     ; CI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
     ; CI: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
-    ; CI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LOAD14]](s32)
-    ; CI: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C3]]
+    ; CI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LOAD14]](s32)
+    ; CI: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C3]]
     ; CI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
     ; CI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
-    ; CI: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32)
-    ; CI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY17]], [[C3]]
+    ; CI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32)
+    ; CI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C3]]
     ; CI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
     ; CI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
     ; CI: [[PTR_ADD15:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C7]](s64)
@@ -8140,21 +8232,20 @@ body: |
     ; CI: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p4) :: (load 1 from unknown-address + 18, addrspace 1)
     ; CI: [[PTR_ADD18:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64)
     ; CI: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p4) :: (load 1 from unknown-address + 19, addrspace 1)
-    ; CI: [[COPY18:%[0-9]+]]:_(s32) = COPY [[LOAD16]](s32)
-    ; CI: [[AND16:%[0-9]+]]:_(s32) = G_AND [[COPY18]], [[C3]]
-    ; CI: [[COPY19:%[0-9]+]]:_(s32) = COPY [[LOAD17]](s32)
-    ; CI: [[AND17:%[0-9]+]]:_(s32) = G_AND [[COPY19]], [[C3]]
+    ; CI: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LOAD16]](s32)
+    ; CI: [[AND16:%[0-9]+]]:_(s32) = G_AND [[COPY17]], [[C3]]
+    ; CI: [[COPY18:%[0-9]+]]:_(s32) = COPY [[LOAD17]](s32)
+    ; CI: [[AND17:%[0-9]+]]:_(s32) = G_AND [[COPY18]], [[C3]]
     ; CI: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[C4]](s32)
     ; CI: [[OR12:%[0-9]+]]:_(s32) = G_OR [[AND16]], [[SHL12]]
-    ; CI: [[COPY20:%[0-9]+]]:_(s32) = COPY [[LOAD18]](s32)
-    ; CI: [[AND18:%[0-9]+]]:_(s32) = G_AND [[COPY20]], [[C3]]
+    ; CI: [[COPY19:%[0-9]+]]:_(s32) = COPY [[LOAD18]](s32)
+    ; CI: [[AND18:%[0-9]+]]:_(s32) = G_AND [[COPY19]], [[C3]]
     ; CI: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND18]], [[C5]](s32)
     ; CI: [[OR13:%[0-9]+]]:_(s32) = G_OR [[OR12]], [[SHL13]]
-    ; CI: [[COPY21:%[0-9]+]]:_(s32) = COPY [[LOAD19]](s32)
-    ; CI: [[AND19:%[0-9]+]]:_(s32) = G_AND [[COPY21]], [[C3]]
+    ; CI: [[COPY20:%[0-9]+]]:_(s32) = COPY [[LOAD19]](s32)
+    ; CI: [[AND19:%[0-9]+]]:_(s32) = G_AND [[COPY20]], [[C3]]
     ; CI: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[C6]](s32)
     ; CI: [[OR14:%[0-9]+]]:_(s32) = G_OR [[OR13]], [[SHL14]]
-    ; CI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32)
     ; CI: [[PTR_ADD19:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C8]](s64)
     ; CI: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p4) :: (load 1 from unknown-address + 20, addrspace 1)
     ; CI: [[PTR_ADD20:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD19]], [[C]](s64)
@@ -8163,27 +8254,26 @@ body: |
     ; CI: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p4) :: (load 1 from unknown-address + 22, addrspace 1)
     ; CI: [[PTR_ADD22:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD19]], [[C2]](s64)
     ; CI: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p4) :: (load 1 from unknown-address + 23, addrspace 1)
-    ; CI: [[COPY22:%[0-9]+]]:_(s32) = COPY [[LOAD20]](s32)
-    ; CI: [[AND20:%[0-9]+]]:_(s32) = G_AND [[COPY22]], [[C3]]
-    ; CI: [[COPY23:%[0-9]+]]:_(s32) = COPY [[LOAD21]](s32)
-    ; CI: [[AND21:%[0-9]+]]:_(s32) = G_AND [[COPY23]], [[C3]]
+    ; CI: [[COPY21:%[0-9]+]]:_(s32) = COPY [[LOAD20]](s32)
+    ; CI: [[AND20:%[0-9]+]]:_(s32) = G_AND [[COPY21]], [[C3]]
+    ; CI: [[COPY22:%[0-9]+]]:_(s32) = COPY [[LOAD21]](s32)
+    ; CI: [[AND21:%[0-9]+]]:_(s32) = G_AND [[COPY22]], [[C3]]
     ; CI: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[C4]](s32)
     ; CI: [[OR15:%[0-9]+]]:_(s32) = G_OR [[AND20]], [[SHL15]]
-    ; CI: [[COPY24:%[0-9]+]]:_(s32) = COPY [[LOAD22]](s32)
-    ; CI: [[AND22:%[0-9]+]]:_(s32) = G_AND [[COPY24]], [[C3]]
+    ; CI: [[COPY23:%[0-9]+]]:_(s32) = COPY [[LOAD22]](s32)
+    ; CI: [[AND22:%[0-9]+]]:_(s32) = G_AND [[COPY23]], [[C3]]
     ; CI: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[AND22]], [[C5]](s32)
     ; CI: [[OR16:%[0-9]+]]:_(s32) = G_OR [[OR15]], [[SHL16]]
-    ; CI: [[COPY25:%[0-9]+]]:_(s32) = COPY [[LOAD23]](s32)
-    ; CI: [[AND23:%[0-9]+]]:_(s32) = G_AND [[COPY25]], [[C3]]
+    ; CI: [[COPY24:%[0-9]+]]:_(s32) = COPY [[LOAD23]](s32)
+    ; CI: [[AND23:%[0-9]+]]:_(s32) = G_AND [[COPY24]], [[C3]]
     ; CI: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[C6]](s32)
     ; CI: [[OR17:%[0-9]+]]:_(s32) = G_OR [[OR16]], [[SHL17]]
-    ; CI: [[INSERT2:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[BUILD_VECTOR1]](<2 x s32>), 0
-    ; CI: [[INSERT3:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT2]], [[OR17]](s32), 64
-    ; CI: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT3]](<3 x s32>)
-    ; CI: [[COPY26:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
-    ; CI: [[COPY27:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
-    ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY26]](s96)
-    ; CI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY27]](s96)
+    ; CI: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32), [[OR17]](s32)
+    ; CI: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
+    ; CI: [[COPY25:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; CI: [[COPY26:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY25]](s96)
+    ; CI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY26]](s96)
     ; VI-LABEL: name: test_extload_constant_v2s96_from_24_align1
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
     ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 1)
@@ -8237,7 +8327,6 @@ body: |
     ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]]
     ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
     ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
     ; VI: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
     ; VI: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C8]](s64)
     ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p4) :: (load 1 from unknown-address + 8, addrspace 1)
@@ -8261,11 +8350,8 @@ body: |
     ; VI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C3]]
     ; VI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
     ; VI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; VI: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; VI: [[COPY13:%[0-9]+]]:_(<3 x s32>) = COPY [[DEF]](<3 x s32>)
-    ; VI: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[COPY13]], [[BUILD_VECTOR]](<2 x s32>), 0
-    ; VI: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[OR8]](s32), 64
-    ; VI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT1]](<3 x s32>)
+    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
+    ; VI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
     ; VI: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
     ; VI: [[PTR_ADD11:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C9]](s64)
     ; VI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p4) :: (load 1 from unknown-address + 12, addrspace 1)
@@ -8275,18 +8361,18 @@ body: |
     ; VI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p4) :: (load 1 from unknown-address + 14, addrspace 1)
     ; VI: [[PTR_ADD14:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64)
     ; VI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p4) :: (load 1 from unknown-address + 15, addrspace 1)
-    ; VI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32)
-    ; VI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C3]]
-    ; VI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32)
-    ; VI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C3]]
+    ; VI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32)
+    ; VI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]]
+    ; VI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32)
+    ; VI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C3]]
     ; VI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
     ; VI: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
-    ; VI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LOAD14]](s32)
-    ; VI: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C3]]
+    ; VI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LOAD14]](s32)
+    ; VI: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C3]]
     ; VI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
     ; VI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
-    ; VI: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32)
-    ; VI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY17]], [[C3]]
+    ; VI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32)
+    ; VI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C3]]
     ; VI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
     ; VI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
     ; VI: [[PTR_ADD15:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C7]](s64)
@@ -8297,21 +8383,20 @@ body: |
     ; VI: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p4) :: (load 1 from unknown-address + 18, addrspace 1)
     ; VI: [[PTR_ADD18:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64)
     ; VI: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p4) :: (load 1 from unknown-address + 19, addrspace 1)
-    ; VI: [[COPY18:%[0-9]+]]:_(s32) = COPY [[LOAD16]](s32)
-    ; VI: [[AND16:%[0-9]+]]:_(s32) = G_AND [[COPY18]], [[C3]]
-    ; VI: [[COPY19:%[0-9]+]]:_(s32) = COPY [[LOAD17]](s32)
-    ; VI: [[AND17:%[0-9]+]]:_(s32) = G_AND [[COPY19]], [[C3]]
+    ; VI: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LOAD16]](s32)
+    ; VI: [[AND16:%[0-9]+]]:_(s32) = G_AND [[COPY17]], [[C3]]
+    ; VI: [[COPY18:%[0-9]+]]:_(s32) = COPY [[LOAD17]](s32)
+    ; VI: [[AND17:%[0-9]+]]:_(s32) = G_AND [[COPY18]], [[C3]]
     ; VI: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[C4]](s32)
     ; VI: [[OR12:%[0-9]+]]:_(s32) = G_OR [[AND16]], [[SHL12]]
-    ; VI: [[COPY20:%[0-9]+]]:_(s32) = COPY [[LOAD18]](s32)
-    ; VI: [[AND18:%[0-9]+]]:_(s32) = G_AND [[COPY20]], [[C3]]
+    ; VI: [[COPY19:%[0-9]+]]:_(s32) = COPY [[LOAD18]](s32)
+    ; VI: [[AND18:%[0-9]+]]:_(s32) = G_AND [[COPY19]], [[C3]]
     ; VI: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND18]], [[C5]](s32)
     ; VI: [[OR13:%[0-9]+]]:_(s32) = G_OR [[OR12]], [[SHL13]]
-    ; VI: [[COPY21:%[0-9]+]]:_(s32) = COPY [[LOAD19]](s32)
-    ; VI: [[AND19:%[0-9]+]]:_(s32) = G_AND [[COPY21]], [[C3]]
+    ; VI: [[COPY20:%[0-9]+]]:_(s32) = COPY [[LOAD19]](s32)
+    ; VI: [[AND19:%[0-9]+]]:_(s32) = G_AND [[COPY20]], [[C3]]
     ; VI: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[C6]](s32)
     ; VI: [[OR14:%[0-9]+]]:_(s32) = G_OR [[OR13]], [[SHL14]]
-    ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32)
     ; VI: [[PTR_ADD19:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C8]](s64)
     ; VI: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p4) :: (load 1 from unknown-address + 20, addrspace 1)
     ; VI: [[PTR_ADD20:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD19]], [[C]](s64)
@@ -8320,27 +8405,26 @@ body: |
     ; VI: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p4) :: (load 1 from unknown-address + 22, addrspace 1)
     ; VI: [[PTR_ADD22:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD19]], [[C2]](s64)
     ; VI: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p4) :: (load 1 from unknown-address + 23, addrspace 1)
-    ; VI: [[COPY22:%[0-9]+]]:_(s32) = COPY [[LOAD20]](s32)
-    ; VI: [[AND20:%[0-9]+]]:_(s32) = G_AND [[COPY22]], [[C3]]
-    ; VI: [[COPY23:%[0-9]+]]:_(s32) = COPY [[LOAD21]](s32)
-    ; VI: [[AND21:%[0-9]+]]:_(s32) = G_AND [[COPY23]], [[C3]]
+    ; VI: [[COPY21:%[0-9]+]]:_(s32) = COPY [[LOAD20]](s32)
+    ; VI: [[AND20:%[0-9]+]]:_(s32) = G_AND [[COPY21]], [[C3]]
+    ; VI: [[COPY22:%[0-9]+]]:_(s32) = COPY [[LOAD21]](s32)
+    ; VI: [[AND21:%[0-9]+]]:_(s32) = G_AND [[COPY22]], [[C3]]
     ; VI: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[C4]](s32)
     ; VI: [[OR15:%[0-9]+]]:_(s32) = G_OR [[AND20]], [[SHL15]]
-    ; VI: [[COPY24:%[0-9]+]]:_(s32) = COPY [[LOAD22]](s32)
-    ; VI: [[AND22:%[0-9]+]]:_(s32) = G_AND [[COPY24]], [[C3]]
+    ; VI: [[COPY23:%[0-9]+]]:_(s32) = COPY [[LOAD22]](s32)
+    ; VI: [[AND22:%[0-9]+]]:_(s32) = G_AND [[COPY23]], [[C3]]
     ; VI: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[AND22]], [[C5]](s32)
     ; VI: [[OR16:%[0-9]+]]:_(s32) = G_OR [[OR15]], [[SHL16]]
-    ; VI: [[COPY25:%[0-9]+]]:_(s32) = COPY [[LOAD23]](s32)
-    ; VI: [[AND23:%[0-9]+]]:_(s32) = G_AND [[COPY25]], [[C3]]
+    ; VI: [[COPY24:%[0-9]+]]:_(s32) = COPY [[LOAD23]](s32)
+    ; VI: [[AND23:%[0-9]+]]:_(s32) = G_AND [[COPY24]], [[C3]]
     ; VI: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[C6]](s32)
     ; VI: [[OR17:%[0-9]+]]:_(s32) = G_OR [[OR16]], [[SHL17]]
-    ; VI: [[INSERT2:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[BUILD_VECTOR1]](<2 x s32>), 0
-    ; VI: [[INSERT3:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT2]], [[OR17]](s32), 64
-    ; VI: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT3]](<3 x s32>)
-    ; VI: [[COPY26:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
-    ; VI: [[COPY27:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
-    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY26]](s96)
-    ; VI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY27]](s96)
+    ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32), [[OR17]](s32)
+    ; VI: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
+    ; VI: [[COPY25:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; VI: [[COPY26:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY25]](s96)
+    ; VI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY26]](s96)
     ; GFX9-LABEL: name: test_extload_constant_v2s96_from_24_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
     ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 1)
@@ -8394,7 +8478,6 @@ body: |
     ; GFX9: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]]
     ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
     ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
     ; GFX9: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
     ; GFX9: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C8]](s64)
     ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p4) :: (load 1 from unknown-address + 8, addrspace 1)
@@ -8418,11 +8501,8 @@ body: |
     ; GFX9: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C3]]
     ; GFX9: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
     ; GFX9: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; GFX9: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; GFX9: [[COPY13:%[0-9]+]]:_(<3 x s32>) = COPY [[DEF]](<3 x s32>)
-    ; GFX9: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[COPY13]], [[BUILD_VECTOR]](<2 x s32>), 0
-    ; GFX9: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[OR8]](s32), 64
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT1]](<3 x s32>)
+    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
+    ; GFX9: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
     ; GFX9: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
     ; GFX9: [[PTR_ADD11:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C9]](s64)
     ; GFX9: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p4) :: (load 1 from unknown-address + 12, addrspace 1)
@@ -8432,18 +8512,18 @@ body: |
     ; GFX9: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p4) :: (load 1 from unknown-address + 14, addrspace 1)
     ; GFX9: [[PTR_ADD14:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64)
     ; GFX9: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p4) :: (load 1 from unknown-address + 15, addrspace 1)
-    ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32)
-    ; GFX9: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C3]]
-    ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32)
-    ; GFX9: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C3]]
+    ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32)
+    ; GFX9: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]]
+    ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32)
+    ; GFX9: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C3]]
     ; GFX9: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
     ; GFX9: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
-    ; GFX9: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LOAD14]](s32)
-    ; GFX9: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C3]]
+    ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LOAD14]](s32)
+    ; GFX9: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C3]]
     ; GFX9: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
     ; GFX9: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
-    ; GFX9: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32)
-    ; GFX9: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY17]], [[C3]]
+    ; GFX9: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32)
+    ; GFX9: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C3]]
     ; GFX9: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
     ; GFX9: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
     ; GFX9: [[PTR_ADD15:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C7]](s64)
@@ -8454,21 +8534,20 @@ body: |
     ; GFX9: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p4) :: (load 1 from unknown-address + 18, addrspace 1)
     ; GFX9: [[PTR_ADD18:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64)
     ; GFX9: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p4) :: (load 1 from unknown-address + 19, addrspace 1)
-    ; GFX9: [[COPY18:%[0-9]+]]:_(s32) = COPY [[LOAD16]](s32)
-    ; GFX9: [[AND16:%[0-9]+]]:_(s32) = G_AND [[COPY18]], [[C3]]
-    ; GFX9: [[COPY19:%[0-9]+]]:_(s32) = COPY [[LOAD17]](s32)
-    ; GFX9: [[AND17:%[0-9]+]]:_(s32) = G_AND [[COPY19]], [[C3]]
+    ; GFX9: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LOAD16]](s32)
+    ; GFX9: [[AND16:%[0-9]+]]:_(s32) = G_AND [[COPY17]], [[C3]]
+    ; GFX9: [[COPY18:%[0-9]+]]:_(s32) = COPY [[LOAD17]](s32)
+    ; GFX9: [[AND17:%[0-9]+]]:_(s32) = G_AND [[COPY18]], [[C3]]
     ; GFX9: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[C4]](s32)
     ; GFX9: [[OR12:%[0-9]+]]:_(s32) = G_OR [[AND16]], [[SHL12]]
-    ; GFX9: [[COPY20:%[0-9]+]]:_(s32) = COPY [[LOAD18]](s32)
-    ; GFX9: [[AND18:%[0-9]+]]:_(s32) = G_AND [[COPY20]], [[C3]]
+    ; GFX9: [[COPY19:%[0-9]+]]:_(s32) = COPY [[LOAD18]](s32)
+    ; GFX9: [[AND18:%[0-9]+]]:_(s32) = G_AND [[COPY19]], [[C3]]
     ; GFX9: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND18]], [[C5]](s32)
     ; GFX9: [[OR13:%[0-9]+]]:_(s32) = G_OR [[OR12]], [[SHL13]]
-    ; GFX9: [[COPY21:%[0-9]+]]:_(s32) = COPY [[LOAD19]](s32)
-    ; GFX9: [[AND19:%[0-9]+]]:_(s32) = G_AND [[COPY21]], [[C3]]
+    ; GFX9: [[COPY20:%[0-9]+]]:_(s32) = COPY [[LOAD19]](s32)
+    ; GFX9: [[AND19:%[0-9]+]]:_(s32) = G_AND [[COPY20]], [[C3]]
     ; GFX9: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[C6]](s32)
     ; GFX9: [[OR14:%[0-9]+]]:_(s32) = G_OR [[OR13]], [[SHL14]]
-    ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32)
     ; GFX9: [[PTR_ADD19:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C8]](s64)
     ; GFX9: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p4) :: (load 1 from unknown-address + 20, addrspace 1)
     ; GFX9: [[PTR_ADD20:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD19]], [[C]](s64)
@@ -8477,27 +8556,26 @@ body: |
     ; GFX9: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p4) :: (load 1 from unknown-address + 22, addrspace 1)
     ; GFX9: [[PTR_ADD22:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD19]], [[C2]](s64)
     ; GFX9: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p4) :: (load 1 from unknown-address + 23, addrspace 1)
-    ; GFX9: [[COPY22:%[0-9]+]]:_(s32) = COPY [[LOAD20]](s32)
-    ; GFX9: [[AND20:%[0-9]+]]:_(s32) = G_AND [[COPY22]], [[C3]]
-    ; GFX9: [[COPY23:%[0-9]+]]:_(s32) = COPY [[LOAD21]](s32)
-    ; GFX9: [[AND21:%[0-9]+]]:_(s32) = G_AND [[COPY23]], [[C3]]
+    ; GFX9: [[COPY21:%[0-9]+]]:_(s32) = COPY [[LOAD20]](s32)
+    ; GFX9: [[AND20:%[0-9]+]]:_(s32) = G_AND [[COPY21]], [[C3]]
+    ; GFX9: [[COPY22:%[0-9]+]]:_(s32) = COPY [[LOAD21]](s32)
+    ; GFX9: [[AND21:%[0-9]+]]:_(s32) = G_AND [[COPY22]], [[C3]]
     ; GFX9: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[C4]](s32)
     ; GFX9: [[OR15:%[0-9]+]]:_(s32) = G_OR [[AND20]], [[SHL15]]
-    ; GFX9: [[COPY24:%[0-9]+]]:_(s32) = COPY [[LOAD22]](s32)
-    ; GFX9: [[AND22:%[0-9]+]]:_(s32) = G_AND [[COPY24]], [[C3]]
+    ; GFX9: [[COPY23:%[0-9]+]]:_(s32) = COPY [[LOAD22]](s32)
+    ; GFX9: [[AND22:%[0-9]+]]:_(s32) = G_AND [[COPY23]], [[C3]]
     ; GFX9: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[AND22]], [[C5]](s32)
     ; GFX9: [[OR16:%[0-9]+]]:_(s32) = G_OR [[OR15]], [[SHL16]]
-    ; GFX9: [[COPY25:%[0-9]+]]:_(s32) = COPY [[LOAD23]](s32)
-    ; GFX9: [[AND23:%[0-9]+]]:_(s32) = G_AND [[COPY25]], [[C3]]
+    ; GFX9: [[COPY24:%[0-9]+]]:_(s32) = COPY [[LOAD23]](s32)
+    ; GFX9: [[AND23:%[0-9]+]]:_(s32) = G_AND [[COPY24]], [[C3]]
     ; GFX9: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[C6]](s32)
     ; GFX9: [[OR17:%[0-9]+]]:_(s32) = G_OR [[OR16]], [[SHL17]]
-    ; GFX9: [[INSERT2:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[BUILD_VECTOR1]](<2 x s32>), 0
-    ; GFX9: [[INSERT3:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT2]], [[OR17]](s32), 64
-    ; GFX9: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT3]](<3 x s32>)
-    ; GFX9: [[COPY26:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
-    ; GFX9: [[COPY27:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
-    ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[COPY26]](s96)
-    ; GFX9: $vgpr3_vgpr4_vgpr5 = COPY [[COPY27]](s96)
+    ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32), [[OR17]](s32)
+    ; GFX9: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
+    ; GFX9: [[COPY25:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; GFX9: [[COPY26:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[COPY25]](s96)
+    ; GFX9: $vgpr3_vgpr4_vgpr5 = COPY [[COPY26]](s96)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(<2 x s96>) = G_LOAD %0 :: (load 24, align 1, addrspace 1)
     %2:_(s96) = G_EXTRACT %1, 0
@@ -8537,7 +8615,6 @@ body: |
     ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]]
     ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
     ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32)
     ; CI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
     ; CI: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64)
     ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load 2 from unknown-address + 8, addrspace 1)
@@ -8549,50 +8626,45 @@ body: |
     ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]]
     ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
     ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; CI: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; CI: [[COPY7:%[0-9]+]]:_(<3 x s32>) = COPY [[DEF]](<3 x s32>)
-    ; CI: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[COPY7]], [[BUILD_VECTOR]](<2 x s32>), 0
-    ; CI: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[OR2]](s32), 64
-    ; CI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT1]](<3 x s32>)
+    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
+    ; CI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
     ; CI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
     ; CI: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](s64)
     ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load 2 from unknown-address + 12, addrspace 1)
     ; CI: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64)
     ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load 2 from unknown-address + 14, addrspace 1)
-    ; CI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32)
-    ; CI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C1]]
-    ; CI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32)
-    ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C1]]
+    ; CI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32)
+    ; CI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]]
+    ; CI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32)
+    ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C1]]
     ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
     ; CI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
     ; CI: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C3]](s64)
     ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p4) :: (load 2 from unknown-address + 16, addrspace 1)
     ; CI: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
     ; CI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p4) :: (load 2 from unknown-address + 18, addrspace 1)
-    ; CI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32)
-    ; CI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C1]]
-    ; CI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32)
-    ; CI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]]
+    ; CI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32)
+    ; CI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C1]]
+    ; CI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32)
+    ; CI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C1]]
     ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C2]](s32)
     ; CI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]]
-    ; CI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32)
     ; CI: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C4]](s64)
     ; CI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p4) :: (load 2 from unknown-address + 20, addrspace 1)
     ; CI: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64)
     ; CI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load 2 from unknown-address + 22, addrspace 1)
-    ; CI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD10]](s32)
-    ; CI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C1]]
-    ; CI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32)
-    ; CI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C1]]
+    ; CI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD10]](s32)
+    ; CI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]]
+    ; CI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32)
+    ; CI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C1]]
     ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32)
     ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL5]]
-    ; CI: [[INSERT2:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[BUILD_VECTOR1]](<2 x s32>), 0
-    ; CI: [[INSERT3:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT2]], [[OR5]](s32), 64
-    ; CI: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT3]](<3 x s32>)
-    ; CI: [[COPY14:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
-    ; CI: [[COPY15:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
-    ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY14]](s96)
-    ; CI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY15]](s96)
+    ; CI: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32), [[OR5]](s32)
+    ; CI: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
+    ; CI: [[COPY13:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; CI: [[COPY14:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY13]](s96)
+    ; CI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY14]](s96)
     ; VI-LABEL: name: test_extload_constant_v2s96_from_24_align2
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
     ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 1)
@@ -8618,7 +8690,6 @@ body: |
     ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]]
     ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
     ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32)
     ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
     ; VI: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64)
     ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load 2 from unknown-address + 8, addrspace 1)
@@ -8630,50 +8701,45 @@ body: |
     ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]]
     ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
     ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; VI: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; VI: [[COPY7:%[0-9]+]]:_(<3 x s32>) = COPY [[DEF]](<3 x s32>)
-    ; VI: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[COPY7]], [[BUILD_VECTOR]](<2 x s32>), 0
-    ; VI: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[OR2]](s32), 64
-    ; VI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT1]](<3 x s32>)
+    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
+    ; VI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
     ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
     ; VI: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](s64)
     ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load 2 from unknown-address + 12, addrspace 1)
     ; VI: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64)
     ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load 2 from unknown-address + 14, addrspace 1)
-    ; VI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32)
-    ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C1]]
-    ; VI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32)
-    ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C1]]
+    ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32)
+    ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]]
+    ; VI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32)
+    ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C1]]
     ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
     ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
     ; VI: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C3]](s64)
     ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p4) :: (load 2 from unknown-address + 16, addrspace 1)
     ; VI: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
     ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p4) :: (load 2 from unknown-address + 18, addrspace 1)
-    ; VI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32)
-    ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C1]]
-    ; VI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32)
-    ; VI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]]
+    ; VI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32)
+    ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C1]]
+    ; VI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32)
+    ; VI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C1]]
     ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C2]](s32)
     ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]]
-    ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32)
     ; VI: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C4]](s64)
     ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p4) :: (load 2 from unknown-address + 20, addrspace 1)
     ; VI: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64)
     ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load 2 from unknown-address + 22, addrspace 1)
-    ; VI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD10]](s32)
-    ; VI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C1]]
-    ; VI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32)
-    ; VI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C1]]
+    ; VI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD10]](s32)
+    ; VI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]]
+    ; VI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32)
+    ; VI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C1]]
     ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32)
     ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL5]]
-    ; VI: [[INSERT2:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[BUILD_VECTOR1]](<2 x s32>), 0
-    ; VI: [[INSERT3:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT2]], [[OR5]](s32), 64
-    ; VI: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT3]](<3 x s32>)
-    ; VI: [[COPY14:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
-    ; VI: [[COPY15:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
-    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY14]](s96)
-    ; VI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY15]](s96)
+    ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32), [[OR5]](s32)
+    ; VI: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
+    ; VI: [[COPY13:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; VI: [[COPY14:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY13]](s96)
+    ; VI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY14]](s96)
     ; GFX9-LABEL: name: test_extload_constant_v2s96_from_24_align2
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
     ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 1)
@@ -8699,7 +8765,6 @@ body: |
     ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]]
     ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
     ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32)
     ; GFX9: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
     ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64)
     ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load 2 from unknown-address + 8, addrspace 1)
@@ -8711,50 +8776,45 @@ body: |
     ; GFX9: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]]
     ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
     ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; GFX9: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; GFX9: [[COPY7:%[0-9]+]]:_(<3 x s32>) = COPY [[DEF]](<3 x s32>)
-    ; GFX9: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[COPY7]], [[BUILD_VECTOR]](<2 x s32>), 0
-    ; GFX9: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[OR2]](s32), 64
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT1]](<3 x s32>)
+    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
+    ; GFX9: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
     ; GFX9: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
     ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](s64)
     ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load 2 from unknown-address + 12, addrspace 1)
     ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64)
     ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load 2 from unknown-address + 14, addrspace 1)
-    ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32)
-    ; GFX9: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C1]]
-    ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32)
-    ; GFX9: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C1]]
+    ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32)
+    ; GFX9: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]]
+    ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32)
+    ; GFX9: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C1]]
     ; GFX9: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
     ; GFX9: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
     ; GFX9: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C3]](s64)
     ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p4) :: (load 2 from unknown-address + 16, addrspace 1)
     ; GFX9: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
     ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p4) :: (load 2 from unknown-address + 18, addrspace 1)
-    ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32)
-    ; GFX9: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C1]]
-    ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32)
-    ; GFX9: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]]
+    ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32)
+    ; GFX9: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C1]]
+    ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32)
+    ; GFX9: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C1]]
     ; GFX9: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C2]](s32)
     ; GFX9: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]]
-    ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32)
     ; GFX9: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C4]](s64)
     ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p4) :: (load 2 from unknown-address + 20, addrspace 1)
     ; GFX9: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64)
     ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load 2 from unknown-address + 22, addrspace 1)
-    ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD10]](s32)
-    ; GFX9: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C1]]
-    ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32)
-    ; GFX9: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C1]]
+    ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD10]](s32)
+    ; GFX9: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]]
+    ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32)
+    ; GFX9: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C1]]
     ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32)
     ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL5]]
-    ; GFX9: [[INSERT2:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[BUILD_VECTOR1]](<2 x s32>), 0
-    ; GFX9: [[INSERT3:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT2]], [[OR5]](s32), 64
-    ; GFX9: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT3]](<3 x s32>)
-    ; GFX9: [[COPY14:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
-    ; GFX9: [[COPY15:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
-    ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[COPY14]](s96)
-    ; GFX9: $vgpr3_vgpr4_vgpr5 = COPY [[COPY15]](s96)
+    ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32), [[OR5]](s32)
+    ; GFX9: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
+    ; GFX9: [[COPY13:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; GFX9: [[COPY14:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[COPY13]](s96)
+    ; GFX9: $vgpr3_vgpr4_vgpr5 = COPY [[COPY14]](s96)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(<2 x s96>) = G_LOAD %0 :: (load 24, align 2, addrspace 1)
     %2:_(s96) = G_EXTRACT %1, 0

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir
index 07b19653beeb..554bb7887eff 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir
@@ -908,7 +908,6 @@ body: |
     ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]]
     ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
     ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32)
     ; CI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
     ; CI: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64)
     ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load 2 from unknown-address + 8)
@@ -920,10 +919,8 @@ body: |
     ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]]
     ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
     ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; CI: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; CI: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<2 x s32>), 0
-    ; CI: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[OR2]](s32), 64
-    ; CI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT1]](<3 x s32>)
+    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
+    ; CI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
     ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; VI-LABEL: name: test_load_flat_s96_align2
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
@@ -950,7 +947,6 @@ body: |
     ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]]
     ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
     ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32)
     ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
     ; VI: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64)
     ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load 2 from unknown-address + 8)
@@ -962,10 +958,8 @@ body: |
     ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]]
     ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
     ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; VI: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; VI: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<2 x s32>), 0
-    ; VI: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[OR2]](s32), 64
-    ; VI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT1]](<3 x s32>)
+    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
+    ; VI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
     ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; GFX9-LABEL: name: test_load_flat_s96_align2
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
@@ -992,7 +986,6 @@ body: |
     ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]]
     ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
     ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32)
     ; GFX9: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
     ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64)
     ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load 2 from unknown-address + 8)
@@ -1004,10 +997,8 @@ body: |
     ; GFX9: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]]
     ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
     ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; GFX9: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; GFX9: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<2 x s32>), 0
-    ; GFX9: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[OR2]](s32), 64
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT1]](<3 x s32>)
+    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
+    ; GFX9: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
     ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(s96) = G_LOAD %0 :: (load 12, align 2, addrspace 0)
@@ -1073,7 +1064,6 @@ body: |
     ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]]
     ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
     ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
     ; CI: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
     ; CI: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C8]](s64)
     ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p0) :: (load 1 from unknown-address + 8)
@@ -1097,10 +1087,8 @@ body: |
     ; CI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C3]]
     ; CI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
     ; CI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; CI: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; CI: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<2 x s32>), 0
-    ; CI: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[OR8]](s32), 64
-    ; CI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT1]](<3 x s32>)
+    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
+    ; CI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
     ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; VI-LABEL: name: test_load_flat_s96_align1
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
@@ -1155,7 +1143,6 @@ body: |
     ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]]
     ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
     ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
     ; VI: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
     ; VI: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C8]](s64)
     ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p0) :: (load 1 from unknown-address + 8)
@@ -1179,10 +1166,8 @@ body: |
     ; VI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C3]]
     ; VI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
     ; VI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; VI: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; VI: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<2 x s32>), 0
-    ; VI: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[OR8]](s32), 64
-    ; VI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT1]](<3 x s32>)
+    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
+    ; VI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
     ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; GFX9-LABEL: name: test_load_flat_s96_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
@@ -1237,7 +1222,6 @@ body: |
     ; GFX9: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]]
     ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
     ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
     ; GFX9: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
     ; GFX9: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C8]](s64)
     ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p0) :: (load 1 from unknown-address + 8)
@@ -1261,10 +1245,8 @@ body: |
     ; GFX9: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C3]]
     ; GFX9: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
     ; GFX9: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; GFX9: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; GFX9: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<2 x s32>), 0
-    ; GFX9: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[OR8]](s32), 64
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT1]](<3 x s32>)
+    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
+    ; GFX9: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
     ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(s96) = G_LOAD %0 :: (load 12, align 1, addrspace 0)
@@ -1283,10 +1265,9 @@ body: |
     ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
     ; CI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
     ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 4 from unknown-address + 16)
-    ; CI: [[DEF:%[0-9]+]]:_(<5 x s32>) = G_IMPLICIT_DEF
-    ; CI: [[INSERT:%[0-9]+]]:_(<5 x s32>) = G_INSERT [[DEF]], [[LOAD]](<4 x s32>), 0
-    ; CI: [[INSERT1:%[0-9]+]]:_(<5 x s32>) = G_INSERT [[INSERT]], [[LOAD1]](s32), 128
-    ; CI: [[BITCAST:%[0-9]+]]:_(s160) = G_BITCAST [[INSERT1]](<5 x s32>)
+    ; CI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
+    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[LOAD1]](s32)
+    ; CI: [[BITCAST:%[0-9]+]]:_(s160) = G_BITCAST [[BUILD_VECTOR]](<5 x s32>)
     ; CI: S_NOP 0, implicit [[BITCAST]](s160)
     ; VI-LABEL: name: test_load_flat_s160_align4
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
@@ -1294,10 +1275,9 @@ body: |
     ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
     ; VI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
     ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 4 from unknown-address + 16)
-    ; VI: [[DEF:%[0-9]+]]:_(<5 x s32>) = G_IMPLICIT_DEF
-    ; VI: [[INSERT:%[0-9]+]]:_(<5 x s32>) = G_INSERT [[DEF]], [[LOAD]](<4 x s32>), 0
-    ; VI: [[INSERT1:%[0-9]+]]:_(<5 x s32>) = G_INSERT [[INSERT]], [[LOAD1]](s32), 128
-    ; VI: [[BITCAST:%[0-9]+]]:_(s160) = G_BITCAST [[INSERT1]](<5 x s32>)
+    ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
+    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[LOAD1]](s32)
+    ; VI: [[BITCAST:%[0-9]+]]:_(s160) = G_BITCAST [[BUILD_VECTOR]](<5 x s32>)
     ; VI: S_NOP 0, implicit [[BITCAST]](s160)
     ; GFX9-LABEL: name: test_load_flat_s160_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
@@ -1305,10 +1285,9 @@ body: |
     ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
     ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
     ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 4 from unknown-address + 16)
-    ; GFX9: [[DEF:%[0-9]+]]:_(<5 x s32>) = G_IMPLICIT_DEF
-    ; GFX9: [[INSERT:%[0-9]+]]:_(<5 x s32>) = G_INSERT [[DEF]], [[LOAD]](<4 x s32>), 0
-    ; GFX9: [[INSERT1:%[0-9]+]]:_(<5 x s32>) = G_INSERT [[INSERT]], [[LOAD1]](s32), 128
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(s160) = G_BITCAST [[INSERT1]](<5 x s32>)
+    ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
+    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[LOAD1]](s32)
+    ; GFX9: [[BITCAST:%[0-9]+]]:_(s160) = G_BITCAST [[BUILD_VECTOR]](<5 x s32>)
     ; GFX9: S_NOP 0, implicit [[BITCAST]](s160)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(s160) = G_LOAD %0 :: (load 20, align 4, addrspace 0)
@@ -1327,39 +1306,57 @@ body: |
     ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
     ; CI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
     ; CI: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load 12 from unknown-address + 16, align 4)
-    ; CI: [[DEF:%[0-9]+]]:_(<7 x s32>) = G_IMPLICIT_DEF
-    ; CI: [[INSERT:%[0-9]+]]:_(<7 x s32>) = G_INSERT [[DEF]], [[LOAD]](<4 x s32>), 0
-    ; CI: [[INSERT1:%[0-9]+]]:_(<7 x s32>) = G_INSERT [[INSERT]], [[LOAD1]](<3 x s32>), 128
-    ; CI: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[INSERT1]](<7 x s32>)
-    ; CI: [[DEF1:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF
-    ; CI: [[INSERT2:%[0-9]+]]:_(s256) = G_INSERT [[DEF1]], [[BITCAST]](s224), 0
-    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT2]](s256)
+    ; CI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
+    ; CI: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<3 x s32>)
+    ; CI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32)
+    ; CI: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[UV4]](s32), [[UV5]](s32)
+    ; CI: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV6]](s32), [[DEF]](s32), [[DEF]](s32)
+    ; CI: [[DEF1:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
+    ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<21 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<3 x s32>), [[BUILD_VECTOR1]](<3 x s32>), [[BUILD_VECTOR2]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>)
+    ; CI: [[UV7:%[0-9]+]]:_(<7 x s32>), [[UV8:%[0-9]+]]:_(<7 x s32>), [[UV9:%[0-9]+]]:_(<7 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<21 x s32>)
+    ; CI: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[UV7]](<7 x s32>)
+    ; CI: [[DEF2:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF
+    ; CI: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF2]], [[BITCAST]](s224), 0
+    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](s256)
     ; VI-LABEL: name: test_load_flat_s224_align4
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
     ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16, align 4)
     ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
     ; VI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
     ; VI: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load 12 from unknown-address + 16, align 4)
-    ; VI: [[DEF:%[0-9]+]]:_(<7 x s32>) = G_IMPLICIT_DEF
-    ; VI: [[INSERT:%[0-9]+]]:_(<7 x s32>) = G_INSERT [[DEF]], [[LOAD]](<4 x s32>), 0
-    ; VI: [[INSERT1:%[0-9]+]]:_(<7 x s32>) = G_INSERT [[INSERT]], [[LOAD1]](<3 x s32>), 128
-    ; VI: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[INSERT1]](<7 x s32>)
-    ; VI: [[DEF1:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF
-    ; VI: [[INSERT2:%[0-9]+]]:_(s256) = G_INSERT [[DEF1]], [[BITCAST]](s224), 0
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT2]](s256)
+    ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
+    ; VI: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<3 x s32>)
+    ; VI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32)
+    ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[UV4]](s32), [[UV5]](s32)
+    ; VI: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV6]](s32), [[DEF]](s32), [[DEF]](s32)
+    ; VI: [[DEF1:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
+    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<21 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<3 x s32>), [[BUILD_VECTOR1]](<3 x s32>), [[BUILD_VECTOR2]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>)
+    ; VI: [[UV7:%[0-9]+]]:_(<7 x s32>), [[UV8:%[0-9]+]]:_(<7 x s32>), [[UV9:%[0-9]+]]:_(<7 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<21 x s32>)
+    ; VI: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[UV7]](<7 x s32>)
+    ; VI: [[DEF2:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF
+    ; VI: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF2]], [[BITCAST]](s224), 0
+    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](s256)
     ; GFX9-LABEL: name: test_load_flat_s224_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
     ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16, align 4)
     ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
     ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
     ; GFX9: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load 12 from unknown-address + 16, align 4)
-    ; GFX9: [[DEF:%[0-9]+]]:_(<7 x s32>) = G_IMPLICIT_DEF
-    ; GFX9: [[INSERT:%[0-9]+]]:_(<7 x s32>) = G_INSERT [[DEF]], [[LOAD]](<4 x s32>), 0
-    ; GFX9: [[INSERT1:%[0-9]+]]:_(<7 x s32>) = G_INSERT [[INSERT]], [[LOAD1]](<3 x s32>), 128
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[INSERT1]](<7 x s32>)
-    ; GFX9: [[DEF1:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF
-    ; GFX9: [[INSERT2:%[0-9]+]]:_(s256) = G_INSERT [[DEF1]], [[BITCAST]](s224), 0
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT2]](s256)
+    ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
+    ; GFX9: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<3 x s32>)
+    ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32)
+    ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[UV4]](s32), [[UV5]](s32)
+    ; GFX9: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV6]](s32), [[DEF]](s32), [[DEF]](s32)
+    ; GFX9: [[DEF1:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
+    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<21 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<3 x s32>), [[BUILD_VECTOR1]](<3 x s32>), [[BUILD_VECTOR2]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>)
+    ; GFX9: [[UV7:%[0-9]+]]:_(<7 x s32>), [[UV8:%[0-9]+]]:_(<7 x s32>), [[UV9:%[0-9]+]]:_(<7 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<21 x s32>)
+    ; GFX9: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[UV7]](<7 x s32>)
+    ; GFX9: [[DEF2:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF
+    ; GFX9: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF2]], [[BITCAST]](s224), 0
+    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](s256)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(s224) = G_LOAD %0 :: (load 28, align 4, addrspace 0)
      %2:_(s256) = G_IMPLICIT_DEF
@@ -3854,93 +3851,81 @@ body: |
     ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
     ; CI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
     ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 2 from unknown-address + 2)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 2 from unknown-address + 4)
+    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
     ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]]
+    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]]
     ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]]
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]]
+    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32)
     ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
     ; CI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; CI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 2 from unknown-address + 4)
-    ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; CI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]]
+    ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32)
+    ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; CI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[DEF]](<2 x s16>)
+    ; CI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
     ; CI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; CI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; CI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
-    ; CI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BITCAST]](<2 x s16>), 0
-    ; CI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; CI: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
-    ; CI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0
-    ; CI: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[TRUNC]](s16), 32
-    ; CI: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT3]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; CI: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>)
-    ; CI: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV8]](<3 x s16>), 0
-    ; CI: $vgpr0_vgpr1 = COPY [[INSERT4]](<4 x s16>)
+    ; CI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
     ; VI-LABEL: name: test_load_flat_v3s16_align2
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
     ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2)
     ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
     ; VI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
     ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 2 from unknown-address + 2)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 2 from unknown-address + 4)
+    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
     ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]]
+    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]]
     ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]]
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]]
+    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32)
     ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
     ; VI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 2 from unknown-address + 4)
-    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]]
+    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32)
+    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; VI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; VI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[DEF]](<2 x s16>)
+    ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
     ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
-    ; VI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BITCAST]](<2 x s16>), 0
-    ; VI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; VI: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
-    ; VI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0
-    ; VI: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[TRUNC]](s16), 32
-    ; VI: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT3]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; VI: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>)
-    ; VI: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV8]](<3 x s16>), 0
-    ; VI: $vgpr0_vgpr1 = COPY [[INSERT4]](<4 x s16>)
+    ; VI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
     ; GFX9-LABEL: name: test_load_flat_v3s16_align2
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
     ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2)
     ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
     ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
     ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 2 from unknown-address + 2)
-    ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
-    ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
     ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
     ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
     ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 2 from unknown-address + 4)
-    ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
-    ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
-    ; GFX9: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BUILD_VECTOR_TRUNC]](<2 x s16>), 0
-    ; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; GFX9: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
-    ; GFX9: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0
-    ; GFX9: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[TRUNC]](s16), 32
-    ; GFX9: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT3]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; GFX9: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>)
-    ; GFX9: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV8]](<3 x s16>), 0
-    ; GFX9: $vgpr0_vgpr1 = COPY [[INSERT4]](<4 x s16>)
+    ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+    ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
+    ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+    ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[DEF]](s32)
+    ; GFX9: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[DEF1]](<2 x s16>)
+    ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
+    ; GFX9: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[UV]](<3 x s16>), 0
+    ; GFX9: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(<3 x s16>) = G_LOAD %0 :: (load 6, align 2, addrspace 0)
     %2:_(<4 x s16>) = G_IMPLICIT_DEF
@@ -3983,14 +3968,8 @@ body: |
     ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32)
     ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
     ; CI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
-    ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
-    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; CI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; CI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; CI: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64)
+    ; CI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CI: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64)
     ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load 1 from unknown-address + 4)
     ; CI: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
     ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load 1 from unknown-address + 5)
@@ -3999,23 +3978,26 @@ body: |
     ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
     ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32)
     ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]]
-    ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32)
-    ; CI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
-    ; CI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
-    ; CI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32)
+    ; CI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
+    ; CI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
+    ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32)
+    ; CI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]]
+    ; CI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+    ; CI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; CI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C7]], [[C6]](s32)
+    ; CI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]]
+    ; CI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+    ; CI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[DEF]](<2 x s16>)
+    ; CI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
     ; CI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; CI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; CI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
-    ; CI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BITCAST]](<2 x s16>), 0
-    ; CI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; CI: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
-    ; CI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0
-    ; CI: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[OR3]](s16), 32
-    ; CI: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT3]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; CI: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>)
-    ; CI: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV8]](<3 x s16>), 0
-    ; CI: $vgpr0_vgpr1 = COPY [[INSERT4]](<4 x s16>)
+    ; CI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
     ; VI-LABEL: name: test_load_flat_v3s16_align1
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
     ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1)
@@ -4041,14 +4023,8 @@ body: |
     ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]]
     ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16)
     ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; VI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64)
     ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load 1 from unknown-address + 4)
     ; VI: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
     ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load 1 from unknown-address + 5)
@@ -4056,22 +4032,25 @@ body: |
     ; VI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]]
     ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
     ; VI: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]]
-    ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16)
-    ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL3]]
-    ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; VI: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16)
+    ; VI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
+    ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
+    ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]]
+    ; VI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+    ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[C5]](s32)
+    ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]]
+    ; VI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+    ; VI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[DEF]](<2 x s16>)
+    ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
     ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
-    ; VI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BITCAST]](<2 x s16>), 0
-    ; VI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; VI: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
-    ; VI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0
-    ; VI: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[OR3]](s16), 32
-    ; VI: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT3]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; VI: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>)
-    ; VI: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV8]](<3 x s16>), 0
-    ; VI: $vgpr0_vgpr1 = COPY [[INSERT4]](<4 x s16>)
+    ; VI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
     ; GFX9-LABEL: name: test_load_flat_v3s16_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
     ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1)
@@ -4097,9 +4076,6 @@ body: |
     ; GFX9: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]]
     ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16)
     ; GFX9: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
-    ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16)
-    ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32)
     ; GFX9: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
     ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64)
     ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load 1 from unknown-address + 4)
@@ -4111,20 +4087,18 @@ body: |
     ; GFX9: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]]
     ; GFX9: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16)
     ; GFX9: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
-    ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
-    ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
-    ; GFX9: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BUILD_VECTOR_TRUNC]](<2 x s16>), 0
-    ; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; GFX9: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
-    ; GFX9: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0
-    ; GFX9: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[OR2]](s16), 32
-    ; GFX9: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT3]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; GFX9: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>)
-    ; GFX9: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV8]](<3 x s16>), 0
-    ; GFX9: $vgpr0_vgpr1 = COPY [[INSERT4]](<4 x s16>)
+    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16)
+    ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32)
+    ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[OR2]](s16)
+    ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[DEF]](s32)
+    ; GFX9: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[DEF1]](<2 x s16>)
+    ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
+    ; GFX9: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[UV]](<3 x s16>), 0
+    ; GFX9: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(<3 x s16>) = G_LOAD %0 :: (load 6, align 1, addrspace 0)
     %2:_(<4 x s16>) = G_IMPLICIT_DEF
@@ -5336,36 +5310,33 @@ body: |
     ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
     ; CI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
     ; CI: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load 8 from unknown-address + 16, align 16)
-    ; CI: [[DEF:%[0-9]+]]:_(<3 x s64>) = G_IMPLICIT_DEF
-    ; CI: [[INSERT:%[0-9]+]]:_(<3 x s64>) = G_INSERT [[DEF]], [[LOAD]](<2 x s64>), 0
-    ; CI: [[INSERT1:%[0-9]+]]:_(<3 x s64>) = G_INSERT [[INSERT]], [[LOAD1]](s64), 128
-    ; CI: [[DEF1:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
-    ; CI: [[INSERT2:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF1]], [[INSERT1]](<3 x s64>), 0
-    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT2]](<4 x s64>)
+    ; CI: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
+    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64)
+    ; CI: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
+    ; CI: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
+    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
     ; VI-LABEL: name: test_load_flat_v3s64_align32
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
     ; VI: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load 16, align 32)
     ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
     ; VI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
     ; VI: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load 8 from unknown-address + 16, align 16)
-    ; VI: [[DEF:%[0-9]+]]:_(<3 x s64>) = G_IMPLICIT_DEF
-    ; VI: [[INSERT:%[0-9]+]]:_(<3 x s64>) = G_INSERT [[DEF]], [[LOAD]](<2 x s64>), 0
-    ; VI: [[INSERT1:%[0-9]+]]:_(<3 x s64>) = G_INSERT [[INSERT]], [[LOAD1]](s64), 128
-    ; VI: [[DEF1:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
-    ; VI: [[INSERT2:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF1]], [[INSERT1]](<3 x s64>), 0
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT2]](<4 x s64>)
+    ; VI: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
+    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64)
+    ; VI: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
+    ; VI: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
+    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
     ; GFX9-LABEL: name: test_load_flat_v3s64_align32
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
     ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load 16, align 32)
     ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
     ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
     ; GFX9: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load 8 from unknown-address + 16, align 16)
-    ; GFX9: [[DEF:%[0-9]+]]:_(<3 x s64>) = G_IMPLICIT_DEF
-    ; GFX9: [[INSERT:%[0-9]+]]:_(<3 x s64>) = G_INSERT [[DEF]], [[LOAD]](<2 x s64>), 0
-    ; GFX9: [[INSERT1:%[0-9]+]]:_(<3 x s64>) = G_INSERT [[INSERT]], [[LOAD1]](s64), 128
-    ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
-    ; GFX9: [[INSERT2:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF1]], [[INSERT1]](<3 x s64>), 0
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT2]](<4 x s64>)
+    ; GFX9: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
+    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64)
+    ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
+    ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
+    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(<3 x s64>) = G_LOAD %0 :: (load 24, align 32, addrspace 0)
     %2:_(<4 x s64>) = G_IMPLICIT_DEF
@@ -5385,36 +5356,33 @@ body: |
     ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
     ; CI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
     ; CI: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load 8 from unknown-address + 16)
-    ; CI: [[DEF:%[0-9]+]]:_(<3 x s64>) = G_IMPLICIT_DEF
-    ; CI: [[INSERT:%[0-9]+]]:_(<3 x s64>) = G_INSERT [[DEF]], [[LOAD]](<2 x s64>), 0
-    ; CI: [[INSERT1:%[0-9]+]]:_(<3 x s64>) = G_INSERT [[INSERT]], [[LOAD1]](s64), 128
-    ; CI: [[DEF1:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
-    ; CI: [[INSERT2:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF1]], [[INSERT1]](<3 x s64>), 0
-    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT2]](<4 x s64>)
+    ; CI: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
+    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64)
+    ; CI: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
+    ; CI: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
+    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
     ; VI-LABEL: name: test_load_flat_v3s64_align8
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
     ; VI: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load 16, align 8)
     ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
     ; VI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
     ; VI: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load 8 from unknown-address + 16)
-    ; VI: [[DEF:%[0-9]+]]:_(<3 x s64>) = G_IMPLICIT_DEF
-    ; VI: [[INSERT:%[0-9]+]]:_(<3 x s64>) = G_INSERT [[DEF]], [[LOAD]](<2 x s64>), 0
-    ; VI: [[INSERT1:%[0-9]+]]:_(<3 x s64>) = G_INSERT [[INSERT]], [[LOAD1]](s64), 128
-    ; VI: [[DEF1:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
-    ; VI: [[INSERT2:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF1]], [[INSERT1]](<3 x s64>), 0
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT2]](<4 x s64>)
+    ; VI: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
+    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64)
+    ; VI: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
+    ; VI: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
+    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
     ; GFX9-LABEL: name: test_load_flat_v3s64_align8
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
     ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load 16, align 8)
     ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
     ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
     ; GFX9: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load 8 from unknown-address + 16)
-    ; GFX9: [[DEF:%[0-9]+]]:_(<3 x s64>) = G_IMPLICIT_DEF
-    ; GFX9: [[INSERT:%[0-9]+]]:_(<3 x s64>) = G_INSERT [[DEF]], [[LOAD]](<2 x s64>), 0
-    ; GFX9: [[INSERT1:%[0-9]+]]:_(<3 x s64>) = G_INSERT [[INSERT]], [[LOAD1]](s64), 128
-    ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
-    ; GFX9: [[INSERT2:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF1]], [[INSERT1]](<3 x s64>), 0
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT2]](<4 x s64>)
+    ; GFX9: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
+    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64)
+    ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
+    ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
+    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(<3 x s64>) = G_LOAD %0 :: (load 24, align 8, addrspace 0)
     %2:_(<4 x s64>) = G_IMPLICIT_DEF
@@ -5554,7 +5522,6 @@ body: |
     ; CI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C10]](s32)
     ; CI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]]
     ; CI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32)
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
     ; CI: [[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
     ; CI: [[PTR_ADD15:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C12]](s64)
     ; CI: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p0) :: (load 1 from unknown-address + 16)
@@ -5613,12 +5580,10 @@ body: |
     ; CI: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXT11]], [[C10]](s32)
     ; CI: [[OR17:%[0-9]+]]:_(s32) = G_OR [[ZEXT10]], [[SHL17]]
     ; CI: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR16]](s32), [[OR17]](s32)
-    ; CI: [[DEF:%[0-9]+]]:_(<3 x s64>) = G_IMPLICIT_DEF
-    ; CI: [[INSERT:%[0-9]+]]:_(<3 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<2 x s64>), 0
-    ; CI: [[INSERT1:%[0-9]+]]:_(<3 x s64>) = G_INSERT [[INSERT]], [[MV2]](s64), 128
-    ; CI: [[DEF1:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
-    ; CI: [[INSERT2:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF1]], [[INSERT1]](<3 x s64>), 0
-    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT2]](<4 x s64>)
+    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64)
+    ; CI: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
+    ; CI: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
+    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
     ; VI-LABEL: name: test_load_flat_v3s64_align1
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
     ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1)
@@ -5729,7 +5694,6 @@ body: |
     ; VI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C9]](s32)
     ; VI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]]
     ; VI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32)
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
     ; VI: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
     ; VI: [[PTR_ADD15:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C11]](s64)
     ; VI: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p0) :: (load 1 from unknown-address + 16)
@@ -5780,12 +5744,10 @@ body: |
     ; VI: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXT11]], [[C9]](s32)
     ; VI: [[OR17:%[0-9]+]]:_(s32) = G_OR [[ZEXT10]], [[SHL17]]
     ; VI: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR16]](s32), [[OR17]](s32)
-    ; VI: [[DEF:%[0-9]+]]:_(<3 x s64>) = G_IMPLICIT_DEF
-    ; VI: [[INSERT:%[0-9]+]]:_(<3 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<2 x s64>), 0
-    ; VI: [[INSERT1:%[0-9]+]]:_(<3 x s64>) = G_INSERT [[INSERT]], [[MV2]](s64), 128
-    ; VI: [[DEF1:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
-    ; VI: [[INSERT2:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF1]], [[INSERT1]](<3 x s64>), 0
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT2]](<4 x s64>)
+    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64)
+    ; VI: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
+    ; VI: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
+    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
     ; GFX9-LABEL: name: test_load_flat_v3s64_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
     ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1)
@@ -5896,7 +5858,6 @@ body: |
     ; GFX9: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C9]](s32)
     ; GFX9: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]]
     ; GFX9: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32)
-    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
     ; GFX9: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
     ; GFX9: [[PTR_ADD15:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C11]](s64)
     ; GFX9: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p0) :: (load 1 from unknown-address + 16)
@@ -5947,12 +5908,10 @@ body: |
     ; GFX9: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXT11]], [[C9]](s32)
     ; GFX9: [[OR17:%[0-9]+]]:_(s32) = G_OR [[ZEXT10]], [[SHL17]]
     ; GFX9: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR16]](s32), [[OR17]](s32)
-    ; GFX9: [[DEF:%[0-9]+]]:_(<3 x s64>) = G_IMPLICIT_DEF
-    ; GFX9: [[INSERT:%[0-9]+]]:_(<3 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<2 x s64>), 0
-    ; GFX9: [[INSERT1:%[0-9]+]]:_(<3 x s64>) = G_INSERT [[INSERT]], [[MV2]](s64), 128
-    ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
-    ; GFX9: [[INSERT2:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF1]], [[INSERT1]](<3 x s64>), 0
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT2]](<4 x s64>)
+    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64)
+    ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
+    ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
+    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(<3 x s64>) = G_LOAD %0 :: (load 24, align 1, addrspace 0)
     %2:_(<4 x s64>) = G_IMPLICIT_DEF

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir
index 420d67cd24c6..da62fce5b1e8 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir
@@ -753,15 +753,41 @@ body: |
 
     ; SI-LABEL: name: test_load_global_s24_align2
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1)
+    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1)
     ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
     ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 2, align 2, addrspace 1)
-    ; SI: [[DEF:%[0-9]+]]:_(s24) = G_IMPLICIT_DEF
-    ; SI: [[INSERT:%[0-9]+]]:_(s24) = G_INSERT [[DEF]], [[LOAD]](s16), 0
-    ; SI: [[INSERT1:%[0-9]+]]:_(s24) = G_INSERT [[INSERT]], [[LOAD1]](s8), 16
-    ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INSERT1]](s24)
-    ; SI: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 2, align 2, addrspace 1)
+    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
+    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C3]](s32)
+    ; SI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; SI: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; SI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]]
+    ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
+    ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C5]]
+    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+    ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; SI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; SI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C4]]
+    ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
+    ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[DEF]](s32)
+    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C5]]
+    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY3]](s32)
+    ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; SI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C2]](s32)
+    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[OR2]](s32)
+    ; SI: $vgpr0 = COPY [[COPY5]](s32)
     ; CI-HSA-LABEL: name: test_load_global_s24_align2
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; CI-HSA: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load 2, addrspace 1)
@@ -775,26 +801,74 @@ body: |
     ; CI-HSA: $vgpr0 = COPY [[COPY1]](s32)
     ; CI-MESA-LABEL: name: test_load_global_s24_align2
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1)
+    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1)
     ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
     ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 2, align 2, addrspace 1)
-    ; CI-MESA: [[DEF:%[0-9]+]]:_(s24) = G_IMPLICIT_DEF
-    ; CI-MESA: [[INSERT:%[0-9]+]]:_(s24) = G_INSERT [[DEF]], [[LOAD]](s16), 0
-    ; CI-MESA: [[INSERT1:%[0-9]+]]:_(s24) = G_INSERT [[INSERT]], [[LOAD1]](s8), 16
-    ; CI-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INSERT1]](s24)
-    ; CI-MESA: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 2, align 2, addrspace 1)
+    ; CI-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; CI-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
+    ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CI-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C3]](s32)
+    ; CI-MESA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CI-MESA: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; CI-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]]
+    ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
+    ; CI-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C5]]
+    ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+    ; CI-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; CI-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; CI-MESA: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C4]]
+    ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
+    ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[DEF]](s32)
+    ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C5]]
+    ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY3]](s32)
+    ; CI-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; CI-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; CI-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C2]](s32)
+    ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[OR2]](s32)
+    ; CI-MESA: $vgpr0 = COPY [[COPY5]](s32)
     ; VI-LABEL: name: test_load_global_s24_align2
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1)
+    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1)
     ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
     ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 2, align 2, addrspace 1)
-    ; VI: [[DEF:%[0-9]+]]:_(s24) = G_IMPLICIT_DEF
-    ; VI: [[INSERT:%[0-9]+]]:_(s24) = G_INSERT [[DEF]], [[LOAD]](s16), 0
-    ; VI: [[INSERT1:%[0-9]+]]:_(s24) = G_INSERT [[INSERT]], [[LOAD1]](s8), 16
-    ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INSERT1]](s24)
-    ; VI: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 2, align 2, addrspace 1)
+    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
+    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C3]](s32)
+    ; VI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; VI: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]]
+    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+    ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C4]]
+    ; VI: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C5]](s16)
+    ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; VI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C4]]
+    ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
+    ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C4]]
+    ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C5]](s16)
+    ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C2]](s32)
+    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[OR2]](s32)
+    ; VI: $vgpr0 = COPY [[COPY1]](s32)
     ; GFX9-HSA-LABEL: name: test_load_global_s24_align2
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; GFX9-HSA: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load 2, addrspace 1)
@@ -808,15 +882,37 @@ body: |
     ; GFX9-HSA: $vgpr0 = COPY [[COPY1]](s32)
     ; GFX9-MESA-LABEL: name: test_load_global_s24_align2
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1)
+    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1)
     ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
     ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 2, align 2, addrspace 1)
-    ; GFX9-MESA: [[DEF:%[0-9]+]]:_(s24) = G_IMPLICIT_DEF
-    ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(s24) = G_INSERT [[DEF]], [[LOAD]](s16), 0
-    ; GFX9-MESA: [[INSERT1:%[0-9]+]]:_(s24) = G_INSERT [[INSERT]], [[LOAD1]](s8), 16
-    ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INSERT1]](s24)
-    ; GFX9-MESA: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 2, align 2, addrspace 1)
+    ; GFX9-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; GFX9-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
+    ; GFX9-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX9-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C3]](s32)
+    ; GFX9-MESA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; GFX9-MESA: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX9-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]]
+    ; GFX9-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+    ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C4]]
+    ; GFX9-MESA: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C5]](s16)
+    ; GFX9-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX9-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; GFX9-MESA: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C4]]
+    ; GFX9-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
+    ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C4]]
+    ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C5]](s16)
+    ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; GFX9-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; GFX9-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; GFX9-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C2]](s32)
+    ; GFX9-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[OR2]](s32)
+    ; GFX9-MESA: $vgpr0 = COPY [[COPY1]](s32)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s24) = G_LOAD %0 :: (load 3, align 2, addrspace 1)
     %2:_(s32) = G_ANYEXT %1
@@ -831,15 +927,39 @@ body: |
 
     ; SI-LABEL: name: test_load_global_s24_align1
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[COPY]](p1) :: (load 2, align 1, addrspace 1)
-    ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1)
+    ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
     ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 2, addrspace 1)
-    ; SI: [[DEF:%[0-9]+]]:_(s24) = G_IMPLICIT_DEF
-    ; SI: [[INSERT:%[0-9]+]]:_(s24) = G_INSERT [[DEF]], [[LOAD]](s16), 0
-    ; SI: [[INSERT1:%[0-9]+]]:_(s24) = G_INSERT [[INSERT]], [[LOAD1]](s8), 16
-    ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INSERT1]](s24)
-    ; SI: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 1, addrspace 1)
+    ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 1 from unknown-address + 2, addrspace 1)
+    ; SI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; SI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; SI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
+    ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C4]]
+    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+    ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; SI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; SI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[DEF]](s32)
+    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C4]]
+    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32)
+    ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; SI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
+    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[OR2]](s32)
+    ; SI: $vgpr0 = COPY [[COPY4]](s32)
     ; CI-HSA-LABEL: name: test_load_global_s24_align1
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; CI-HSA: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load 2, align 1, addrspace 1)
@@ -853,26 +973,70 @@ body: |
     ; CI-HSA: $vgpr0 = COPY [[COPY1]](s32)
     ; CI-MESA-LABEL: name: test_load_global_s24_align1
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[COPY]](p1) :: (load 2, align 1, addrspace 1)
-    ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1)
+    ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
     ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 2, addrspace 1)
-    ; CI-MESA: [[DEF:%[0-9]+]]:_(s24) = G_IMPLICIT_DEF
-    ; CI-MESA: [[INSERT:%[0-9]+]]:_(s24) = G_INSERT [[DEF]], [[LOAD]](s16), 0
-    ; CI-MESA: [[INSERT1:%[0-9]+]]:_(s24) = G_INSERT [[INSERT]], [[LOAD1]](s8), 16
-    ; CI-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INSERT1]](s24)
-    ; CI-MESA: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 1, addrspace 1)
+    ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 1 from unknown-address + 2, addrspace 1)
+    ; CI-MESA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CI-MESA: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; CI-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
+    ; CI-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+    ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C4]]
+    ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+    ; CI-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; CI-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; CI-MESA: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[DEF]](s32)
+    ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C4]]
+    ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32)
+    ; CI-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; CI-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; CI-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; CI-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
+    ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[OR2]](s32)
+    ; CI-MESA: $vgpr0 = COPY [[COPY4]](s32)
     ; VI-LABEL: name: test_load_global_s24_align1
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[COPY]](p1) :: (load 2, align 1, addrspace 1)
-    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1)
+    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
     ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 2, addrspace 1)
-    ; VI: [[DEF:%[0-9]+]]:_(s24) = G_IMPLICIT_DEF
-    ; VI: [[INSERT:%[0-9]+]]:_(s24) = G_INSERT [[DEF]], [[LOAD]](s16), 0
-    ; VI: [[INSERT1:%[0-9]+]]:_(s24) = G_INSERT [[INSERT]], [[LOAD1]](s8), 16
-    ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INSERT1]](s24)
-    ; VI: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 1, addrspace 1)
+    ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 1 from unknown-address + 2, addrspace 1)
+    ; VI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; VI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
+    ; VI: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
+    ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; VI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
+    ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]]
+    ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
+    ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
+    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[OR2]](s32)
+    ; VI: $vgpr0 = COPY [[COPY1]](s32)
     ; GFX9-HSA-LABEL: name: test_load_global_s24_align1
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; GFX9-HSA: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load 2, align 1, addrspace 1)
@@ -886,15 +1050,35 @@ body: |
     ; GFX9-HSA: $vgpr0 = COPY [[COPY1]](s32)
     ; GFX9-MESA-LABEL: name: test_load_global_s24_align1
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[COPY]](p1) :: (load 2, align 1, addrspace 1)
-    ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1)
+    ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
     ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 2, addrspace 1)
-    ; GFX9-MESA: [[DEF:%[0-9]+]]:_(s24) = G_IMPLICIT_DEF
-    ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(s24) = G_INSERT [[DEF]], [[LOAD]](s16), 0
-    ; GFX9-MESA: [[INSERT1:%[0-9]+]]:_(s24) = G_INSERT [[INSERT]], [[LOAD1]](s8), 16
-    ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INSERT1]](s24)
-    ; GFX9-MESA: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 1, addrspace 1)
+    ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 1 from unknown-address + 2, addrspace 1)
+    ; GFX9-MESA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; GFX9-MESA: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX9-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; GFX9-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
+    ; GFX9-MESA: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
+    ; GFX9-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX9-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; GFX9-MESA: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; GFX9-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
+    ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]]
+    ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
+    ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; GFX9-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; GFX9-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; GFX9-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
+    ; GFX9-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[OR2]](s32)
+    ; GFX9-MESA: $vgpr0 = COPY [[COPY1]](s32)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s24) = G_LOAD %0 :: (load 3, align 1, addrspace 1)
     %2:_(s32) = G_ANYEXT %1
@@ -1496,10 +1680,9 @@ body: |
     ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
     ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
     ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 4 from unknown-address + 8, align 8, addrspace 1)
-    ; SI: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; SI: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[LOAD]](<2 x s32>), 0
-    ; SI: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[LOAD1]](s32), 64
-    ; SI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT1]](<3 x s32>)
+    ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
+    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32)
+    ; SI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
     ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; CI-HSA-LABEL: name: test_load_global_s96_align8
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
@@ -1543,10 +1726,9 @@ body: |
     ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
     ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
     ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 4 from unknown-address + 8, addrspace 1)
-    ; SI: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; SI: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[LOAD]](<2 x s32>), 0
-    ; SI: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[LOAD1]](s32), 64
-    ; SI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT1]](<3 x s32>)
+    ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
+    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32)
+    ; SI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
     ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; CI-HSA-LABEL: name: test_load_global_s96_align4
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
@@ -1609,7 +1791,6 @@ body: |
     ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]]
     ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
     ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32)
     ; SI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
     ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
     ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 2 from unknown-address + 8, addrspace 1)
@@ -1621,10 +1802,8 @@ body: |
     ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]]
     ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
     ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; SI: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; SI: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<2 x s32>), 0
-    ; SI: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[OR2]](s32), 64
-    ; SI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT1]](<3 x s32>)
+    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
+    ; SI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
     ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; CI-HSA-LABEL: name: test_load_global_s96_align2
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
@@ -1656,7 +1835,6 @@ body: |
     ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]]
     ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
     ; CI-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32)
     ; CI-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
     ; CI-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
     ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 2 from unknown-address + 8, addrspace 1)
@@ -1668,10 +1846,8 @@ body: |
     ; CI-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]]
     ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
     ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; CI-MESA: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; CI-MESA: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<2 x s32>), 0
-    ; CI-MESA: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[OR2]](s32), 64
-    ; CI-MESA: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT1]](<3 x s32>)
+    ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
+    ; CI-MESA: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
     ; CI-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; VI-LABEL: name: test_load_global_s96_align2
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
@@ -1698,7 +1874,6 @@ body: |
     ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]]
     ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
     ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32)
     ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
     ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
     ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 2 from unknown-address + 8, addrspace 1)
@@ -1710,10 +1885,8 @@ body: |
     ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]]
     ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
     ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; VI: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; VI: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<2 x s32>), 0
-    ; VI: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[OR2]](s32), 64
-    ; VI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT1]](<3 x s32>)
+    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
+    ; VI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
     ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; GFX9-HSA-LABEL: name: test_load_global_s96_align2
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
@@ -1745,7 +1918,6 @@ body: |
     ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]]
     ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
     ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32)
     ; GFX9-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
     ; GFX9-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
     ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 2 from unknown-address + 8, addrspace 1)
@@ -1757,10 +1929,8 @@ body: |
     ; GFX9-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]]
     ; GFX9-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
     ; GFX9-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<2 x s32>), 0
-    ; GFX9-MESA: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[OR2]](s32), 64
-    ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT1]](<3 x s32>)
+    ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
+    ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
     ; GFX9-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s96) = G_LOAD %0 :: (load 12, align 2, addrspace 1)
@@ -1826,7 +1996,6 @@ body: |
     ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]]
     ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
     ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
     ; SI: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
     ; SI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64)
     ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load 1 from unknown-address + 8, addrspace 1)
@@ -1850,10 +2019,8 @@ body: |
     ; SI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C3]]
     ; SI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
     ; SI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; SI: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; SI: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<2 x s32>), 0
-    ; SI: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[OR8]](s32), 64
-    ; SI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT1]](<3 x s32>)
+    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
+    ; SI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
     ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; CI-HSA-LABEL: name: test_load_global_s96_align1
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
@@ -1913,7 +2080,6 @@ body: |
     ; CI-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]]
     ; CI-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
     ; CI-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
     ; CI-MESA: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
     ; CI-MESA: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64)
     ; CI-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load 1 from unknown-address + 8, addrspace 1)
@@ -1937,10 +2103,8 @@ body: |
     ; CI-MESA: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C3]]
     ; CI-MESA: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
     ; CI-MESA: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; CI-MESA: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; CI-MESA: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<2 x s32>), 0
-    ; CI-MESA: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[OR8]](s32), 64
-    ; CI-MESA: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT1]](<3 x s32>)
+    ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
+    ; CI-MESA: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
     ; CI-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; VI-LABEL: name: test_load_global_s96_align1
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
@@ -1995,7 +2159,6 @@ body: |
     ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]]
     ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
     ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
     ; VI: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
     ; VI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64)
     ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load 1 from unknown-address + 8, addrspace 1)
@@ -2019,10 +2182,8 @@ body: |
     ; VI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C3]]
     ; VI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
     ; VI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; VI: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; VI: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<2 x s32>), 0
-    ; VI: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[OR8]](s32), 64
-    ; VI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT1]](<3 x s32>)
+    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
+    ; VI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
     ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; GFX9-HSA-LABEL: name: test_load_global_s96_align1
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
@@ -2082,7 +2243,6 @@ body: |
     ; GFX9-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]]
     ; GFX9-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
     ; GFX9-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
     ; GFX9-MESA: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
     ; GFX9-MESA: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64)
     ; GFX9-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load 1 from unknown-address + 8, addrspace 1)
@@ -2106,10 +2266,8 @@ body: |
     ; GFX9-MESA: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C3]]
     ; GFX9-MESA: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
     ; GFX9-MESA: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<2 x s32>), 0
-    ; GFX9-MESA: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[OR8]](s32), 64
-    ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT1]](<3 x s32>)
+    ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
+    ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
     ; GFX9-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s96) = G_LOAD %0 :: (load 12, align 1, addrspace 1)
@@ -2133,10 +2291,9 @@ body: |
     ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
     ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
     ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 4 from unknown-address + 16, addrspace 1)
-    ; SI: [[DEF:%[0-9]+]]:_(<5 x s32>) = G_IMPLICIT_DEF
-    ; SI: [[INSERT:%[0-9]+]]:_(<5 x s32>) = G_INSERT [[DEF]], [[LOAD]](<4 x s32>), 0
-    ; SI: [[INSERT1:%[0-9]+]]:_(<5 x s32>) = G_INSERT [[INSERT]], [[LOAD1]](s32), 128
-    ; SI: [[BITCAST:%[0-9]+]]:_(s160) = G_BITCAST [[INSERT1]](<5 x s32>)
+    ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
+    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[LOAD1]](s32)
+    ; SI: [[BITCAST:%[0-9]+]]:_(s160) = G_BITCAST [[BUILD_VECTOR]](<5 x s32>)
     ; SI: S_NOP 0, implicit [[BITCAST]](s160)
     ; CI-HSA-LABEL: name: test_load_global_s160_align4
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
@@ -2144,10 +2301,9 @@ body: |
     ; CI-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
     ; CI-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
     ; CI-HSA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 4 from unknown-address + 16, addrspace 1)
-    ; CI-HSA: [[DEF:%[0-9]+]]:_(<5 x s32>) = G_IMPLICIT_DEF
-    ; CI-HSA: [[INSERT:%[0-9]+]]:_(<5 x s32>) = G_INSERT [[DEF]], [[LOAD]](<4 x s32>), 0
-    ; CI-HSA: [[INSERT1:%[0-9]+]]:_(<5 x s32>) = G_INSERT [[INSERT]], [[LOAD1]](s32), 128
-    ; CI-HSA: [[BITCAST:%[0-9]+]]:_(s160) = G_BITCAST [[INSERT1]](<5 x s32>)
+    ; CI-HSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
+    ; CI-HSA: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[LOAD1]](s32)
+    ; CI-HSA: [[BITCAST:%[0-9]+]]:_(s160) = G_BITCAST [[BUILD_VECTOR]](<5 x s32>)
     ; CI-HSA: S_NOP 0, implicit [[BITCAST]](s160)
     ; CI-MESA-LABEL: name: test_load_global_s160_align4
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
@@ -2155,10 +2311,9 @@ body: |
     ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
     ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
     ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 4 from unknown-address + 16, addrspace 1)
-    ; CI-MESA: [[DEF:%[0-9]+]]:_(<5 x s32>) = G_IMPLICIT_DEF
-    ; CI-MESA: [[INSERT:%[0-9]+]]:_(<5 x s32>) = G_INSERT [[DEF]], [[LOAD]](<4 x s32>), 0
-    ; CI-MESA: [[INSERT1:%[0-9]+]]:_(<5 x s32>) = G_INSERT [[INSERT]], [[LOAD1]](s32), 128
-    ; CI-MESA: [[BITCAST:%[0-9]+]]:_(s160) = G_BITCAST [[INSERT1]](<5 x s32>)
+    ; CI-MESA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
+    ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[LOAD1]](s32)
+    ; CI-MESA: [[BITCAST:%[0-9]+]]:_(s160) = G_BITCAST [[BUILD_VECTOR]](<5 x s32>)
     ; CI-MESA: S_NOP 0, implicit [[BITCAST]](s160)
     ; VI-LABEL: name: test_load_global_s160_align4
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
@@ -2166,10 +2321,9 @@ body: |
     ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
     ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
     ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 4 from unknown-address + 16, addrspace 1)
-    ; VI: [[DEF:%[0-9]+]]:_(<5 x s32>) = G_IMPLICIT_DEF
-    ; VI: [[INSERT:%[0-9]+]]:_(<5 x s32>) = G_INSERT [[DEF]], [[LOAD]](<4 x s32>), 0
-    ; VI: [[INSERT1:%[0-9]+]]:_(<5 x s32>) = G_INSERT [[INSERT]], [[LOAD1]](s32), 128
-    ; VI: [[BITCAST:%[0-9]+]]:_(s160) = G_BITCAST [[INSERT1]](<5 x s32>)
+    ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
+    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[LOAD1]](s32)
+    ; VI: [[BITCAST:%[0-9]+]]:_(s160) = G_BITCAST [[BUILD_VECTOR]](<5 x s32>)
     ; VI: S_NOP 0, implicit [[BITCAST]](s160)
     ; GFX9-HSA-LABEL: name: test_load_global_s160_align4
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
@@ -2177,10 +2331,9 @@ body: |
     ; GFX9-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
     ; GFX9-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
     ; GFX9-HSA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 4 from unknown-address + 16, addrspace 1)
-    ; GFX9-HSA: [[DEF:%[0-9]+]]:_(<5 x s32>) = G_IMPLICIT_DEF
-    ; GFX9-HSA: [[INSERT:%[0-9]+]]:_(<5 x s32>) = G_INSERT [[DEF]], [[LOAD]](<4 x s32>), 0
-    ; GFX9-HSA: [[INSERT1:%[0-9]+]]:_(<5 x s32>) = G_INSERT [[INSERT]], [[LOAD1]](s32), 128
-    ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(s160) = G_BITCAST [[INSERT1]](<5 x s32>)
+    ; GFX9-HSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
+    ; GFX9-HSA: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[LOAD1]](s32)
+    ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(s160) = G_BITCAST [[BUILD_VECTOR]](<5 x s32>)
     ; GFX9-HSA: S_NOP 0, implicit [[BITCAST]](s160)
     ; GFX9-MESA-LABEL: name: test_load_global_s160_align4
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
@@ -2188,10 +2341,9 @@ body: |
     ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
     ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
     ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 4 from unknown-address + 16, addrspace 1)
-    ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<5 x s32>) = G_IMPLICIT_DEF
-    ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<5 x s32>) = G_INSERT [[DEF]], [[LOAD]](<4 x s32>), 0
-    ; GFX9-MESA: [[INSERT1:%[0-9]+]]:_(<5 x s32>) = G_INSERT [[INSERT]], [[LOAD1]](s32), 128
-    ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s160) = G_BITCAST [[INSERT1]](<5 x s32>)
+    ; GFX9-MESA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
+    ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[LOAD1]](s32)
+    ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s160) = G_BITCAST [[BUILD_VECTOR]](<5 x s32>)
     ; GFX9-MESA: S_NOP 0, implicit [[BITCAST]](s160)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s160) = G_LOAD %0 :: (load 20, align 4, addrspace 1)
@@ -2213,81 +2365,114 @@ body: |
     ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
     ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C1]](s64)
     ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 4 from unknown-address + 24, addrspace 1)
-    ; SI: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; SI: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[LOAD1]](<2 x s32>), 0
-    ; SI: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[LOAD2]](s32), 64
-    ; SI: [[DEF1:%[0-9]+]]:_(<7 x s32>) = G_IMPLICIT_DEF
-    ; SI: [[INSERT2:%[0-9]+]]:_(<7 x s32>) = G_INSERT [[DEF1]], [[LOAD]](<4 x s32>), 0
-    ; SI: [[INSERT3:%[0-9]+]]:_(<7 x s32>) = G_INSERT [[INSERT2]], [[INSERT1]](<3 x s32>), 128
-    ; SI: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[INSERT3]](<7 x s32>)
+    ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<2 x s32>)
+    ; SI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
+    ; SI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[UV3]](s32), [[UV4]](s32)
+    ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV5]](s32), [[UV]](s32), [[UV1]](s32)
+    ; SI: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD2]](s32), [[DEF]](s32), [[DEF]](s32)
+    ; SI: [[DEF1:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
+    ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<21 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<3 x s32>), [[BUILD_VECTOR1]](<3 x s32>), [[BUILD_VECTOR2]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>)
+    ; SI: [[UV6:%[0-9]+]]:_(<7 x s32>), [[UV7:%[0-9]+]]:_(<7 x s32>), [[UV8:%[0-9]+]]:_(<7 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<21 x s32>)
+    ; SI: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[UV6]](<7 x s32>)
     ; SI: [[DEF2:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF
-    ; SI: [[INSERT4:%[0-9]+]]:_(s256) = G_INSERT [[DEF2]], [[BITCAST]](s224), 0
-    ; SI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT4]](s256)
+    ; SI: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF2]], [[BITCAST]](s224), 0
+    ; SI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](s256)
     ; CI-HSA-LABEL: name: test_load_global_s224_align4
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; CI-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, align 4, addrspace 1)
     ; CI-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
     ; CI-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
     ; CI-HSA: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load 12 from unknown-address + 16, align 4, addrspace 1)
-    ; CI-HSA: [[DEF:%[0-9]+]]:_(<7 x s32>) = G_IMPLICIT_DEF
-    ; CI-HSA: [[INSERT:%[0-9]+]]:_(<7 x s32>) = G_INSERT [[DEF]], [[LOAD]](<4 x s32>), 0
-    ; CI-HSA: [[INSERT1:%[0-9]+]]:_(<7 x s32>) = G_INSERT [[INSERT]], [[LOAD1]](<3 x s32>), 128
-    ; CI-HSA: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[INSERT1]](<7 x s32>)
-    ; CI-HSA: [[DEF1:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF
-    ; CI-HSA: [[INSERT2:%[0-9]+]]:_(s256) = G_INSERT [[DEF1]], [[BITCAST]](s224), 0
-    ; CI-HSA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT2]](s256)
+    ; CI-HSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
+    ; CI-HSA: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<3 x s32>)
+    ; CI-HSA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CI-HSA: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32)
+    ; CI-HSA: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[UV4]](s32), [[UV5]](s32)
+    ; CI-HSA: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV6]](s32), [[DEF]](s32), [[DEF]](s32)
+    ; CI-HSA: [[DEF1:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
+    ; CI-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<21 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<3 x s32>), [[BUILD_VECTOR1]](<3 x s32>), [[BUILD_VECTOR2]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>)
+    ; CI-HSA: [[UV7:%[0-9]+]]:_(<7 x s32>), [[UV8:%[0-9]+]]:_(<7 x s32>), [[UV9:%[0-9]+]]:_(<7 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<21 x s32>)
+    ; CI-HSA: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[UV7]](<7 x s32>)
+    ; CI-HSA: [[DEF2:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF
+    ; CI-HSA: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF2]], [[BITCAST]](s224), 0
+    ; CI-HSA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](s256)
     ; CI-MESA-LABEL: name: test_load_global_s224_align4
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, align 4, addrspace 1)
     ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
     ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
     ; CI-MESA: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load 12 from unknown-address + 16, align 4, addrspace 1)
-    ; CI-MESA: [[DEF:%[0-9]+]]:_(<7 x s32>) = G_IMPLICIT_DEF
-    ; CI-MESA: [[INSERT:%[0-9]+]]:_(<7 x s32>) = G_INSERT [[DEF]], [[LOAD]](<4 x s32>), 0
-    ; CI-MESA: [[INSERT1:%[0-9]+]]:_(<7 x s32>) = G_INSERT [[INSERT]], [[LOAD1]](<3 x s32>), 128
-    ; CI-MESA: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[INSERT1]](<7 x s32>)
-    ; CI-MESA: [[DEF1:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF
-    ; CI-MESA: [[INSERT2:%[0-9]+]]:_(s256) = G_INSERT [[DEF1]], [[BITCAST]](s224), 0
-    ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT2]](s256)
+    ; CI-MESA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
+    ; CI-MESA: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<3 x s32>)
+    ; CI-MESA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32)
+    ; CI-MESA: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[UV4]](s32), [[UV5]](s32)
+    ; CI-MESA: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV6]](s32), [[DEF]](s32), [[DEF]](s32)
+    ; CI-MESA: [[DEF1:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
+    ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<21 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<3 x s32>), [[BUILD_VECTOR1]](<3 x s32>), [[BUILD_VECTOR2]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>)
+    ; CI-MESA: [[UV7:%[0-9]+]]:_(<7 x s32>), [[UV8:%[0-9]+]]:_(<7 x s32>), [[UV9:%[0-9]+]]:_(<7 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<21 x s32>)
+    ; CI-MESA: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[UV7]](<7 x s32>)
+    ; CI-MESA: [[DEF2:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF
+    ; CI-MESA: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF2]], [[BITCAST]](s224), 0
+    ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](s256)
     ; VI-LABEL: name: test_load_global_s224_align4
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, align 4, addrspace 1)
     ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
     ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
     ; VI: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load 12 from unknown-address + 16, align 4, addrspace 1)
-    ; VI: [[DEF:%[0-9]+]]:_(<7 x s32>) = G_IMPLICIT_DEF
-    ; VI: [[INSERT:%[0-9]+]]:_(<7 x s32>) = G_INSERT [[DEF]], [[LOAD]](<4 x s32>), 0
-    ; VI: [[INSERT1:%[0-9]+]]:_(<7 x s32>) = G_INSERT [[INSERT]], [[LOAD1]](<3 x s32>), 128
-    ; VI: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[INSERT1]](<7 x s32>)
-    ; VI: [[DEF1:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF
-    ; VI: [[INSERT2:%[0-9]+]]:_(s256) = G_INSERT [[DEF1]], [[BITCAST]](s224), 0
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT2]](s256)
+    ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
+    ; VI: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<3 x s32>)
+    ; VI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32)
+    ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[UV4]](s32), [[UV5]](s32)
+    ; VI: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV6]](s32), [[DEF]](s32), [[DEF]](s32)
+    ; VI: [[DEF1:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
+    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<21 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<3 x s32>), [[BUILD_VECTOR1]](<3 x s32>), [[BUILD_VECTOR2]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>)
+    ; VI: [[UV7:%[0-9]+]]:_(<7 x s32>), [[UV8:%[0-9]+]]:_(<7 x s32>), [[UV9:%[0-9]+]]:_(<7 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<21 x s32>)
+    ; VI: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[UV7]](<7 x s32>)
+    ; VI: [[DEF2:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF
+    ; VI: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF2]], [[BITCAST]](s224), 0
+    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](s256)
     ; GFX9-HSA-LABEL: name: test_load_global_s224_align4
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, align 4, addrspace 1)
     ; GFX9-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
     ; GFX9-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
     ; GFX9-HSA: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load 12 from unknown-address + 16, align 4, addrspace 1)
-    ; GFX9-HSA: [[DEF:%[0-9]+]]:_(<7 x s32>) = G_IMPLICIT_DEF
-    ; GFX9-HSA: [[INSERT:%[0-9]+]]:_(<7 x s32>) = G_INSERT [[DEF]], [[LOAD]](<4 x s32>), 0
-    ; GFX9-HSA: [[INSERT1:%[0-9]+]]:_(<7 x s32>) = G_INSERT [[INSERT]], [[LOAD1]](<3 x s32>), 128
-    ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[INSERT1]](<7 x s32>)
-    ; GFX9-HSA: [[DEF1:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF
-    ; GFX9-HSA: [[INSERT2:%[0-9]+]]:_(s256) = G_INSERT [[DEF1]], [[BITCAST]](s224), 0
-    ; GFX9-HSA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT2]](s256)
+    ; GFX9-HSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
+    ; GFX9-HSA: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<3 x s32>)
+    ; GFX9-HSA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; GFX9-HSA: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32)
+    ; GFX9-HSA: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[UV4]](s32), [[UV5]](s32)
+    ; GFX9-HSA: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV6]](s32), [[DEF]](s32), [[DEF]](s32)
+    ; GFX9-HSA: [[DEF1:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
+    ; GFX9-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<21 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<3 x s32>), [[BUILD_VECTOR1]](<3 x s32>), [[BUILD_VECTOR2]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>)
+    ; GFX9-HSA: [[UV7:%[0-9]+]]:_(<7 x s32>), [[UV8:%[0-9]+]]:_(<7 x s32>), [[UV9:%[0-9]+]]:_(<7 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<21 x s32>)
+    ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[UV7]](<7 x s32>)
+    ; GFX9-HSA: [[DEF2:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF
+    ; GFX9-HSA: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF2]], [[BITCAST]](s224), 0
+    ; GFX9-HSA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](s256)
     ; GFX9-MESA-LABEL: name: test_load_global_s224_align4
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, align 4, addrspace 1)
     ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
     ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
     ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load 12 from unknown-address + 16, align 4, addrspace 1)
-    ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<7 x s32>) = G_IMPLICIT_DEF
-    ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<7 x s32>) = G_INSERT [[DEF]], [[LOAD]](<4 x s32>), 0
-    ; GFX9-MESA: [[INSERT1:%[0-9]+]]:_(<7 x s32>) = G_INSERT [[INSERT]], [[LOAD1]](<3 x s32>), 128
-    ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[INSERT1]](<7 x s32>)
-    ; GFX9-MESA: [[DEF1:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF
-    ; GFX9-MESA: [[INSERT2:%[0-9]+]]:_(s256) = G_INSERT [[DEF1]], [[BITCAST]](s224), 0
-    ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT2]](s256)
+    ; GFX9-MESA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
+    ; GFX9-MESA: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<3 x s32>)
+    ; GFX9-MESA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32)
+    ; GFX9-MESA: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[UV4]](s32), [[UV5]](s32)
+    ; GFX9-MESA: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV6]](s32), [[DEF]](s32), [[DEF]](s32)
+    ; GFX9-MESA: [[DEF1:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
+    ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<21 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<3 x s32>), [[BUILD_VECTOR1]](<3 x s32>), [[BUILD_VECTOR2]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>)
+    ; GFX9-MESA: [[UV7:%[0-9]+]]:_(<7 x s32>), [[UV8:%[0-9]+]]:_(<7 x s32>), [[UV9:%[0-9]+]]:_(<7 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<21 x s32>)
+    ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[UV7]](<7 x s32>)
+    ; GFX9-MESA: [[DEF2:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF
+    ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF2]], [[BITCAST]](s224), 0
+    ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](s256)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s224) = G_LOAD %0 :: (load 28, align 4, addrspace 1)
      %2:_(s256) = G_IMPLICIT_DEF
@@ -6296,33 +6481,30 @@ body: |
     ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
     ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
     ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 2 from unknown-address + 2, addrspace 1)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 2 from unknown-address + 4, addrspace 1)
+    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
     ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]]
+    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]]
     ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]]
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]]
+    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32)
     ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
     ; SI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 2 from unknown-address + 4, addrspace 1)
-    ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+    ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]]
+    ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32)
+    ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; SI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; SI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[DEF]](<2 x s16>)
+    ; SI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
     ; SI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; SI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
-    ; SI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BITCAST]](<2 x s16>), 0
-    ; SI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; SI: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
-    ; SI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0
-    ; SI: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[TRUNC]](s16), 32
-    ; SI: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT3]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; SI: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>)
-    ; SI: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV8]](<3 x s16>), 0
-    ; SI: $vgpr0_vgpr1 = COPY [[INSERT4]](<4 x s16>)
+    ; SI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
     ; CI-HSA-LABEL: name: test_load_global_v3s16_align2
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; CI-HSA: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[COPY]](p1) :: (load 6, align 2, addrspace 1)
@@ -6335,66 +6517,60 @@ body: |
     ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
     ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
     ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 2 from unknown-address + 2, addrspace 1)
-    ; CI-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 2 from unknown-address + 4, addrspace 1)
+    ; CI-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
     ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
-    ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]]
+    ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]]
     ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
-    ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]]
-    ; CI-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]]
+    ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32)
     ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
     ; CI-MESA: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; CI-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 2 from unknown-address + 4, addrspace 1)
-    ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; CI-MESA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+    ; CI-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]]
+    ; CI-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32)
+    ; CI-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CI-MESA: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; CI-MESA: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[DEF]](<2 x s16>)
+    ; CI-MESA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
     ; CI-MESA: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; CI-MESA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; CI-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
-    ; CI-MESA: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BITCAST]](<2 x s16>), 0
-    ; CI-MESA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; CI-MESA: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
-    ; CI-MESA: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0
-    ; CI-MESA: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[TRUNC]](s16), 32
-    ; CI-MESA: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT3]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; CI-MESA: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>)
-    ; CI-MESA: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV8]](<3 x s16>), 0
-    ; CI-MESA: $vgpr0_vgpr1 = COPY [[INSERT4]](<4 x s16>)
+    ; CI-MESA: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
     ; VI-LABEL: name: test_load_global_v3s16_align2
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1)
     ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
     ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
     ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 2 from unknown-address + 2, addrspace 1)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 2 from unknown-address + 4, addrspace 1)
+    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
     ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]]
+    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]]
     ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]]
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]]
+    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32)
     ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
     ; VI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 2 from unknown-address + 4, addrspace 1)
-    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]]
+    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32)
+    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; VI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; VI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[DEF]](<2 x s16>)
+    ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
     ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
-    ; VI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BITCAST]](<2 x s16>), 0
-    ; VI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; VI: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
-    ; VI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0
-    ; VI: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[TRUNC]](s16), 32
-    ; VI: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT3]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; VI: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>)
-    ; VI: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV8]](<3 x s16>), 0
-    ; VI: $vgpr0_vgpr1 = COPY [[INSERT4]](<4 x s16>)
+    ; VI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
     ; GFX9-HSA-LABEL: name: test_load_global_v3s16_align2
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[COPY]](p1) :: (load 6, align 2, addrspace 1)
@@ -6407,27 +6583,21 @@ body: |
     ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
     ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
     ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 2 from unknown-address + 2, addrspace 1)
-    ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
-    ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
     ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
     ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
     ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 2 from unknown-address + 4, addrspace 1)
-    ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-MESA: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; GFX9-MESA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
-    ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
-    ; GFX9-MESA: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BUILD_VECTOR_TRUNC]](<2 x s16>), 0
-    ; GFX9-MESA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; GFX9-MESA: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
-    ; GFX9-MESA: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0
-    ; GFX9-MESA: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[TRUNC]](s16), 32
-    ; GFX9-MESA: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT3]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; GFX9-MESA: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>)
-    ; GFX9-MESA: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV8]](<3 x s16>), 0
-    ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[INSERT4]](<4 x s16>)
+    ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
+    ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+    ; GFX9-MESA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[DEF]](s32)
+    ; GFX9-MESA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[DEF1]](<2 x s16>)
+    ; GFX9-MESA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
+    ; GFX9-MESA: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[UV]](<3 x s16>), 0
+    ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<3 x s16>) = G_LOAD %0 :: (load 6, align 2, addrspace 1)
     %2:_(<4 x s16>) = G_IMPLICIT_DEF
@@ -6470,14 +6640,8 @@ body: |
     ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32)
     ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
     ; SI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
-    ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
-    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; SI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; SI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
+    ; SI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
     ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 1 from unknown-address + 4, addrspace 1)
     ; SI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
     ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load 1 from unknown-address + 5, addrspace 1)
@@ -6486,23 +6650,26 @@ body: |
     ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
     ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32)
     ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]]
-    ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32)
-    ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
-    ; SI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
-    ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32)
+    ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
+    ; SI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
+    ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32)
+    ; SI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]]
+    ; SI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+    ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; SI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C7]], [[C6]](s32)
+    ; SI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]]
+    ; SI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+    ; SI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[DEF]](<2 x s16>)
+    ; SI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
     ; SI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; SI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
-    ; SI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BITCAST]](<2 x s16>), 0
-    ; SI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; SI: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
-    ; SI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0
-    ; SI: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[OR3]](s16), 32
-    ; SI: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT3]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; SI: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>)
-    ; SI: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV8]](<3 x s16>), 0
-    ; SI: $vgpr0_vgpr1 = COPY [[INSERT4]](<4 x s16>)
+    ; SI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
     ; CI-HSA-LABEL: name: test_load_global_v3s16_align1
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; CI-HSA: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[COPY]](p1) :: (load 6, align 1, addrspace 1)
@@ -6538,14 +6705,8 @@ body: |
     ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32)
     ; CI-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
     ; CI-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
-    ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; CI-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; CI-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
-    ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; CI-MESA: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; CI-MESA: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; CI-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
+    ; CI-MESA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CI-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
     ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 1 from unknown-address + 4, addrspace 1)
     ; CI-MESA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
     ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load 1 from unknown-address + 5, addrspace 1)
@@ -6554,23 +6715,26 @@ body: |
     ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
     ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32)
     ; CI-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]]
-    ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32)
-    ; CI-MESA: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
-    ; CI-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
-    ; CI-MESA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32)
+    ; CI-MESA: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
+    ; CI-MESA: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
+    ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; CI-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; CI-MESA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32)
+    ; CI-MESA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]]
+    ; CI-MESA: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+    ; CI-MESA: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; CI-MESA: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CI-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C7]], [[C6]](s32)
+    ; CI-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]]
+    ; CI-MESA: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+    ; CI-MESA: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[DEF]](<2 x s16>)
+    ; CI-MESA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
     ; CI-MESA: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; CI-MESA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; CI-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
-    ; CI-MESA: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BITCAST]](<2 x s16>), 0
-    ; CI-MESA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; CI-MESA: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
-    ; CI-MESA: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0
-    ; CI-MESA: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[OR3]](s16), 32
-    ; CI-MESA: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT3]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; CI-MESA: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>)
-    ; CI-MESA: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV8]](<3 x s16>), 0
-    ; CI-MESA: $vgpr0_vgpr1 = COPY [[INSERT4]](<4 x s16>)
+    ; CI-MESA: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
     ; VI-LABEL: name: test_load_global_v3s16_align1
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1)
@@ -6596,14 +6760,8 @@ body: |
     ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]]
     ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16)
     ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; VI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
     ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 1 from unknown-address + 4, addrspace 1)
     ; VI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
     ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load 1 from unknown-address + 5, addrspace 1)
@@ -6611,22 +6769,25 @@ body: |
     ; VI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]]
     ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
     ; VI: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]]
-    ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16)
-    ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL3]]
-    ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; VI: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16)
+    ; VI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
+    ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
+    ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]]
+    ; VI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+    ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[C5]](s32)
+    ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]]
+    ; VI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+    ; VI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[DEF]](<2 x s16>)
+    ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
     ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
-    ; VI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BITCAST]](<2 x s16>), 0
-    ; VI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; VI: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
-    ; VI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0
-    ; VI: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[OR3]](s16), 32
-    ; VI: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT3]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; VI: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>)
-    ; VI: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV8]](<3 x s16>), 0
-    ; VI: $vgpr0_vgpr1 = COPY [[INSERT4]](<4 x s16>)
+    ; VI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
     ; GFX9-HSA-LABEL: name: test_load_global_v3s16_align1
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[COPY]](p1) :: (load 6, align 1, addrspace 1)
@@ -6658,9 +6819,6 @@ body: |
     ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]]
     ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16)
     ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
-    ; GFX9-MESA: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32)
     ; GFX9-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
     ; GFX9-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
     ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 1 from unknown-address + 4, addrspace 1)
@@ -6672,20 +6830,18 @@ body: |
     ; GFX9-MESA: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]]
     ; GFX9-MESA: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16)
     ; GFX9-MESA: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
-    ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-MESA: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; GFX9-MESA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
-    ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
-    ; GFX9-MESA: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BUILD_VECTOR_TRUNC]](<2 x s16>), 0
-    ; GFX9-MESA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; GFX9-MESA: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
-    ; GFX9-MESA: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0
-    ; GFX9-MESA: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[OR2]](s16), 32
-    ; GFX9-MESA: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT3]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; GFX9-MESA: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>)
-    ; GFX9-MESA: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV8]](<3 x s16>), 0
-    ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[INSERT4]](<4 x s16>)
+    ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; GFX9-MESA: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16)
+    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32)
+    ; GFX9-MESA: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[OR2]](s16)
+    ; GFX9-MESA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[DEF]](s32)
+    ; GFX9-MESA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[DEF1]](<2 x s16>)
+    ; GFX9-MESA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
+    ; GFX9-MESA: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[UV]](<3 x s16>), 0
+    ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<3 x s16>) = G_LOAD %0 :: (load 6, align 1, addrspace 1)
     %2:_(<4 x s16>) = G_IMPLICIT_DEF
@@ -7419,29 +7575,19 @@ body: |
     ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
     ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
     ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 2 from unknown-address + 8, align 8, addrspace 1)
-    ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; SI: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
-    ; SI: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
-    ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>)
-    ; SI: [[UV:%[0-9]+]]:_(<5 x s16>), [[UV1:%[0-9]+]]:_(<5 x s16>), [[UV2:%[0-9]+]]:_(<5 x s16>), [[UV3:%[0-9]+]]:_(<5 x s16>), [[UV4:%[0-9]+]]:_(<5 x s16>), [[UV5:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<30 x s16>)
-    ; SI: [[INSERT:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF1]], [[UV]](<5 x s16>), 0
-    ; SI: [[INSERT1:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[INSERT]], [[LOAD]](<4 x s16>), 0
-    ; SI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>)
-    ; SI: [[UV6:%[0-9]+]]:_(<5 x s16>), [[UV7:%[0-9]+]]:_(<5 x s16>), [[UV8:%[0-9]+]]:_(<5 x s16>), [[UV9:%[0-9]+]]:_(<5 x s16>), [[UV10:%[0-9]+]]:_(<5 x s16>), [[UV11:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<30 x s16>)
-    ; SI: [[INSERT2:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF1]], [[UV6]](<5 x s16>), 0
-    ; SI: [[INSERT3:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[INSERT2]], [[TRUNC]](s16), 64
-    ; SI: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
-    ; SI: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT3]](<6 x s16>)
-    ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>)
+    ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
+    ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
     ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
     ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32)
-    ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>)
+    ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
     ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
-    ; SI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV14]](<2 x s16>)
+    ; SI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+    ; SI: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; SI: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
+    ; SI: [[DEF3:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
+    ; SI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<6 x s16>)
+    ; SI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
     ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C1]](s32)
-    ; SI: [[UV15:%[0-9]+]]:_(<2 x s16>), [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<6 x s16>)
-    ; SI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV15]](<2 x s16>)
-    ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32)
     ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
     ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
     ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]]
@@ -7449,24 +7595,24 @@ body: |
     ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]]
     ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
     ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; SI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; SI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
     ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
     ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]]
     ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
     ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C2]]
     ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32)
     ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; SI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+    ; SI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
     ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C2]]
-    ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+    ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
     ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C2]]
     ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32)
     ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; SI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; SI: $vgpr0 = COPY [[BITCAST4]](<2 x s16>)
-    ; SI: $vgpr1 = COPY [[BITCAST5]](<2 x s16>)
-    ; SI: $vgpr2 = COPY [[BITCAST6]](<2 x s16>)
+    ; SI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; SI: $vgpr0 = COPY [[BITCAST3]](<2 x s16>)
+    ; SI: $vgpr1 = COPY [[BITCAST4]](<2 x s16>)
+    ; SI: $vgpr2 = COPY [[BITCAST5]](<2 x s16>)
     ; CI-HSA-LABEL: name: test_load_global_v5s16_align8
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; CI-HSA: [[LOAD:%[0-9]+]]:_(<5 x s16>) = G_LOAD [[COPY]](p1) :: (load 10, align 8, addrspace 1)
@@ -7550,29 +7696,19 @@ body: |
     ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
     ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
     ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 2 from unknown-address + 8, align 4, addrspace 1)
-    ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; SI: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
-    ; SI: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
-    ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>)
-    ; SI: [[UV:%[0-9]+]]:_(<5 x s16>), [[UV1:%[0-9]+]]:_(<5 x s16>), [[UV2:%[0-9]+]]:_(<5 x s16>), [[UV3:%[0-9]+]]:_(<5 x s16>), [[UV4:%[0-9]+]]:_(<5 x s16>), [[UV5:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<30 x s16>)
-    ; SI: [[INSERT:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF1]], [[UV]](<5 x s16>), 0
-    ; SI: [[INSERT1:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[INSERT]], [[LOAD]](<4 x s16>), 0
-    ; SI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>)
-    ; SI: [[UV6:%[0-9]+]]:_(<5 x s16>), [[UV7:%[0-9]+]]:_(<5 x s16>), [[UV8:%[0-9]+]]:_(<5 x s16>), [[UV9:%[0-9]+]]:_(<5 x s16>), [[UV10:%[0-9]+]]:_(<5 x s16>), [[UV11:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<30 x s16>)
-    ; SI: [[INSERT2:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF1]], [[UV6]](<5 x s16>), 0
-    ; SI: [[INSERT3:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[INSERT2]], [[TRUNC]](s16), 64
-    ; SI: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
-    ; SI: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT3]](<6 x s16>)
-    ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>)
+    ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
+    ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
     ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
     ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32)
-    ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>)
+    ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
     ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
-    ; SI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV14]](<2 x s16>)
+    ; SI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+    ; SI: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; SI: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
+    ; SI: [[DEF3:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
+    ; SI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<6 x s16>)
+    ; SI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
     ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C1]](s32)
-    ; SI: [[UV15:%[0-9]+]]:_(<2 x s16>), [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<6 x s16>)
-    ; SI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV15]](<2 x s16>)
-    ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32)
     ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
     ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
     ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]]
@@ -7580,24 +7716,24 @@ body: |
     ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]]
     ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
     ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; SI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; SI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
     ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
     ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]]
     ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
     ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C2]]
     ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32)
     ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; SI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+    ; SI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
     ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C2]]
-    ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+    ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
     ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C2]]
     ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32)
     ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; SI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; SI: $vgpr0 = COPY [[BITCAST4]](<2 x s16>)
-    ; SI: $vgpr1 = COPY [[BITCAST5]](<2 x s16>)
-    ; SI: $vgpr2 = COPY [[BITCAST6]](<2 x s16>)
+    ; SI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; SI: $vgpr0 = COPY [[BITCAST3]](<2 x s16>)
+    ; SI: $vgpr1 = COPY [[BITCAST4]](<2 x s16>)
+    ; SI: $vgpr2 = COPY [[BITCAST5]](<2 x s16>)
     ; CI-HSA-LABEL: name: test_load_global_v5s16_align4
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; CI-HSA: [[LOAD:%[0-9]+]]:_(<5 x s16>) = G_LOAD [[COPY]](p1) :: (load 10, align 4, addrspace 1)
@@ -7687,72 +7823,42 @@ body: |
     ; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
     ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
     ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 2 from unknown-address + 6, addrspace 1)
-    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 2 from unknown-address + 8, addrspace 1)
+    ; SI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+    ; SI: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; SI: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
+    ; SI: [[DEF3:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
+    ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<6 x s16>)
+    ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32)
+    ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
     ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
+    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C5]]
     ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]]
-    ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C5]]
     ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
     ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; SI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; SI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
     ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
-    ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]]
+    ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C5]]
     ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32)
-    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
+    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C5]]
     ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
     ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; SI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>)
-    ; SI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 2 from unknown-address + 8, addrspace 1)
-    ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; SI: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
-    ; SI: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
-    ; SI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>)
-    ; SI: [[UV:%[0-9]+]]:_(<5 x s16>), [[UV1:%[0-9]+]]:_(<5 x s16>), [[UV2:%[0-9]+]]:_(<5 x s16>), [[UV3:%[0-9]+]]:_(<5 x s16>), [[UV4:%[0-9]+]]:_(<5 x s16>), [[UV5:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<30 x s16>)
-    ; SI: [[INSERT:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF1]], [[UV]](<5 x s16>), 0
-    ; SI: [[INSERT1:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[INSERT]], [[CONCAT_VECTORS]](<4 x s16>), 0
-    ; SI: [[CONCAT_VECTORS2:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>)
-    ; SI: [[UV6:%[0-9]+]]:_(<5 x s16>), [[UV7:%[0-9]+]]:_(<5 x s16>), [[UV8:%[0-9]+]]:_(<5 x s16>), [[UV9:%[0-9]+]]:_(<5 x s16>), [[UV10:%[0-9]+]]:_(<5 x s16>), [[UV11:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<30 x s16>)
-    ; SI: [[INSERT2:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF1]], [[UV6]](<5 x s16>), 0
-    ; SI: [[INSERT3:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[INSERT2]], [[TRUNC]](s16), 64
-    ; SI: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
-    ; SI: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT3]](<6 x s16>)
-    ; SI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>)
-    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C4]](s32)
-    ; SI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>)
-    ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C4]](s32)
-    ; SI: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV14]](<2 x s16>)
-    ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C4]](s32)
-    ; SI: [[UV15:%[0-9]+]]:_(<2 x s16>), [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<6 x s16>)
-    ; SI: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV15]](<2 x s16>)
-    ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C4]](s32)
-    ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
-    ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]]
-    ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
-    ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]]
+    ; SI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32)
+    ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C5]]
+    ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C5]]
     ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
     ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; SI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
-    ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]]
-    ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
-    ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]]
-    ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C4]](s32)
-    ; SI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
-    ; SI: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
-    ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32)
-    ; SI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]]
-    ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32)
-    ; SI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C3]]
-    ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; SI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]]
-    ; SI: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
-    ; SI: $vgpr0 = COPY [[BITCAST6]](<2 x s16>)
-    ; SI: $vgpr1 = COPY [[BITCAST7]](<2 x s16>)
-    ; SI: $vgpr2 = COPY [[BITCAST8]](<2 x s16>)
+    ; SI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; SI: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
+    ; SI: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
+    ; SI: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
     ; CI-HSA-LABEL: name: test_load_global_v5s16_align2
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; CI-HSA: [[LOAD:%[0-9]+]]:_(<5 x s16>) = G_LOAD [[COPY]](p1) :: (load 10, align 2, addrspace 1)
@@ -7777,72 +7883,42 @@ body: |
     ; CI-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
     ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
     ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 2 from unknown-address + 6, addrspace 1)
-    ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; CI-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 2 from unknown-address + 8, addrspace 1)
+    ; CI-MESA: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+    ; CI-MESA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; CI-MESA: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
+    ; CI-MESA: [[DEF3:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
+    ; CI-MESA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<6 x s16>)
+    ; CI-MESA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; CI-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32)
+    ; CI-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
     ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
-    ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
+    ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C5]]
     ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
-    ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]]
-    ; CI-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C5]]
     ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
     ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI-MESA: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CI-MESA: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
     ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
-    ; CI-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]]
+    ; CI-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C5]]
     ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32)
-    ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
+    ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C5]]
     ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
     ; CI-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; CI-MESA: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>)
-    ; CI-MESA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; CI-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 2 from unknown-address + 8, addrspace 1)
-    ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; CI-MESA: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
-    ; CI-MESA: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
-    ; CI-MESA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>)
-    ; CI-MESA: [[UV:%[0-9]+]]:_(<5 x s16>), [[UV1:%[0-9]+]]:_(<5 x s16>), [[UV2:%[0-9]+]]:_(<5 x s16>), [[UV3:%[0-9]+]]:_(<5 x s16>), [[UV4:%[0-9]+]]:_(<5 x s16>), [[UV5:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<30 x s16>)
-    ; CI-MESA: [[INSERT:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF1]], [[UV]](<5 x s16>), 0
-    ; CI-MESA: [[INSERT1:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[INSERT]], [[CONCAT_VECTORS]](<4 x s16>), 0
-    ; CI-MESA: [[CONCAT_VECTORS2:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>)
-    ; CI-MESA: [[UV6:%[0-9]+]]:_(<5 x s16>), [[UV7:%[0-9]+]]:_(<5 x s16>), [[UV8:%[0-9]+]]:_(<5 x s16>), [[UV9:%[0-9]+]]:_(<5 x s16>), [[UV10:%[0-9]+]]:_(<5 x s16>), [[UV11:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<30 x s16>)
-    ; CI-MESA: [[INSERT2:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF1]], [[UV6]](<5 x s16>), 0
-    ; CI-MESA: [[INSERT3:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[INSERT2]], [[TRUNC]](s16), 64
-    ; CI-MESA: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
-    ; CI-MESA: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT3]](<6 x s16>)
-    ; CI-MESA: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>)
-    ; CI-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C4]](s32)
-    ; CI-MESA: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>)
-    ; CI-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C4]](s32)
-    ; CI-MESA: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV14]](<2 x s16>)
-    ; CI-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C4]](s32)
-    ; CI-MESA: [[UV15:%[0-9]+]]:_(<2 x s16>), [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<6 x s16>)
-    ; CI-MESA: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV15]](<2 x s16>)
-    ; CI-MESA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C4]](s32)
-    ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
-    ; CI-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]]
-    ; CI-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
-    ; CI-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]]
+    ; CI-MESA: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32)
+    ; CI-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C5]]
+    ; CI-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; CI-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C5]]
     ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
     ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; CI-MESA: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; CI-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
-    ; CI-MESA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]]
-    ; CI-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
-    ; CI-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]]
-    ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C4]](s32)
-    ; CI-MESA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
-    ; CI-MESA: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
-    ; CI-MESA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32)
-    ; CI-MESA: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]]
-    ; CI-MESA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32)
-    ; CI-MESA: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C3]]
-    ; CI-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; CI-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]]
-    ; CI-MESA: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
-    ; CI-MESA: $vgpr0 = COPY [[BITCAST6]](<2 x s16>)
-    ; CI-MESA: $vgpr1 = COPY [[BITCAST7]](<2 x s16>)
-    ; CI-MESA: $vgpr2 = COPY [[BITCAST8]](<2 x s16>)
+    ; CI-MESA: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; CI-MESA: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
+    ; CI-MESA: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
+    ; CI-MESA: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
     ; VI-LABEL: name: test_load_global_v5s16_align2
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1)
@@ -7855,72 +7931,42 @@ body: |
     ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
     ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
     ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 2 from unknown-address + 6, addrspace 1)
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 2 from unknown-address + 8, addrspace 1)
+    ; VI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+    ; VI: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; VI: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
+    ; VI: [[DEF3:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
+    ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<6 x s16>)
+    ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32)
+    ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
     ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
+    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C5]]
     ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]]
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C5]]
     ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
     ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; VI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
     ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]]
+    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C5]]
     ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32)
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
+    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C5]]
     ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
     ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>)
-    ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 2 from unknown-address + 8, addrspace 1)
-    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; VI: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>)
-    ; VI: [[UV:%[0-9]+]]:_(<5 x s16>), [[UV1:%[0-9]+]]:_(<5 x s16>), [[UV2:%[0-9]+]]:_(<5 x s16>), [[UV3:%[0-9]+]]:_(<5 x s16>), [[UV4:%[0-9]+]]:_(<5 x s16>), [[UV5:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<30 x s16>)
-    ; VI: [[INSERT:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF1]], [[UV]](<5 x s16>), 0
-    ; VI: [[INSERT1:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[INSERT]], [[CONCAT_VECTORS]](<4 x s16>), 0
-    ; VI: [[CONCAT_VECTORS2:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>)
-    ; VI: [[UV6:%[0-9]+]]:_(<5 x s16>), [[UV7:%[0-9]+]]:_(<5 x s16>), [[UV8:%[0-9]+]]:_(<5 x s16>), [[UV9:%[0-9]+]]:_(<5 x s16>), [[UV10:%[0-9]+]]:_(<5 x s16>), [[UV11:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<30 x s16>)
-    ; VI: [[INSERT2:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF1]], [[UV6]](<5 x s16>), 0
-    ; VI: [[INSERT3:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[INSERT2]], [[TRUNC]](s16), 64
-    ; VI: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT3]](<6 x s16>)
-    ; VI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>)
-    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C4]](s32)
-    ; VI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>)
-    ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C4]](s32)
-    ; VI: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV14]](<2 x s16>)
-    ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C4]](s32)
-    ; VI: [[UV15:%[0-9]+]]:_(<2 x s16>), [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<6 x s16>)
-    ; VI: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV15]](<2 x s16>)
-    ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C4]](s32)
-    ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
-    ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]]
-    ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
-    ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]]
+    ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32)
+    ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C5]]
+    ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C5]]
     ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
     ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; VI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
-    ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]]
-    ; VI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
-    ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]]
-    ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C4]](s32)
-    ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
-    ; VI: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
-    ; VI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32)
-    ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]]
-    ; VI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32)
-    ; VI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C3]]
-    ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]]
-    ; VI: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
-    ; VI: $vgpr0 = COPY [[BITCAST6]](<2 x s16>)
-    ; VI: $vgpr1 = COPY [[BITCAST7]](<2 x s16>)
-    ; VI: $vgpr2 = COPY [[BITCAST8]](<2 x s16>)
+    ; VI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; VI: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
+    ; VI: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
+    ; VI: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
     ; GFX9-HSA-LABEL: name: test_load_global_v5s16_align2
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<5 x s16>) = G_LOAD [[COPY]](p1) :: (load 10, align 2, addrspace 1)
@@ -7945,51 +7991,29 @@ body: |
     ; GFX9-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
     ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
     ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 2 from unknown-address + 6, addrspace 1)
-    ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
-    ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
-    ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
-    ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32)
-    ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
     ; GFX9-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
     ; GFX9-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
     ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 2 from unknown-address + 8, addrspace 1)
-    ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
-    ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-MESA: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-MESA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>)
-    ; GFX9-MESA: [[UV:%[0-9]+]]:_(<5 x s16>), [[UV1:%[0-9]+]]:_(<5 x s16>), [[UV2:%[0-9]+]]:_(<5 x s16>), [[UV3:%[0-9]+]]:_(<5 x s16>), [[UV4:%[0-9]+]]:_(<5 x s16>), [[UV5:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<30 x s16>)
-    ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF1]], [[UV]](<5 x s16>), 0
-    ; GFX9-MESA: [[INSERT1:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[INSERT]], [[CONCAT_VECTORS]](<4 x s16>), 0
-    ; GFX9-MESA: [[CONCAT_VECTORS2:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>)
-    ; GFX9-MESA: [[UV6:%[0-9]+]]:_(<5 x s16>), [[UV7:%[0-9]+]]:_(<5 x s16>), [[UV8:%[0-9]+]]:_(<5 x s16>), [[UV9:%[0-9]+]]:_(<5 x s16>), [[UV10:%[0-9]+]]:_(<5 x s16>), [[UV11:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<30 x s16>)
-    ; GFX9-MESA: [[INSERT2:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF1]], [[UV6]](<5 x s16>), 0
-    ; GFX9-MESA: [[INSERT3:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[INSERT2]], [[TRUNC]](s16), 64
+    ; GFX9-MESA: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+    ; GFX9-MESA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
     ; GFX9-MESA: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-MESA: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT3]](<6 x s16>)
-    ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>)
+    ; GFX9-MESA: [[DEF3:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-MESA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<6 x s16>)
+    ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
     ; GFX9-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
     ; GFX9-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32)
-    ; GFX9-MESA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>)
-    ; GFX9-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C4]](s32)
-    ; GFX9-MESA: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV14]](<2 x s16>)
-    ; GFX9-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C4]](s32)
-    ; GFX9-MESA: [[UV15:%[0-9]+]]:_(<2 x s16>), [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<6 x s16>)
-    ; GFX9-MESA: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV15]](<2 x s16>)
-    ; GFX9-MESA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C4]](s32)
-    ; GFX9-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
-    ; GFX9-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
+    ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+    ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32)
+    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32)
+    ; GFX9-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32)
+    ; GFX9-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
     ; GFX9-MESA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32)
-    ; GFX9-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
-    ; GFX9-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY7]](s32), [[COPY8]](s32)
-    ; GFX9-MESA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
-    ; GFX9-MESA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY9]](s32), [[COPY10]](s32)
-    ; GFX9-MESA: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
-    ; GFX9-MESA: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC3]](<2 x s16>)
-    ; GFX9-MESA: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC4]](<2 x s16>)
+    ; GFX9-MESA: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
+    ; GFX9-MESA: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+    ; GFX9-MESA: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<5 x s16>) = G_LOAD %0 :: (load 10, align 2, addrspace 1)
     %2:_(<5 x s16>) = G_IMPLICIT_DEF
@@ -8036,14 +8060,8 @@ body: |
     ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32)
     ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
     ; SI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
-    ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
-    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; SI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; SI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
+    ; SI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
     ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 1 from unknown-address + 4, addrspace 1)
     ; SI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
     ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load 1 from unknown-address + 5, addrspace 1)
@@ -8052,9 +8070,9 @@ body: |
     ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
     ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32)
     ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]]
-    ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32)
-    ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
-    ; SI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
+    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32)
+    ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
+    ; SI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
     ; SI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64)
     ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load 1 from unknown-address + 6, addrspace 1)
     ; SI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64)
@@ -8064,17 +8082,11 @@ body: |
     ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
     ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32)
     ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]]
-    ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY6]](s32)
-    ; SI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32)
-    ; SI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]]
-    ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
-    ; SI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16)
-    ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C5]](s32)
-    ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
-    ; SI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
-    ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>)
-    ; SI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; SI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
+    ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY6]](s32)
+    ; SI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
+    ; SI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]]
+    ; SI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; SI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
     ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load 1 from unknown-address + 8, addrspace 1)
     ; SI: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
     ; SI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load 1 from unknown-address + 9, addrspace 1)
@@ -8083,55 +8095,37 @@ body: |
     ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
     ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32)
     ; SI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]]
-    ; SI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY8]](s32)
-    ; SI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32)
-    ; SI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]]
-    ; SI: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
-    ; SI: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
-    ; SI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>)
-    ; SI: [[UV:%[0-9]+]]:_(<5 x s16>), [[UV1:%[0-9]+]]:_(<5 x s16>), [[UV2:%[0-9]+]]:_(<5 x s16>), [[UV3:%[0-9]+]]:_(<5 x s16>), [[UV4:%[0-9]+]]:_(<5 x s16>), [[UV5:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<30 x s16>)
-    ; SI: [[INSERT:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF1]], [[UV]](<5 x s16>), 0
-    ; SI: [[INSERT1:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[INSERT]], [[CONCAT_VECTORS]](<4 x s16>), 0
-    ; SI: [[CONCAT_VECTORS2:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>)
-    ; SI: [[UV6:%[0-9]+]]:_(<5 x s16>), [[UV7:%[0-9]+]]:_(<5 x s16>), [[UV8:%[0-9]+]]:_(<5 x s16>), [[UV9:%[0-9]+]]:_(<5 x s16>), [[UV10:%[0-9]+]]:_(<5 x s16>), [[UV11:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<30 x s16>)
-    ; SI: [[INSERT2:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF1]], [[UV6]](<5 x s16>), 0
-    ; SI: [[INSERT3:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[INSERT2]], [[OR6]](s16), 64
+    ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY8]](s32)
+    ; SI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32)
+    ; SI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]]
+    ; SI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+    ; SI: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
     ; SI: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
-    ; SI: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT3]](<6 x s16>)
-    ; SI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>)
-    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C5]](s32)
-    ; SI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>)
-    ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C5]](s32)
-    ; SI: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV14]](<2 x s16>)
-    ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C5]](s32)
-    ; SI: [[UV15:%[0-9]+]]:_(<2 x s16>), [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<6 x s16>)
-    ; SI: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV15]](<2 x s16>)
-    ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C5]](s32)
+    ; SI: [[DEF3:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
+    ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<6 x s16>)
+    ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; SI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C7]](s32)
+    ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C7]](s32)
+    ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL5]]
+    ; SI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+    ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; SI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
+    ; SI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C7]](s32)
+    ; SI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL6]]
+    ; SI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32)
+    ; SI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16)
     ; SI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+    ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
     ; SI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C8]]
-    ; SI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
-    ; SI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C8]]
-    ; SI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C5]](s32)
-    ; SI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL7]]
-    ; SI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32)
-    ; SI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
-    ; SI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C8]]
-    ; SI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
-    ; SI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C8]]
-    ; SI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C5]](s32)
-    ; SI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL8]]
-    ; SI: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32)
-    ; SI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32)
-    ; SI: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C8]]
-    ; SI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32)
-    ; SI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C8]]
-    ; SI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C5]](s32)
-    ; SI: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND14]], [[SHL9]]
-    ; SI: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR9]](s32)
-    ; SI: $vgpr0 = COPY [[BITCAST6]](<2 x s16>)
-    ; SI: $vgpr1 = COPY [[BITCAST7]](<2 x s16>)
-    ; SI: $vgpr2 = COPY [[BITCAST8]](<2 x s16>)
+    ; SI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C7]](s32)
+    ; SI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL7]]
+    ; SI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32)
+    ; SI: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
+    ; SI: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
+    ; SI: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
     ; CI-HSA-LABEL: name: test_load_global_v5s16_align1
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; CI-HSA: [[LOAD:%[0-9]+]]:_(<5 x s16>) = G_LOAD [[COPY]](p1) :: (load 10, align 1, addrspace 1)
@@ -8173,14 +8167,8 @@ body: |
     ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32)
     ; CI-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
     ; CI-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
-    ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; CI-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; CI-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
-    ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; CI-MESA: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; CI-MESA: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; CI-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
+    ; CI-MESA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CI-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
     ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 1 from unknown-address + 4, addrspace 1)
     ; CI-MESA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
     ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load 1 from unknown-address + 5, addrspace 1)
@@ -8189,9 +8177,9 @@ body: |
     ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
     ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32)
     ; CI-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]]
-    ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32)
-    ; CI-MESA: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
-    ; CI-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
+    ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32)
+    ; CI-MESA: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
+    ; CI-MESA: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
     ; CI-MESA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64)
     ; CI-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load 1 from unknown-address + 6, addrspace 1)
     ; CI-MESA: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64)
@@ -8201,17 +8189,11 @@ body: |
     ; CI-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
     ; CI-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32)
     ; CI-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]]
-    ; CI-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY6]](s32)
-    ; CI-MESA: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32)
-    ; CI-MESA: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]]
-    ; CI-MESA: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
-    ; CI-MESA: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16)
-    ; CI-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C5]](s32)
-    ; CI-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
-    ; CI-MESA: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
-    ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>)
-    ; CI-MESA: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; CI-MESA: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
+    ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY6]](s32)
+    ; CI-MESA: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
+    ; CI-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]]
+    ; CI-MESA: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; CI-MESA: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
     ; CI-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load 1 from unknown-address + 8, addrspace 1)
     ; CI-MESA: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
     ; CI-MESA: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load 1 from unknown-address + 9, addrspace 1)
@@ -8220,55 +8202,37 @@ body: |
     ; CI-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
     ; CI-MESA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32)
     ; CI-MESA: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]]
-    ; CI-MESA: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY8]](s32)
-    ; CI-MESA: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32)
-    ; CI-MESA: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]]
-    ; CI-MESA: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
-    ; CI-MESA: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
-    ; CI-MESA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>)
-    ; CI-MESA: [[UV:%[0-9]+]]:_(<5 x s16>), [[UV1:%[0-9]+]]:_(<5 x s16>), [[UV2:%[0-9]+]]:_(<5 x s16>), [[UV3:%[0-9]+]]:_(<5 x s16>), [[UV4:%[0-9]+]]:_(<5 x s16>), [[UV5:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<30 x s16>)
-    ; CI-MESA: [[INSERT:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF1]], [[UV]](<5 x s16>), 0
-    ; CI-MESA: [[INSERT1:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[INSERT]], [[CONCAT_VECTORS]](<4 x s16>), 0
-    ; CI-MESA: [[CONCAT_VECTORS2:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>)
-    ; CI-MESA: [[UV6:%[0-9]+]]:_(<5 x s16>), [[UV7:%[0-9]+]]:_(<5 x s16>), [[UV8:%[0-9]+]]:_(<5 x s16>), [[UV9:%[0-9]+]]:_(<5 x s16>), [[UV10:%[0-9]+]]:_(<5 x s16>), [[UV11:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<30 x s16>)
-    ; CI-MESA: [[INSERT2:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF1]], [[UV6]](<5 x s16>), 0
-    ; CI-MESA: [[INSERT3:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[INSERT2]], [[OR6]](s16), 64
+    ; CI-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY8]](s32)
+    ; CI-MESA: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32)
+    ; CI-MESA: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]]
+    ; CI-MESA: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+    ; CI-MESA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
     ; CI-MESA: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
-    ; CI-MESA: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT3]](<6 x s16>)
-    ; CI-MESA: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>)
-    ; CI-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C5]](s32)
-    ; CI-MESA: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>)
-    ; CI-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C5]](s32)
-    ; CI-MESA: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV14]](<2 x s16>)
-    ; CI-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C5]](s32)
-    ; CI-MESA: [[UV15:%[0-9]+]]:_(<2 x s16>), [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<6 x s16>)
-    ; CI-MESA: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV15]](<2 x s16>)
-    ; CI-MESA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C5]](s32)
+    ; CI-MESA: [[DEF3:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
+    ; CI-MESA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<6 x s16>)
+    ; CI-MESA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; CI-MESA: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C7]](s32)
+    ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; CI-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; CI-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C7]](s32)
+    ; CI-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL5]]
+    ; CI-MESA: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+    ; CI-MESA: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; CI-MESA: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
+    ; CI-MESA: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C7]](s32)
+    ; CI-MESA: [[OR6:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL6]]
+    ; CI-MESA: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32)
+    ; CI-MESA: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16)
     ; CI-MESA: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CI-MESA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+    ; CI-MESA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
     ; CI-MESA: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C8]]
-    ; CI-MESA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
-    ; CI-MESA: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C8]]
-    ; CI-MESA: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C5]](s32)
-    ; CI-MESA: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL7]]
-    ; CI-MESA: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32)
-    ; CI-MESA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
-    ; CI-MESA: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C8]]
-    ; CI-MESA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
-    ; CI-MESA: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C8]]
-    ; CI-MESA: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C5]](s32)
-    ; CI-MESA: [[OR8:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL8]]
-    ; CI-MESA: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32)
-    ; CI-MESA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32)
-    ; CI-MESA: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C8]]
-    ; CI-MESA: [[COPY15:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32)
-    ; CI-MESA: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C8]]
-    ; CI-MESA: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C5]](s32)
-    ; CI-MESA: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND14]], [[SHL9]]
-    ; CI-MESA: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR9]](s32)
-    ; CI-MESA: $vgpr0 = COPY [[BITCAST6]](<2 x s16>)
-    ; CI-MESA: $vgpr1 = COPY [[BITCAST7]](<2 x s16>)
-    ; CI-MESA: $vgpr2 = COPY [[BITCAST8]](<2 x s16>)
+    ; CI-MESA: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C7]](s32)
+    ; CI-MESA: [[OR7:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL7]]
+    ; CI-MESA: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32)
+    ; CI-MESA: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
+    ; CI-MESA: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
+    ; CI-MESA: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
     ; VI-LABEL: name: test_load_global_v5s16_align1
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1)
@@ -8294,14 +8258,8 @@ body: |
     ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]]
     ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16)
     ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; VI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
     ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 1 from unknown-address + 4, addrspace 1)
     ; VI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
     ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load 1 from unknown-address + 5, addrspace 1)
@@ -8309,8 +8267,8 @@ body: |
     ; VI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]]
     ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
     ; VI: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]]
-    ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16)
-    ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL3]]
+    ; VI: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16)
+    ; VI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
     ; VI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64)
     ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load 1 from unknown-address + 6, addrspace 1)
     ; VI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64)
@@ -8319,16 +8277,10 @@ body: |
     ; VI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C1]]
     ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
     ; VI: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C1]]
-    ; VI: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C2]](s16)
-    ; VI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL4]]
-    ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
-    ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16)
-    ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C4]](s32)
-    ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
-    ; VI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
-    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>)
-    ; VI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; VI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
+    ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C2]](s16)
+    ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
+    ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; VI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
     ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load 1 from unknown-address + 8, addrspace 1)
     ; VI: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
     ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load 1 from unknown-address + 9, addrspace 1)
@@ -8336,54 +8288,36 @@ body: |
     ; VI: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C1]]
     ; VI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32)
     ; VI: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C1]]
-    ; VI: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C2]](s16)
-    ; VI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]]
-    ; VI: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>)
-    ; VI: [[UV:%[0-9]+]]:_(<5 x s16>), [[UV1:%[0-9]+]]:_(<5 x s16>), [[UV2:%[0-9]+]]:_(<5 x s16>), [[UV3:%[0-9]+]]:_(<5 x s16>), [[UV4:%[0-9]+]]:_(<5 x s16>), [[UV5:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<30 x s16>)
-    ; VI: [[INSERT:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF1]], [[UV]](<5 x s16>), 0
-    ; VI: [[INSERT1:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[INSERT]], [[CONCAT_VECTORS]](<4 x s16>), 0
-    ; VI: [[CONCAT_VECTORS2:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>)
-    ; VI: [[UV6:%[0-9]+]]:_(<5 x s16>), [[UV7:%[0-9]+]]:_(<5 x s16>), [[UV8:%[0-9]+]]:_(<5 x s16>), [[UV9:%[0-9]+]]:_(<5 x s16>), [[UV10:%[0-9]+]]:_(<5 x s16>), [[UV11:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<30 x s16>)
-    ; VI: [[INSERT2:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF1]], [[UV6]](<5 x s16>), 0
-    ; VI: [[INSERT3:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[INSERT2]], [[OR6]](s16), 64
+    ; VI: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C2]](s16)
+    ; VI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL4]]
+    ; VI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+    ; VI: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
     ; VI: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT3]](<6 x s16>)
-    ; VI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>)
-    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C4]](s32)
-    ; VI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>)
-    ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C4]](s32)
-    ; VI: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV14]](<2 x s16>)
-    ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C4]](s32)
-    ; VI: [[UV15:%[0-9]+]]:_(<2 x s16>), [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<6 x s16>)
-    ; VI: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV15]](<2 x s16>)
-    ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C4]](s32)
+    ; VI: [[DEF3:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
+    ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<6 x s16>)
+    ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C6]](s32)
+    ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32)
+    ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL5]]
+    ; VI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+    ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
+    ; VI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C6]](s32)
+    ; VI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL6]]
+    ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32)
+    ; VI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16)
     ; VI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+    ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
     ; VI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C7]]
-    ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
-    ; VI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C7]]
-    ; VI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C4]](s32)
-    ; VI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL7]]
-    ; VI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32)
-    ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
-    ; VI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C7]]
-    ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
-    ; VI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C7]]
-    ; VI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
-    ; VI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL8]]
-    ; VI: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32)
-    ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32)
-    ; VI: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C7]]
-    ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32)
-    ; VI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C7]]
-    ; VI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C4]](s32)
-    ; VI: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND14]], [[SHL9]]
-    ; VI: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR9]](s32)
-    ; VI: $vgpr0 = COPY [[BITCAST6]](<2 x s16>)
-    ; VI: $vgpr1 = COPY [[BITCAST7]](<2 x s16>)
-    ; VI: $vgpr2 = COPY [[BITCAST8]](<2 x s16>)
+    ; VI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C6]](s32)
+    ; VI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL7]]
+    ; VI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32)
+    ; VI: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
+    ; VI: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
+    ; VI: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
     ; GFX9-HSA-LABEL: name: test_load_global_v5s16_align1
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<5 x s16>) = G_LOAD [[COPY]](p1) :: (load 10, align 1, addrspace 1)
@@ -8421,9 +8355,6 @@ body: |
     ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]]
     ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16)
     ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
-    ; GFX9-MESA: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32)
     ; GFX9-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
     ; GFX9-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
     ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 1 from unknown-address + 4, addrspace 1)
@@ -8445,10 +8376,6 @@ body: |
     ; GFX9-MESA: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C1]]
     ; GFX9-MESA: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C2]](s16)
     ; GFX9-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
-    ; GFX9-MESA: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[OR2]](s16)
-    ; GFX9-MESA: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[OR3]](s16)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[ANYEXT3]](s32)
-    ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
     ; GFX9-MESA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
     ; GFX9-MESA: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
     ; GFX9-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load 1 from unknown-address + 8, addrspace 1)
@@ -8460,40 +8387,26 @@ body: |
     ; GFX9-MESA: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C1]]
     ; GFX9-MESA: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C2]](s16)
     ; GFX9-MESA: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL4]]
-    ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-MESA: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-MESA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>)
-    ; GFX9-MESA: [[UV:%[0-9]+]]:_(<5 x s16>), [[UV1:%[0-9]+]]:_(<5 x s16>), [[UV2:%[0-9]+]]:_(<5 x s16>), [[UV3:%[0-9]+]]:_(<5 x s16>), [[UV4:%[0-9]+]]:_(<5 x s16>), [[UV5:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<30 x s16>)
-    ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF1]], [[UV]](<5 x s16>), 0
-    ; GFX9-MESA: [[INSERT1:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[INSERT]], [[CONCAT_VECTORS]](<4 x s16>), 0
-    ; GFX9-MESA: [[CONCAT_VECTORS2:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>)
-    ; GFX9-MESA: [[UV6:%[0-9]+]]:_(<5 x s16>), [[UV7:%[0-9]+]]:_(<5 x s16>), [[UV8:%[0-9]+]]:_(<5 x s16>), [[UV9:%[0-9]+]]:_(<5 x s16>), [[UV10:%[0-9]+]]:_(<5 x s16>), [[UV11:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<30 x s16>)
-    ; GFX9-MESA: [[INSERT2:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF1]], [[UV6]](<5 x s16>), 0
-    ; GFX9-MESA: [[INSERT3:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[INSERT2]], [[OR4]](s16), 64
+    ; GFX9-MESA: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+    ; GFX9-MESA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
     ; GFX9-MESA: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-MESA: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT3]](<6 x s16>)
-    ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>)
+    ; GFX9-MESA: [[DEF3:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-MESA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<6 x s16>)
+    ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
     ; GFX9-MESA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
     ; GFX9-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C6]](s32)
-    ; GFX9-MESA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>)
-    ; GFX9-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C6]](s32)
-    ; GFX9-MESA: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV14]](<2 x s16>)
-    ; GFX9-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C6]](s32)
-    ; GFX9-MESA: [[UV15:%[0-9]+]]:_(<2 x s16>), [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<6 x s16>)
-    ; GFX9-MESA: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV15]](<2 x s16>)
-    ; GFX9-MESA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C6]](s32)
+    ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; GFX9-MESA: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16)
+    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32)
+    ; GFX9-MESA: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[OR2]](s16)
+    ; GFX9-MESA: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[OR3]](s16)
+    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[ANYEXT3]](s32)
+    ; GFX9-MESA: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[OR4]](s16)
     ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
-    ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
-    ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
-    ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32)
-    ; GFX9-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
-    ; GFX9-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32)
-    ; GFX9-MESA: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
-    ; GFX9-MESA: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC3]](<2 x s16>)
-    ; GFX9-MESA: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC4]](<2 x s16>)
+    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT4]](s32), [[COPY1]](s32)
+    ; GFX9-MESA: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
+    ; GFX9-MESA: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+    ; GFX9-MESA: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<5 x s16>) = G_LOAD %0 :: (load 10, align 1, addrspace 1)
     %2:_(<5 x s16>) = G_IMPLICIT_DEF
@@ -8559,10 +8472,9 @@ body: |
     ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
     ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
     ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 4 from unknown-address + 8, align 8, addrspace 1)
-    ; SI: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; SI: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[LOAD]](<2 x s32>), 0
-    ; SI: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[LOAD1]](s32), 64
-    ; SI: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[INSERT1]](<3 x s32>)
+    ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
+    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32)
+    ; SI: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
     ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>)
     ; CI-HSA-LABEL: name: test_load_global_v6s16_align8
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
@@ -8606,10 +8518,9 @@ body: |
     ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
     ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
     ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 4 from unknown-address + 8, addrspace 1)
-    ; SI: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; SI: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[LOAD]](<2 x s32>), 0
-    ; SI: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[LOAD1]](s32), 64
-    ; SI: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[INSERT1]](<3 x s32>)
+    ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
+    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32)
+    ; SI: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
     ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>)
     ; CI-HSA-LABEL: name: test_load_global_v6s16_align4
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
@@ -8672,7 +8583,6 @@ body: |
     ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]]
     ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
     ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32)
     ; SI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
     ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
     ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 2 from unknown-address + 8, addrspace 1)
@@ -8684,10 +8594,8 @@ body: |
     ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]]
     ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
     ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; SI: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; SI: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<2 x s32>), 0
-    ; SI: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[OR2]](s32), 64
-    ; SI: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[INSERT1]](<3 x s32>)
+    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
+    ; SI: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
     ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>)
     ; CI-HSA-LABEL: name: test_load_global_v6s16_align2
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
@@ -8719,7 +8627,6 @@ body: |
     ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]]
     ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
     ; CI-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32)
     ; CI-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
     ; CI-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
     ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 2 from unknown-address + 8, addrspace 1)
@@ -8731,10 +8638,8 @@ body: |
     ; CI-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]]
     ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
     ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; CI-MESA: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; CI-MESA: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<2 x s32>), 0
-    ; CI-MESA: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[OR2]](s32), 64
-    ; CI-MESA: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[INSERT1]](<3 x s32>)
+    ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
+    ; CI-MESA: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
     ; CI-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>)
     ; VI-LABEL: name: test_load_global_v6s16_align2
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
@@ -8761,7 +8666,6 @@ body: |
     ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]]
     ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
     ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32)
     ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
     ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
     ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 2 from unknown-address + 8, addrspace 1)
@@ -8773,10 +8677,8 @@ body: |
     ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]]
     ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
     ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; VI: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; VI: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<2 x s32>), 0
-    ; VI: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[OR2]](s32), 64
-    ; VI: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[INSERT1]](<3 x s32>)
+    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
+    ; VI: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
     ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>)
     ; GFX9-HSA-LABEL: name: test_load_global_v6s16_align2
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
@@ -8808,7 +8710,6 @@ body: |
     ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]]
     ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
     ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32)
     ; GFX9-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
     ; GFX9-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
     ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 2 from unknown-address + 8, addrspace 1)
@@ -8820,10 +8721,8 @@ body: |
     ; GFX9-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]]
     ; GFX9-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
     ; GFX9-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<2 x s32>), 0
-    ; GFX9-MESA: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[OR2]](s32), 64
-    ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[INSERT1]](<3 x s32>)
+    ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
+    ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
     ; GFX9-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<6 x s16>) = G_LOAD %0 :: (load 12, align 2, addrspace 1)
@@ -8889,7 +8788,6 @@ body: |
     ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]]
     ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
     ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
     ; SI: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
     ; SI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64)
     ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load 1 from unknown-address + 8, addrspace 1)
@@ -8913,10 +8811,8 @@ body: |
     ; SI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C3]]
     ; SI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
     ; SI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; SI: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; SI: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<2 x s32>), 0
-    ; SI: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[OR8]](s32), 64
-    ; SI: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[INSERT1]](<3 x s32>)
+    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
+    ; SI: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
     ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>)
     ; CI-HSA-LABEL: name: test_load_global_v6s16_align1
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
@@ -8976,7 +8872,6 @@ body: |
     ; CI-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]]
     ; CI-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
     ; CI-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
     ; CI-MESA: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
     ; CI-MESA: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64)
     ; CI-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load 1 from unknown-address + 8, addrspace 1)
@@ -9000,10 +8895,8 @@ body: |
     ; CI-MESA: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C3]]
     ; CI-MESA: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
     ; CI-MESA: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; CI-MESA: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; CI-MESA: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<2 x s32>), 0
-    ; CI-MESA: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[OR8]](s32), 64
-    ; CI-MESA: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[INSERT1]](<3 x s32>)
+    ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
+    ; CI-MESA: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
     ; CI-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>)
     ; VI-LABEL: name: test_load_global_v6s16_align1
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
@@ -9058,7 +8951,6 @@ body: |
     ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]]
     ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
     ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
     ; VI: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
     ; VI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64)
     ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load 1 from unknown-address + 8, addrspace 1)
@@ -9082,10 +8974,8 @@ body: |
     ; VI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C3]]
     ; VI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
     ; VI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; VI: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; VI: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<2 x s32>), 0
-    ; VI: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[OR8]](s32), 64
-    ; VI: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[INSERT1]](<3 x s32>)
+    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
+    ; VI: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
     ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>)
     ; GFX9-HSA-LABEL: name: test_load_global_v6s16_align1
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
@@ -9145,7 +9035,6 @@ body: |
     ; GFX9-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]]
     ; GFX9-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
     ; GFX9-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
     ; GFX9-MESA: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
     ; GFX9-MESA: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64)
     ; GFX9-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load 1 from unknown-address + 8, addrspace 1)
@@ -9169,10 +9058,8 @@ body: |
     ; GFX9-MESA: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C3]]
     ; GFX9-MESA: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
     ; GFX9-MESA: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<2 x s32>), 0
-    ; GFX9-MESA: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[OR8]](s32), 64
-    ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[INSERT1]](<3 x s32>)
+    ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
+    ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
     ; GFX9-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<6 x s16>) = G_LOAD %0 :: (load 12, align 1, addrspace 1)
@@ -9685,105 +9572,162 @@ body: |
     ; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
     ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
     ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 2 from unknown-address + 6, addrspace 1)
-    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 2 from unknown-address + 8, addrspace 1)
+    ; SI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+    ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load 2 from unknown-address + 10, addrspace 1)
+    ; SI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
+    ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load 2 from unknown-address + 12, addrspace 1)
+    ; SI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; SI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; SI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
+    ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32)
+    ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C4]](s32)
+    ; SI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
+    ; SI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+    ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C4]](s32)
+    ; SI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+    ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C4]](s32)
+    ; SI: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
+    ; SI: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
+    ; SI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C4]](s32)
+    ; SI: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>)
+    ; SI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C4]](s32)
+    ; SI: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
+    ; SI: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
+    ; SI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C4]](s32)
+    ; SI: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
+    ; SI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C4]](s32)
+    ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
     ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
+    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C5]]
     ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]]
-    ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C5]]
     ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
     ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; SI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; SI: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
     ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
-    ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]]
+    ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C5]]
     ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32)
-    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
+    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C5]]
     ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
     ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; SI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>)
-    ; SI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 2 from unknown-address + 8, addrspace 1)
-    ; SI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
-    ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load 2 from unknown-address + 10, addrspace 1)
+    ; SI: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
     ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32)
-    ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]]
+    ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C5]]
     ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32)
-    ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]]
+    ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C5]]
     ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
     ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; SI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; SI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
-    ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load 2 from unknown-address + 12, addrspace 1)
-    ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
-    ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; SI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; SI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; SI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
-    ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
-    ; SI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BITCAST2]](<2 x s16>), 0
-    ; SI: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; SI: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>)
-    ; SI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0
-    ; SI: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[TRUNC]](s16), 32
-    ; SI: [[CONCAT_VECTORS3:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT3]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; SI: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<12 x s16>)
-    ; SI: [[DEF2:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
-    ; SI: [[DEF3:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
-    ; SI: [[CONCAT_VECTORS4:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[DEF2]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>)
-    ; SI: [[UV12:%[0-9]+]]:_(<7 x s16>), [[UV13:%[0-9]+]]:_(<7 x s16>), [[UV14:%[0-9]+]]:_(<7 x s16>), [[UV15:%[0-9]+]]:_(<7 x s16>), [[UV16:%[0-9]+]]:_(<7 x s16>), [[UV17:%[0-9]+]]:_(<7 x s16>), [[UV18:%[0-9]+]]:_(<7 x s16>), [[UV19:%[0-9]+]]:_(<7 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS4]](<56 x s16>)
-    ; SI: [[INSERT4:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[DEF3]], [[UV12]](<7 x s16>), 0
-    ; SI: [[INSERT5:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[INSERT4]], [[CONCAT_VECTORS]](<4 x s16>), 0
-    ; SI: [[CONCAT_VECTORS5:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[INSERT5]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>)
-    ; SI: [[UV20:%[0-9]+]]:_(<7 x s16>), [[UV21:%[0-9]+]]:_(<7 x s16>), [[UV22:%[0-9]+]]:_(<7 x s16>), [[UV23:%[0-9]+]]:_(<7 x s16>), [[UV24:%[0-9]+]]:_(<7 x s16>), [[UV25:%[0-9]+]]:_(<7 x s16>), [[UV26:%[0-9]+]]:_(<7 x s16>), [[UV27:%[0-9]+]]:_(<7 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS5]](<56 x s16>)
-    ; SI: [[INSERT6:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[DEF3]], [[UV20]](<7 x s16>), 0
-    ; SI: [[INSERT7:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[INSERT6]], [[UV8]](<3 x s16>), 64
-    ; SI: [[DEF4:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
-    ; SI: [[UV28:%[0-9]+]]:_(<2 x s16>), [[UV29:%[0-9]+]]:_(<2 x s16>), [[UV30:%[0-9]+]]:_(<2 x s16>), [[UV31:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT7]](<8 x s16>)
-    ; SI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV28]](<2 x s16>)
-    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C4]](s32)
-    ; SI: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV29]](<2 x s16>)
-    ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C4]](s32)
-    ; SI: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV30]](<2 x s16>)
-    ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C4]](s32)
-    ; SI: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV31]](<2 x s16>)
-    ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C4]](s32)
-    ; SI: [[UV32:%[0-9]+]]:_(<2 x s16>), [[UV33:%[0-9]+]]:_(<2 x s16>), [[UV34:%[0-9]+]]:_(<2 x s16>), [[UV35:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF4]](<8 x s16>)
-    ; SI: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV32]](<2 x s16>)
-    ; SI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C4]](s32)
-    ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
-    ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]]
-    ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
-    ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]]
-    ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C4]](s32)
+    ; SI: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32)
+    ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C5]]
+    ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C6]](s32)
+    ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[COPY8]], [[C4]](s32)
     ; SI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
-    ; SI: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
-    ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32)
-    ; SI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]]
-    ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
-    ; SI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C3]]
-    ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; SI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]]
-    ; SI: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
-    ; SI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32)
-    ; SI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C3]]
-    ; SI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
-    ; SI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C3]]
-    ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C4]](s32)
-    ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL5]]
-    ; SI: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
-    ; SI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[BITCAST6]](s32)
-    ; SI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]]
-    ; SI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[BITCAST7]](s32)
-    ; SI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C3]]
-    ; SI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
-    ; SI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL6]]
-    ; SI: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32)
-    ; SI: $vgpr0 = COPY [[BITCAST8]](<2 x s16>)
-    ; SI: $vgpr1 = COPY [[BITCAST9]](<2 x s16>)
-    ; SI: $vgpr2 = COPY [[BITCAST10]](<2 x s16>)
-    ; SI: $vgpr3 = COPY [[BITCAST11]](<2 x s16>)
+    ; SI: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+    ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C6]](s32)
+    ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C5]]
+    ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C4]](s32)
+    ; SI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[COPY9]], [[SHL4]]
+    ; SI: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+    ; SI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; SI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C5]]
+    ; SI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; SI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C5]]
+    ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL5]]
+    ; SI: [[BITCAST13:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+    ; SI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+    ; SI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C5]]
+    ; SI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
+    ; SI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C5]]
+    ; SI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C4]](s32)
+    ; SI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL6]]
+    ; SI: [[BITCAST14:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32)
+    ; SI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+    ; SI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C5]]
+    ; SI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32)
+    ; SI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C5]]
+    ; SI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
+    ; SI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL7]]
+    ; SI: [[BITCAST15:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32)
+    ; SI: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32)
+    ; SI: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY17]], [[C5]]
+    ; SI: [[COPY18:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32)
+    ; SI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY18]], [[C5]]
+    ; SI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C4]](s32)
+    ; SI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[AND14]], [[SHL8]]
+    ; SI: [[BITCAST16:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32)
+    ; SI: [[COPY19:%[0-9]+]]:_(s32) = COPY [[BITCAST6]](s32)
+    ; SI: [[AND16:%[0-9]+]]:_(s32) = G_AND [[COPY19]], [[C5]]
+    ; SI: [[COPY20:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32)
+    ; SI: [[AND17:%[0-9]+]]:_(s32) = G_AND [[COPY20]], [[C5]]
+    ; SI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[C4]](s32)
+    ; SI: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND16]], [[SHL9]]
+    ; SI: [[BITCAST17:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR9]](s32)
+    ; SI: [[COPY21:%[0-9]+]]:_(s32) = COPY [[BITCAST7]](s32)
+    ; SI: [[AND18:%[0-9]+]]:_(s32) = G_AND [[COPY21]], [[C5]]
+    ; SI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[C4]](s32)
+    ; SI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[AND18]], [[SHL10]]
+    ; SI: [[BITCAST18:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR10]](s32)
+    ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<42 x s16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>), [[BITCAST10]](<2 x s16>), [[BITCAST11]](<2 x s16>), [[BITCAST12]](<2 x s16>), [[BITCAST13]](<2 x s16>), [[BITCAST14]](<2 x s16>), [[BITCAST15]](<2 x s16>), [[BITCAST16]](<2 x s16>), [[BITCAST17]](<2 x s16>), [[BITCAST18]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>)
+    ; SI: [[UV8:%[0-9]+]]:_(<21 x s16>), [[UV9:%[0-9]+]]:_(<21 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<42 x s16>)
+    ; SI: [[DEF3:%[0-9]+]]:_(<22 x s16>) = G_IMPLICIT_DEF
+    ; SI: [[INSERT:%[0-9]+]]:_(<22 x s16>) = G_INSERT [[DEF3]], [[UV8]](<21 x s16>), 0
+    ; SI: [[DEF4:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
+    ; SI: [[DEF5:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
+    ; SI: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>), [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>), [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>), [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>), [[UV20:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<22 x s16>)
+    ; SI: [[BITCAST19:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>)
+    ; SI: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST19]], [[C4]](s32)
+    ; SI: [[BITCAST20:%[0-9]+]]:_(s32) = G_BITCAST [[UV11]](<2 x s16>)
+    ; SI: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST20]], [[C4]](s32)
+    ; SI: [[BITCAST21:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>)
+    ; SI: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST21]], [[C4]](s32)
+    ; SI: [[BITCAST22:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>)
+    ; SI: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST22]], [[C4]](s32)
+    ; SI: [[UV21:%[0-9]+]]:_(<2 x s16>), [[UV22:%[0-9]+]]:_(<2 x s16>), [[UV23:%[0-9]+]]:_(<2 x s16>), [[UV24:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF4]](<8 x s16>)
+    ; SI: [[BITCAST23:%[0-9]+]]:_(s32) = G_BITCAST [[UV21]](<2 x s16>)
+    ; SI: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST23]], [[C4]](s32)
+    ; SI: [[COPY22:%[0-9]+]]:_(s32) = COPY [[BITCAST19]](s32)
+    ; SI: [[AND19:%[0-9]+]]:_(s32) = G_AND [[COPY22]], [[C5]]
+    ; SI: [[COPY23:%[0-9]+]]:_(s32) = COPY [[LSHR8]](s32)
+    ; SI: [[AND20:%[0-9]+]]:_(s32) = G_AND [[COPY23]], [[C5]]
+    ; SI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND20]], [[C4]](s32)
+    ; SI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[AND19]], [[SHL11]]
+    ; SI: [[BITCAST24:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR11]](s32)
+    ; SI: [[COPY24:%[0-9]+]]:_(s32) = COPY [[BITCAST20]](s32)
+    ; SI: [[AND21:%[0-9]+]]:_(s32) = G_AND [[COPY24]], [[C5]]
+    ; SI: [[COPY25:%[0-9]+]]:_(s32) = COPY [[LSHR9]](s32)
+    ; SI: [[AND22:%[0-9]+]]:_(s32) = G_AND [[COPY25]], [[C5]]
+    ; SI: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND22]], [[C4]](s32)
+    ; SI: [[OR12:%[0-9]+]]:_(s32) = G_OR [[AND21]], [[SHL12]]
+    ; SI: [[BITCAST25:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR12]](s32)
+    ; SI: [[COPY26:%[0-9]+]]:_(s32) = COPY [[BITCAST21]](s32)
+    ; SI: [[AND23:%[0-9]+]]:_(s32) = G_AND [[COPY26]], [[C5]]
+    ; SI: [[COPY27:%[0-9]+]]:_(s32) = COPY [[LSHR10]](s32)
+    ; SI: [[AND24:%[0-9]+]]:_(s32) = G_AND [[COPY27]], [[C5]]
+    ; SI: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND24]], [[C4]](s32)
+    ; SI: [[OR13:%[0-9]+]]:_(s32) = G_OR [[AND23]], [[SHL13]]
+    ; SI: [[BITCAST26:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR13]](s32)
+    ; SI: [[COPY28:%[0-9]+]]:_(s32) = COPY [[BITCAST22]](s32)
+    ; SI: [[AND25:%[0-9]+]]:_(s32) = G_AND [[COPY28]], [[C5]]
+    ; SI: [[COPY29:%[0-9]+]]:_(s32) = COPY [[BITCAST23]](s32)
+    ; SI: [[AND26:%[0-9]+]]:_(s32) = G_AND [[COPY29]], [[C5]]
+    ; SI: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND26]], [[C4]](s32)
+    ; SI: [[OR14:%[0-9]+]]:_(s32) = G_OR [[AND25]], [[SHL14]]
+    ; SI: [[BITCAST27:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR14]](s32)
+    ; SI: $vgpr0 = COPY [[BITCAST24]](<2 x s16>)
+    ; SI: $vgpr1 = COPY [[BITCAST25]](<2 x s16>)
+    ; SI: $vgpr2 = COPY [[BITCAST26]](<2 x s16>)
+    ; SI: $vgpr3 = COPY [[BITCAST27]](<2 x s16>)
     ; CI-HSA-LABEL: name: test_load_global_v7s16_align2
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; CI-HSA: [[LOAD:%[0-9]+]]:_(<7 x s16>) = G_LOAD [[COPY]](p1) :: (load 14, align 2, addrspace 1)
@@ -9809,105 +9753,162 @@ body: |
     ; CI-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
     ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
     ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 2 from unknown-address + 6, addrspace 1)
-    ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; CI-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 2 from unknown-address + 8, addrspace 1)
+    ; CI-MESA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+    ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load 2 from unknown-address + 10, addrspace 1)
+    ; CI-MESA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
+    ; CI-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load 2 from unknown-address + 12, addrspace 1)
+    ; CI-MESA: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; CI-MESA: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CI-MESA: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CI-MESA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
+    ; CI-MESA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; CI-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32)
+    ; CI-MESA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; CI-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C4]](s32)
+    ; CI-MESA: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
+    ; CI-MESA: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+    ; CI-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C4]](s32)
+    ; CI-MESA: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+    ; CI-MESA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C4]](s32)
+    ; CI-MESA: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
+    ; CI-MESA: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
+    ; CI-MESA: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C4]](s32)
+    ; CI-MESA: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>)
+    ; CI-MESA: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C4]](s32)
+    ; CI-MESA: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
+    ; CI-MESA: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
+    ; CI-MESA: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C4]](s32)
+    ; CI-MESA: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
+    ; CI-MESA: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C4]](s32)
+    ; CI-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
     ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
-    ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
+    ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C5]]
     ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
-    ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]]
-    ; CI-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C5]]
     ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
     ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI-MESA: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CI-MESA: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
     ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
-    ; CI-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]]
+    ; CI-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C5]]
     ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32)
-    ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
+    ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C5]]
     ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
     ; CI-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; CI-MESA: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>)
-    ; CI-MESA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; CI-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 2 from unknown-address + 8, addrspace 1)
-    ; CI-MESA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
-    ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load 2 from unknown-address + 10, addrspace 1)
+    ; CI-MESA: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
     ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32)
-    ; CI-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]]
+    ; CI-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C5]]
     ; CI-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32)
-    ; CI-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]]
+    ; CI-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C5]]
     ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
     ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; CI-MESA: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; CI-MESA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
-    ; CI-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load 2 from unknown-address + 12, addrspace 1)
-    ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
-    ; CI-MESA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CI-MESA: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CI-MESA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; CI-MESA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
-    ; CI-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
-    ; CI-MESA: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BITCAST2]](<2 x s16>), 0
-    ; CI-MESA: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; CI-MESA: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>)
-    ; CI-MESA: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0
-    ; CI-MESA: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[TRUNC]](s16), 32
-    ; CI-MESA: [[CONCAT_VECTORS3:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT3]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; CI-MESA: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<12 x s16>)
-    ; CI-MESA: [[DEF2:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
-    ; CI-MESA: [[DEF3:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
-    ; CI-MESA: [[CONCAT_VECTORS4:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[DEF2]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>)
-    ; CI-MESA: [[UV12:%[0-9]+]]:_(<7 x s16>), [[UV13:%[0-9]+]]:_(<7 x s16>), [[UV14:%[0-9]+]]:_(<7 x s16>), [[UV15:%[0-9]+]]:_(<7 x s16>), [[UV16:%[0-9]+]]:_(<7 x s16>), [[UV17:%[0-9]+]]:_(<7 x s16>), [[UV18:%[0-9]+]]:_(<7 x s16>), [[UV19:%[0-9]+]]:_(<7 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS4]](<56 x s16>)
-    ; CI-MESA: [[INSERT4:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[DEF3]], [[UV12]](<7 x s16>), 0
-    ; CI-MESA: [[INSERT5:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[INSERT4]], [[CONCAT_VECTORS]](<4 x s16>), 0
-    ; CI-MESA: [[CONCAT_VECTORS5:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[INSERT5]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>)
-    ; CI-MESA: [[UV20:%[0-9]+]]:_(<7 x s16>), [[UV21:%[0-9]+]]:_(<7 x s16>), [[UV22:%[0-9]+]]:_(<7 x s16>), [[UV23:%[0-9]+]]:_(<7 x s16>), [[UV24:%[0-9]+]]:_(<7 x s16>), [[UV25:%[0-9]+]]:_(<7 x s16>), [[UV26:%[0-9]+]]:_(<7 x s16>), [[UV27:%[0-9]+]]:_(<7 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS5]](<56 x s16>)
-    ; CI-MESA: [[INSERT6:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[DEF3]], [[UV20]](<7 x s16>), 0
-    ; CI-MESA: [[INSERT7:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[INSERT6]], [[UV8]](<3 x s16>), 64
-    ; CI-MESA: [[DEF4:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
-    ; CI-MESA: [[UV28:%[0-9]+]]:_(<2 x s16>), [[UV29:%[0-9]+]]:_(<2 x s16>), [[UV30:%[0-9]+]]:_(<2 x s16>), [[UV31:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT7]](<8 x s16>)
-    ; CI-MESA: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV28]](<2 x s16>)
-    ; CI-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C4]](s32)
-    ; CI-MESA: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV29]](<2 x s16>)
-    ; CI-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C4]](s32)
-    ; CI-MESA: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV30]](<2 x s16>)
-    ; CI-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C4]](s32)
-    ; CI-MESA: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV31]](<2 x s16>)
-    ; CI-MESA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C4]](s32)
-    ; CI-MESA: [[UV32:%[0-9]+]]:_(<2 x s16>), [[UV33:%[0-9]+]]:_(<2 x s16>), [[UV34:%[0-9]+]]:_(<2 x s16>), [[UV35:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF4]](<8 x s16>)
-    ; CI-MESA: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV32]](<2 x s16>)
-    ; CI-MESA: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C4]](s32)
-    ; CI-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
-    ; CI-MESA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]]
-    ; CI-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
-    ; CI-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]]
-    ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C4]](s32)
+    ; CI-MESA: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; CI-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32)
+    ; CI-MESA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C5]]
+    ; CI-MESA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CI-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C6]](s32)
+    ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[COPY8]], [[C4]](s32)
     ; CI-MESA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
-    ; CI-MESA: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
-    ; CI-MESA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32)
-    ; CI-MESA: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]]
-    ; CI-MESA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
-    ; CI-MESA: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C3]]
-    ; CI-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; CI-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]]
-    ; CI-MESA: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
-    ; CI-MESA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32)
-    ; CI-MESA: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C3]]
-    ; CI-MESA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
-    ; CI-MESA: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C3]]
-    ; CI-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C4]](s32)
-    ; CI-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL5]]
-    ; CI-MESA: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
-    ; CI-MESA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[BITCAST6]](s32)
-    ; CI-MESA: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]]
-    ; CI-MESA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[BITCAST7]](s32)
-    ; CI-MESA: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C3]]
-    ; CI-MESA: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
-    ; CI-MESA: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL6]]
-    ; CI-MESA: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32)
-    ; CI-MESA: $vgpr0 = COPY [[BITCAST8]](<2 x s16>)
-    ; CI-MESA: $vgpr1 = COPY [[BITCAST9]](<2 x s16>)
-    ; CI-MESA: $vgpr2 = COPY [[BITCAST10]](<2 x s16>)
-    ; CI-MESA: $vgpr3 = COPY [[BITCAST11]](<2 x s16>)
+    ; CI-MESA: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+    ; CI-MESA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C6]](s32)
+    ; CI-MESA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; CI-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C5]]
+    ; CI-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C4]](s32)
+    ; CI-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[COPY9]], [[SHL4]]
+    ; CI-MESA: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+    ; CI-MESA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; CI-MESA: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C5]]
+    ; CI-MESA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; CI-MESA: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C5]]
+    ; CI-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; CI-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL5]]
+    ; CI-MESA: [[BITCAST13:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+    ; CI-MESA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+    ; CI-MESA: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C5]]
+    ; CI-MESA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
+    ; CI-MESA: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C5]]
+    ; CI-MESA: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C4]](s32)
+    ; CI-MESA: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL6]]
+    ; CI-MESA: [[BITCAST14:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32)
+    ; CI-MESA: [[COPY15:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+    ; CI-MESA: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C5]]
+    ; CI-MESA: [[COPY16:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32)
+    ; CI-MESA: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C5]]
+    ; CI-MESA: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
+    ; CI-MESA: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL7]]
+    ; CI-MESA: [[BITCAST15:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32)
+    ; CI-MESA: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32)
+    ; CI-MESA: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY17]], [[C5]]
+    ; CI-MESA: [[COPY18:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32)
+    ; CI-MESA: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY18]], [[C5]]
+    ; CI-MESA: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C4]](s32)
+    ; CI-MESA: [[OR8:%[0-9]+]]:_(s32) = G_OR [[AND14]], [[SHL8]]
+    ; CI-MESA: [[BITCAST16:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32)
+    ; CI-MESA: [[COPY19:%[0-9]+]]:_(s32) = COPY [[BITCAST6]](s32)
+    ; CI-MESA: [[AND16:%[0-9]+]]:_(s32) = G_AND [[COPY19]], [[C5]]
+    ; CI-MESA: [[COPY20:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32)
+    ; CI-MESA: [[AND17:%[0-9]+]]:_(s32) = G_AND [[COPY20]], [[C5]]
+    ; CI-MESA: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[C4]](s32)
+    ; CI-MESA: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND16]], [[SHL9]]
+    ; CI-MESA: [[BITCAST17:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR9]](s32)
+    ; CI-MESA: [[COPY21:%[0-9]+]]:_(s32) = COPY [[BITCAST7]](s32)
+    ; CI-MESA: [[AND18:%[0-9]+]]:_(s32) = G_AND [[COPY21]], [[C5]]
+    ; CI-MESA: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[C4]](s32)
+    ; CI-MESA: [[OR10:%[0-9]+]]:_(s32) = G_OR [[AND18]], [[SHL10]]
+    ; CI-MESA: [[BITCAST18:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR10]](s32)
+    ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<42 x s16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>), [[BITCAST10]](<2 x s16>), [[BITCAST11]](<2 x s16>), [[BITCAST12]](<2 x s16>), [[BITCAST13]](<2 x s16>), [[BITCAST14]](<2 x s16>), [[BITCAST15]](<2 x s16>), [[BITCAST16]](<2 x s16>), [[BITCAST17]](<2 x s16>), [[BITCAST18]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>)
+    ; CI-MESA: [[UV8:%[0-9]+]]:_(<21 x s16>), [[UV9:%[0-9]+]]:_(<21 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<42 x s16>)
+    ; CI-MESA: [[DEF3:%[0-9]+]]:_(<22 x s16>) = G_IMPLICIT_DEF
+    ; CI-MESA: [[INSERT:%[0-9]+]]:_(<22 x s16>) = G_INSERT [[DEF3]], [[UV8]](<21 x s16>), 0
+    ; CI-MESA: [[DEF4:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
+    ; CI-MESA: [[DEF5:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
+    ; CI-MESA: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>), [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>), [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>), [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>), [[UV20:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<22 x s16>)
+    ; CI-MESA: [[BITCAST19:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>)
+    ; CI-MESA: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST19]], [[C4]](s32)
+    ; CI-MESA: [[BITCAST20:%[0-9]+]]:_(s32) = G_BITCAST [[UV11]](<2 x s16>)
+    ; CI-MESA: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST20]], [[C4]](s32)
+    ; CI-MESA: [[BITCAST21:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>)
+    ; CI-MESA: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST21]], [[C4]](s32)
+    ; CI-MESA: [[BITCAST22:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>)
+    ; CI-MESA: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST22]], [[C4]](s32)
+    ; CI-MESA: [[UV21:%[0-9]+]]:_(<2 x s16>), [[UV22:%[0-9]+]]:_(<2 x s16>), [[UV23:%[0-9]+]]:_(<2 x s16>), [[UV24:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF4]](<8 x s16>)
+    ; CI-MESA: [[BITCAST23:%[0-9]+]]:_(s32) = G_BITCAST [[UV21]](<2 x s16>)
+    ; CI-MESA: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST23]], [[C4]](s32)
+    ; CI-MESA: [[COPY22:%[0-9]+]]:_(s32) = COPY [[BITCAST19]](s32)
+    ; CI-MESA: [[AND19:%[0-9]+]]:_(s32) = G_AND [[COPY22]], [[C5]]
+    ; CI-MESA: [[COPY23:%[0-9]+]]:_(s32) = COPY [[LSHR8]](s32)
+    ; CI-MESA: [[AND20:%[0-9]+]]:_(s32) = G_AND [[COPY23]], [[C5]]
+    ; CI-MESA: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND20]], [[C4]](s32)
+    ; CI-MESA: [[OR11:%[0-9]+]]:_(s32) = G_OR [[AND19]], [[SHL11]]
+    ; CI-MESA: [[BITCAST24:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR11]](s32)
+    ; CI-MESA: [[COPY24:%[0-9]+]]:_(s32) = COPY [[BITCAST20]](s32)
+    ; CI-MESA: [[AND21:%[0-9]+]]:_(s32) = G_AND [[COPY24]], [[C5]]
+    ; CI-MESA: [[COPY25:%[0-9]+]]:_(s32) = COPY [[LSHR9]](s32)
+    ; CI-MESA: [[AND22:%[0-9]+]]:_(s32) = G_AND [[COPY25]], [[C5]]
+    ; CI-MESA: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND22]], [[C4]](s32)
+    ; CI-MESA: [[OR12:%[0-9]+]]:_(s32) = G_OR [[AND21]], [[SHL12]]
+    ; CI-MESA: [[BITCAST25:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR12]](s32)
+    ; CI-MESA: [[COPY26:%[0-9]+]]:_(s32) = COPY [[BITCAST21]](s32)
+    ; CI-MESA: [[AND23:%[0-9]+]]:_(s32) = G_AND [[COPY26]], [[C5]]
+    ; CI-MESA: [[COPY27:%[0-9]+]]:_(s32) = COPY [[LSHR10]](s32)
+    ; CI-MESA: [[AND24:%[0-9]+]]:_(s32) = G_AND [[COPY27]], [[C5]]
+    ; CI-MESA: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND24]], [[C4]](s32)
+    ; CI-MESA: [[OR13:%[0-9]+]]:_(s32) = G_OR [[AND23]], [[SHL13]]
+    ; CI-MESA: [[BITCAST26:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR13]](s32)
+    ; CI-MESA: [[COPY28:%[0-9]+]]:_(s32) = COPY [[BITCAST22]](s32)
+    ; CI-MESA: [[AND25:%[0-9]+]]:_(s32) = G_AND [[COPY28]], [[C5]]
+    ; CI-MESA: [[COPY29:%[0-9]+]]:_(s32) = COPY [[BITCAST23]](s32)
+    ; CI-MESA: [[AND26:%[0-9]+]]:_(s32) = G_AND [[COPY29]], [[C5]]
+    ; CI-MESA: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND26]], [[C4]](s32)
+    ; CI-MESA: [[OR14:%[0-9]+]]:_(s32) = G_OR [[AND25]], [[SHL14]]
+    ; CI-MESA: [[BITCAST27:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR14]](s32)
+    ; CI-MESA: $vgpr0 = COPY [[BITCAST24]](<2 x s16>)
+    ; CI-MESA: $vgpr1 = COPY [[BITCAST25]](<2 x s16>)
+    ; CI-MESA: $vgpr2 = COPY [[BITCAST26]](<2 x s16>)
+    ; CI-MESA: $vgpr3 = COPY [[BITCAST27]](<2 x s16>)
     ; VI-LABEL: name: test_load_global_v7s16_align2
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1)
@@ -9920,105 +9921,162 @@ body: |
     ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
     ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
     ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 2 from unknown-address + 6, addrspace 1)
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 2 from unknown-address + 8, addrspace 1)
+    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
+    ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load 2 from unknown-address + 10, addrspace 1)
+    ; VI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
+    ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load 2 from unknown-address + 12, addrspace 1)
+    ; VI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; VI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
+    ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32)
+    ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C4]](s32)
+    ; VI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
+    ; VI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+    ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C4]](s32)
+    ; VI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+    ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C4]](s32)
+    ; VI: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
+    ; VI: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
+    ; VI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C4]](s32)
+    ; VI: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>)
+    ; VI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C4]](s32)
+    ; VI: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
+    ; VI: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
+    ; VI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C4]](s32)
+    ; VI: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
+    ; VI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C4]](s32)
+    ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
     ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
+    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C5]]
     ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]]
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C5]]
     ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
     ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; VI: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
     ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]]
+    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C5]]
     ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32)
-    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
+    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C5]]
     ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
     ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>)
-    ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 2 from unknown-address + 8, addrspace 1)
-    ; VI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
-    ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load 2 from unknown-address + 10, addrspace 1)
+    ; VI: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
     ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32)
-    ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]]
+    ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C5]]
     ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32)
-    ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]]
+    ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C5]]
     ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
     ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; VI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
-    ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load 2 from unknown-address + 12, addrspace 1)
-    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
-    ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
-    ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
-    ; VI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BITCAST2]](<2 x s16>), 0
-    ; VI: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; VI: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>)
-    ; VI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0
-    ; VI: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[TRUNC]](s16), 32
-    ; VI: [[CONCAT_VECTORS3:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT3]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; VI: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<12 x s16>)
-    ; VI: [[DEF2:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[DEF3:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[CONCAT_VECTORS4:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[DEF2]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>)
-    ; VI: [[UV12:%[0-9]+]]:_(<7 x s16>), [[UV13:%[0-9]+]]:_(<7 x s16>), [[UV14:%[0-9]+]]:_(<7 x s16>), [[UV15:%[0-9]+]]:_(<7 x s16>), [[UV16:%[0-9]+]]:_(<7 x s16>), [[UV17:%[0-9]+]]:_(<7 x s16>), [[UV18:%[0-9]+]]:_(<7 x s16>), [[UV19:%[0-9]+]]:_(<7 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS4]](<56 x s16>)
-    ; VI: [[INSERT4:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[DEF3]], [[UV12]](<7 x s16>), 0
-    ; VI: [[INSERT5:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[INSERT4]], [[CONCAT_VECTORS]](<4 x s16>), 0
-    ; VI: [[CONCAT_VECTORS5:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[INSERT5]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>)
-    ; VI: [[UV20:%[0-9]+]]:_(<7 x s16>), [[UV21:%[0-9]+]]:_(<7 x s16>), [[UV22:%[0-9]+]]:_(<7 x s16>), [[UV23:%[0-9]+]]:_(<7 x s16>), [[UV24:%[0-9]+]]:_(<7 x s16>), [[UV25:%[0-9]+]]:_(<7 x s16>), [[UV26:%[0-9]+]]:_(<7 x s16>), [[UV27:%[0-9]+]]:_(<7 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS5]](<56 x s16>)
-    ; VI: [[INSERT6:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[DEF3]], [[UV20]](<7 x s16>), 0
-    ; VI: [[INSERT7:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[INSERT6]], [[UV8]](<3 x s16>), 64
-    ; VI: [[DEF4:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[UV28:%[0-9]+]]:_(<2 x s16>), [[UV29:%[0-9]+]]:_(<2 x s16>), [[UV30:%[0-9]+]]:_(<2 x s16>), [[UV31:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT7]](<8 x s16>)
-    ; VI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV28]](<2 x s16>)
-    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C4]](s32)
-    ; VI: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV29]](<2 x s16>)
-    ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C4]](s32)
-    ; VI: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV30]](<2 x s16>)
-    ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C4]](s32)
-    ; VI: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV31]](<2 x s16>)
-    ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C4]](s32)
-    ; VI: [[UV32:%[0-9]+]]:_(<2 x s16>), [[UV33:%[0-9]+]]:_(<2 x s16>), [[UV34:%[0-9]+]]:_(<2 x s16>), [[UV35:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF4]](<8 x s16>)
-    ; VI: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV32]](<2 x s16>)
-    ; VI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C4]](s32)
-    ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
-    ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]]
-    ; VI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
-    ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]]
-    ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C4]](s32)
+    ; VI: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32)
+    ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C5]]
+    ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; VI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C6]](s32)
+    ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[COPY8]], [[C4]](s32)
     ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
-    ; VI: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
-    ; VI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32)
-    ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]]
-    ; VI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
-    ; VI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C3]]
-    ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
-    ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]]
-    ; VI: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
-    ; VI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32)
-    ; VI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C3]]
-    ; VI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
-    ; VI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C3]]
-    ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C4]](s32)
-    ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL5]]
-    ; VI: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
-    ; VI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[BITCAST6]](s32)
-    ; VI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]]
-    ; VI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[BITCAST7]](s32)
-    ; VI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C3]]
-    ; VI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
-    ; VI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL6]]
-    ; VI: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32)
-    ; VI: $vgpr0 = COPY [[BITCAST8]](<2 x s16>)
-    ; VI: $vgpr1 = COPY [[BITCAST9]](<2 x s16>)
-    ; VI: $vgpr2 = COPY [[BITCAST10]](<2 x s16>)
-    ; VI: $vgpr3 = COPY [[BITCAST11]](<2 x s16>)
+    ; VI: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+    ; VI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C6]](s32)
+    ; VI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C5]]
+    ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C4]](s32)
+    ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[COPY9]], [[SHL4]]
+    ; VI: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+    ; VI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C5]]
+    ; VI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; VI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C5]]
+    ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL5]]
+    ; VI: [[BITCAST13:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+    ; VI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+    ; VI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C5]]
+    ; VI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
+    ; VI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C5]]
+    ; VI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C4]](s32)
+    ; VI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL6]]
+    ; VI: [[BITCAST14:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32)
+    ; VI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+    ; VI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C5]]
+    ; VI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32)
+    ; VI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C5]]
+    ; VI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
+    ; VI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL7]]
+    ; VI: [[BITCAST15:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32)
+    ; VI: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32)
+    ; VI: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY17]], [[C5]]
+    ; VI: [[COPY18:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32)
+    ; VI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY18]], [[C5]]
+    ; VI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C4]](s32)
+    ; VI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[AND14]], [[SHL8]]
+    ; VI: [[BITCAST16:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32)
+    ; VI: [[COPY19:%[0-9]+]]:_(s32) = COPY [[BITCAST6]](s32)
+    ; VI: [[AND16:%[0-9]+]]:_(s32) = G_AND [[COPY19]], [[C5]]
+    ; VI: [[COPY20:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32)
+    ; VI: [[AND17:%[0-9]+]]:_(s32) = G_AND [[COPY20]], [[C5]]
+    ; VI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[C4]](s32)
+    ; VI: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND16]], [[SHL9]]
+    ; VI: [[BITCAST17:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR9]](s32)
+    ; VI: [[COPY21:%[0-9]+]]:_(s32) = COPY [[BITCAST7]](s32)
+    ; VI: [[AND18:%[0-9]+]]:_(s32) = G_AND [[COPY21]], [[C5]]
+    ; VI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[C4]](s32)
+    ; VI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[AND18]], [[SHL10]]
+    ; VI: [[BITCAST18:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR10]](s32)
+    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<42 x s16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>), [[BITCAST10]](<2 x s16>), [[BITCAST11]](<2 x s16>), [[BITCAST12]](<2 x s16>), [[BITCAST13]](<2 x s16>), [[BITCAST14]](<2 x s16>), [[BITCAST15]](<2 x s16>), [[BITCAST16]](<2 x s16>), [[BITCAST17]](<2 x s16>), [[BITCAST18]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>)
+    ; VI: [[UV8:%[0-9]+]]:_(<21 x s16>), [[UV9:%[0-9]+]]:_(<21 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<42 x s16>)
+    ; VI: [[DEF3:%[0-9]+]]:_(<22 x s16>) = G_IMPLICIT_DEF
+    ; VI: [[INSERT:%[0-9]+]]:_(<22 x s16>) = G_INSERT [[DEF3]], [[UV8]](<21 x s16>), 0
+    ; VI: [[DEF4:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
+    ; VI: [[DEF5:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
+    ; VI: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>), [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>), [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>), [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>), [[UV20:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<22 x s16>)
+    ; VI: [[BITCAST19:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>)
+    ; VI: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST19]], [[C4]](s32)
+    ; VI: [[BITCAST20:%[0-9]+]]:_(s32) = G_BITCAST [[UV11]](<2 x s16>)
+    ; VI: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST20]], [[C4]](s32)
+    ; VI: [[BITCAST21:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>)
+    ; VI: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST21]], [[C4]](s32)
+    ; VI: [[BITCAST22:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>)
+    ; VI: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST22]], [[C4]](s32)
+    ; VI: [[UV21:%[0-9]+]]:_(<2 x s16>), [[UV22:%[0-9]+]]:_(<2 x s16>), [[UV23:%[0-9]+]]:_(<2 x s16>), [[UV24:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF4]](<8 x s16>)
+    ; VI: [[BITCAST23:%[0-9]+]]:_(s32) = G_BITCAST [[UV21]](<2 x s16>)
+    ; VI: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST23]], [[C4]](s32)
+    ; VI: [[COPY22:%[0-9]+]]:_(s32) = COPY [[BITCAST19]](s32)
+    ; VI: [[AND19:%[0-9]+]]:_(s32) = G_AND [[COPY22]], [[C5]]
+    ; VI: [[COPY23:%[0-9]+]]:_(s32) = COPY [[LSHR8]](s32)
+    ; VI: [[AND20:%[0-9]+]]:_(s32) = G_AND [[COPY23]], [[C5]]
+    ; VI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND20]], [[C4]](s32)
+    ; VI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[AND19]], [[SHL11]]
+    ; VI: [[BITCAST24:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR11]](s32)
+    ; VI: [[COPY24:%[0-9]+]]:_(s32) = COPY [[BITCAST20]](s32)
+    ; VI: [[AND21:%[0-9]+]]:_(s32) = G_AND [[COPY24]], [[C5]]
+    ; VI: [[COPY25:%[0-9]+]]:_(s32) = COPY [[LSHR9]](s32)
+    ; VI: [[AND22:%[0-9]+]]:_(s32) = G_AND [[COPY25]], [[C5]]
+    ; VI: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND22]], [[C4]](s32)
+    ; VI: [[OR12:%[0-9]+]]:_(s32) = G_OR [[AND21]], [[SHL12]]
+    ; VI: [[BITCAST25:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR12]](s32)
+    ; VI: [[COPY26:%[0-9]+]]:_(s32) = COPY [[BITCAST21]](s32)
+    ; VI: [[AND23:%[0-9]+]]:_(s32) = G_AND [[COPY26]], [[C5]]
+    ; VI: [[COPY27:%[0-9]+]]:_(s32) = COPY [[LSHR10]](s32)
+    ; VI: [[AND24:%[0-9]+]]:_(s32) = G_AND [[COPY27]], [[C5]]
+    ; VI: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND24]], [[C4]](s32)
+    ; VI: [[OR13:%[0-9]+]]:_(s32) = G_OR [[AND23]], [[SHL13]]
+    ; VI: [[BITCAST26:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR13]](s32)
+    ; VI: [[COPY28:%[0-9]+]]:_(s32) = COPY [[BITCAST22]](s32)
+    ; VI: [[AND25:%[0-9]+]]:_(s32) = G_AND [[COPY28]], [[C5]]
+    ; VI: [[COPY29:%[0-9]+]]:_(s32) = COPY [[BITCAST23]](s32)
+    ; VI: [[AND26:%[0-9]+]]:_(s32) = G_AND [[COPY29]], [[C5]]
+    ; VI: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND26]], [[C4]](s32)
+    ; VI: [[OR14:%[0-9]+]]:_(s32) = G_OR [[AND25]], [[SHL14]]
+    ; VI: [[BITCAST27:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR14]](s32)
+    ; VI: $vgpr0 = COPY [[BITCAST24]](<2 x s16>)
+    ; VI: $vgpr1 = COPY [[BITCAST25]](<2 x s16>)
+    ; VI: $vgpr2 = COPY [[BITCAST26]](<2 x s16>)
+    ; VI: $vgpr3 = COPY [[BITCAST27]](<2 x s16>)
     ; GFX9-HSA-LABEL: name: test_load_global_v7s16_align2
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<7 x s16>) = G_LOAD [[COPY]](p1) :: (load 14, align 2, addrspace 1)
@@ -10044,76 +10102,104 @@ body: |
     ; GFX9-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
     ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
     ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 2 from unknown-address + 6, addrspace 1)
-    ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
-    ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
-    ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
-    ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32)
-    ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
     ; GFX9-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
     ; GFX9-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
     ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 2 from unknown-address + 8, addrspace 1)
     ; GFX9-MESA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
     ; GFX9-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load 2 from unknown-address + 10, addrspace 1)
-    ; GFX9-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32)
-    ; GFX9-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32)
     ; GFX9-MESA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
     ; GFX9-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load 2 from unknown-address + 12, addrspace 1)
-    ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
-    ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
     ; GFX9-MESA: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-MESA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; GFX9-MESA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
-    ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
-    ; GFX9-MESA: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BUILD_VECTOR_TRUNC2]](<2 x s16>), 0
-    ; GFX9-MESA: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; GFX9-MESA: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>)
-    ; GFX9-MESA: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0
-    ; GFX9-MESA: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[TRUNC]](s16), 32
-    ; GFX9-MESA: [[CONCAT_VECTORS3:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT3]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; GFX9-MESA: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<12 x s16>)
-    ; GFX9-MESA: [[DEF2:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-MESA: [[DEF3:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-MESA: [[CONCAT_VECTORS4:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[DEF2]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>)
-    ; GFX9-MESA: [[UV12:%[0-9]+]]:_(<7 x s16>), [[UV13:%[0-9]+]]:_(<7 x s16>), [[UV14:%[0-9]+]]:_(<7 x s16>), [[UV15:%[0-9]+]]:_(<7 x s16>), [[UV16:%[0-9]+]]:_(<7 x s16>), [[UV17:%[0-9]+]]:_(<7 x s16>), [[UV18:%[0-9]+]]:_(<7 x s16>), [[UV19:%[0-9]+]]:_(<7 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS4]](<56 x s16>)
-    ; GFX9-MESA: [[INSERT4:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[DEF3]], [[UV12]](<7 x s16>), 0
-    ; GFX9-MESA: [[INSERT5:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[INSERT4]], [[CONCAT_VECTORS]](<4 x s16>), 0
-    ; GFX9-MESA: [[CONCAT_VECTORS5:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[INSERT5]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>)
-    ; GFX9-MESA: [[UV20:%[0-9]+]]:_(<7 x s16>), [[UV21:%[0-9]+]]:_(<7 x s16>), [[UV22:%[0-9]+]]:_(<7 x s16>), [[UV23:%[0-9]+]]:_(<7 x s16>), [[UV24:%[0-9]+]]:_(<7 x s16>), [[UV25:%[0-9]+]]:_(<7 x s16>), [[UV26:%[0-9]+]]:_(<7 x s16>), [[UV27:%[0-9]+]]:_(<7 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS5]](<56 x s16>)
-    ; GFX9-MESA: [[INSERT6:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[DEF3]], [[UV20]](<7 x s16>), 0
-    ; GFX9-MESA: [[INSERT7:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[INSERT6]], [[UV8]](<3 x s16>), 64
-    ; GFX9-MESA: [[DEF4:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-MESA: [[UV28:%[0-9]+]]:_(<2 x s16>), [[UV29:%[0-9]+]]:_(<2 x s16>), [[UV30:%[0-9]+]]:_(<2 x s16>), [[UV31:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT7]](<8 x s16>)
-    ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV28]](<2 x s16>)
+    ; GFX9-MESA: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-MESA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
+    ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
     ; GFX9-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
     ; GFX9-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32)
-    ; GFX9-MESA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV29]](<2 x s16>)
+    ; GFX9-MESA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
     ; GFX9-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C4]](s32)
-    ; GFX9-MESA: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV30]](<2 x s16>)
+    ; GFX9-MESA: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
+    ; GFX9-MESA: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
     ; GFX9-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C4]](s32)
-    ; GFX9-MESA: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV31]](<2 x s16>)
+    ; GFX9-MESA: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
     ; GFX9-MESA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C4]](s32)
-    ; GFX9-MESA: [[UV32:%[0-9]+]]:_(<2 x s16>), [[UV33:%[0-9]+]]:_(<2 x s16>), [[UV34:%[0-9]+]]:_(<2 x s16>), [[UV35:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF4]](<8 x s16>)
-    ; GFX9-MESA: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV32]](<2 x s16>)
+    ; GFX9-MESA: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
+    ; GFX9-MESA: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
     ; GFX9-MESA: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C4]](s32)
-    ; GFX9-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
-    ; GFX9-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; GFX9-MESA: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>)
+    ; GFX9-MESA: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C4]](s32)
+    ; GFX9-MESA: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
+    ; GFX9-MESA: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
+    ; GFX9-MESA: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C4]](s32)
+    ; GFX9-MESA: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
+    ; GFX9-MESA: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C4]](s32)
+    ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
+    ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+    ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32)
+    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32)
+    ; GFX9-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32)
+    ; GFX9-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32)
+    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32)
+    ; GFX9-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32)
+    ; GFX9-MESA: [[DEF3:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; GFX9-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[DEF3]](s32)
     ; GFX9-MESA: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY7]](s32), [[COPY8]](s32)
-    ; GFX9-MESA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
-    ; GFX9-MESA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; GFX9-MESA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[DEF3]](s32)
+    ; GFX9-MESA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
     ; GFX9-MESA: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY9]](s32), [[COPY10]](s32)
-    ; GFX9-MESA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
-    ; GFX9-MESA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
+    ; GFX9-MESA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; GFX9-MESA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
     ; GFX9-MESA: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY11]](s32), [[COPY12]](s32)
-    ; GFX9-MESA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
-    ; GFX9-MESA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32)
+    ; GFX9-MESA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+    ; GFX9-MESA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
     ; GFX9-MESA: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY13]](s32), [[COPY14]](s32)
-    ; GFX9-MESA: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC3]](<2 x s16>)
-    ; GFX9-MESA: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC4]](<2 x s16>)
-    ; GFX9-MESA: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC5]](<2 x s16>)
-    ; GFX9-MESA: $vgpr3 = COPY [[BUILD_VECTOR_TRUNC6]](<2 x s16>)
+    ; GFX9-MESA: [[COPY15:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+    ; GFX9-MESA: [[COPY16:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32)
+    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC7:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[COPY16]](s32)
+    ; GFX9-MESA: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32)
+    ; GFX9-MESA: [[COPY18:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32)
+    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC8:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY17]](s32), [[COPY18]](s32)
+    ; GFX9-MESA: [[COPY19:%[0-9]+]]:_(s32) = COPY [[BITCAST6]](s32)
+    ; GFX9-MESA: [[COPY20:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32)
+    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC9:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY19]](s32), [[COPY20]](s32)
+    ; GFX9-MESA: [[COPY21:%[0-9]+]]:_(s32) = COPY [[BITCAST7]](s32)
+    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC10:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY21]](s32), [[DEF3]](s32)
+    ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<42 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>), [[BUILD_VECTOR_TRUNC4]](<2 x s16>), [[BUILD_VECTOR_TRUNC5]](<2 x s16>), [[BUILD_VECTOR_TRUNC6]](<2 x s16>), [[BUILD_VECTOR_TRUNC7]](<2 x s16>), [[BUILD_VECTOR_TRUNC8]](<2 x s16>), [[BUILD_VECTOR_TRUNC9]](<2 x s16>), [[BUILD_VECTOR_TRUNC10]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>)
+    ; GFX9-MESA: [[UV8:%[0-9]+]]:_(<21 x s16>), [[UV9:%[0-9]+]]:_(<21 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<42 x s16>)
+    ; GFX9-MESA: [[DEF4:%[0-9]+]]:_(<22 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<22 x s16>) = G_INSERT [[DEF4]], [[UV8]](<21 x s16>), 0
+    ; GFX9-MESA: [[DEF5:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-MESA: [[DEF6:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-MESA: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>), [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>), [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>), [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>), [[UV20:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<22 x s16>)
+    ; GFX9-MESA: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>)
+    ; GFX9-MESA: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C4]](s32)
+    ; GFX9-MESA: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV11]](<2 x s16>)
+    ; GFX9-MESA: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST9]], [[C4]](s32)
+    ; GFX9-MESA: [[BITCAST10:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>)
+    ; GFX9-MESA: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST10]], [[C4]](s32)
+    ; GFX9-MESA: [[BITCAST11:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>)
+    ; GFX9-MESA: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST11]], [[C4]](s32)
+    ; GFX9-MESA: [[UV21:%[0-9]+]]:_(<2 x s16>), [[UV22:%[0-9]+]]:_(<2 x s16>), [[UV23:%[0-9]+]]:_(<2 x s16>), [[UV24:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF5]](<8 x s16>)
+    ; GFX9-MESA: [[BITCAST12:%[0-9]+]]:_(s32) = G_BITCAST [[UV21]](<2 x s16>)
+    ; GFX9-MESA: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST12]], [[C4]](s32)
+    ; GFX9-MESA: [[COPY22:%[0-9]+]]:_(s32) = COPY [[BITCAST8]](s32)
+    ; GFX9-MESA: [[COPY23:%[0-9]+]]:_(s32) = COPY [[LSHR8]](s32)
+    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC11:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY22]](s32), [[COPY23]](s32)
+    ; GFX9-MESA: [[COPY24:%[0-9]+]]:_(s32) = COPY [[BITCAST9]](s32)
+    ; GFX9-MESA: [[COPY25:%[0-9]+]]:_(s32) = COPY [[LSHR9]](s32)
+    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC12:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY24]](s32), [[COPY25]](s32)
+    ; GFX9-MESA: [[COPY26:%[0-9]+]]:_(s32) = COPY [[BITCAST10]](s32)
+    ; GFX9-MESA: [[COPY27:%[0-9]+]]:_(s32) = COPY [[LSHR10]](s32)
+    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC13:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY26]](s32), [[COPY27]](s32)
+    ; GFX9-MESA: [[COPY28:%[0-9]+]]:_(s32) = COPY [[BITCAST11]](s32)
+    ; GFX9-MESA: [[COPY29:%[0-9]+]]:_(s32) = COPY [[BITCAST12]](s32)
+    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC14:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY28]](s32), [[COPY29]](s32)
+    ; GFX9-MESA: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC11]](<2 x s16>)
+    ; GFX9-MESA: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC12]](<2 x s16>)
+    ; GFX9-MESA: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC13]](<2 x s16>)
+    ; GFX9-MESA: $vgpr3 = COPY [[BUILD_VECTOR_TRUNC14]](<2 x s16>)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<7 x s16>) = G_LOAD %0 :: (load 14, align 2, addrspace 1)
     %2:_(<7 x s16>) = G_IMPLICIT_DEF
@@ -10156,19 +10242,13 @@ body: |
     ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
     ; SI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
     ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
-    ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32)
-    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]]
-    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32)
-    ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
-    ; SI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
-    ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
-    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; SI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; SI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
+    ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32)
+    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]]
+    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32)
+    ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; SI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; SI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
     ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 1 from unknown-address + 4, addrspace 1)
     ; SI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
     ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load 1 from unknown-address + 5, addrspace 1)
@@ -10177,9 +10257,9 @@ body: |
     ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
     ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32)
     ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]]
-    ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32)
-    ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
-    ; SI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
+    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32)
+    ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
+    ; SI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
     ; SI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64)
     ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load 1 from unknown-address + 6, addrspace 1)
     ; SI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64)
@@ -10189,17 +10269,11 @@ body: |
     ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
     ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32)
     ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]]
-    ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY6]](s32)
-    ; SI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32)
-    ; SI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]]
-    ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
-    ; SI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16)
-    ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C5]](s32)
-    ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
-    ; SI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
-    ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>)
-    ; SI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; SI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
+    ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY6]](s32)
+    ; SI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
+    ; SI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]]
+    ; SI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; SI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
     ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load 1 from unknown-address + 8, addrspace 1)
     ; SI: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
     ; SI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load 1 from unknown-address + 9, addrspace 1)
@@ -10208,9 +10282,9 @@ body: |
     ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
     ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32)
     ; SI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]]
-    ; SI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY8]](s32)
-    ; SI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32)
-    ; SI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]]
+    ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY8]](s32)
+    ; SI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32)
+    ; SI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]]
     ; SI: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
     ; SI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load 1 from unknown-address + 10, addrspace 1)
     ; SI: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64)
@@ -10220,15 +10294,10 @@ body: |
     ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
     ; SI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32)
     ; SI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C3]]
-    ; SI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY10]](s32)
-    ; SI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32)
-    ; SI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]]
-    ; SI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16)
-    ; SI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16)
-    ; SI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C5]](s32)
-    ; SI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL8]]
-    ; SI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32)
-    ; SI: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s64)
+    ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY10]](s32)
+    ; SI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32)
+    ; SI: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]]
+    ; SI: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64)
     ; SI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p1) :: (load 1 from unknown-address + 12, addrspace 1)
     ; SI: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64)
     ; SI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p1) :: (load 1 from unknown-address + 13, addrspace 1)
@@ -10237,77 +10306,151 @@ body: |
     ; SI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
     ; SI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32)
     ; SI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]]
-    ; SI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY12]](s32)
-    ; SI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32)
-    ; SI: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]]
-    ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; SI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY12]](s32)
+    ; SI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32)
+    ; SI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]]
+    ; SI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
     ; SI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; SI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; SI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
-    ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
-    ; SI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BITCAST2]](<2 x s16>), 0
-    ; SI: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; SI: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>)
-    ; SI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0
-    ; SI: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[OR9]](s16), 32
-    ; SI: [[CONCAT_VECTORS3:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT3]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; SI: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<12 x s16>)
-    ; SI: [[DEF2:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
-    ; SI: [[DEF3:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
-    ; SI: [[CONCAT_VECTORS4:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[DEF2]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>)
-    ; SI: [[UV12:%[0-9]+]]:_(<7 x s16>), [[UV13:%[0-9]+]]:_(<7 x s16>), [[UV14:%[0-9]+]]:_(<7 x s16>), [[UV15:%[0-9]+]]:_(<7 x s16>), [[UV16:%[0-9]+]]:_(<7 x s16>), [[UV17:%[0-9]+]]:_(<7 x s16>), [[UV18:%[0-9]+]]:_(<7 x s16>), [[UV19:%[0-9]+]]:_(<7 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS4]](<56 x s16>)
-    ; SI: [[INSERT4:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[DEF3]], [[UV12]](<7 x s16>), 0
-    ; SI: [[INSERT5:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[INSERT4]], [[CONCAT_VECTORS]](<4 x s16>), 0
-    ; SI: [[CONCAT_VECTORS5:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[INSERT5]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>)
-    ; SI: [[UV20:%[0-9]+]]:_(<7 x s16>), [[UV21:%[0-9]+]]:_(<7 x s16>), [[UV22:%[0-9]+]]:_(<7 x s16>), [[UV23:%[0-9]+]]:_(<7 x s16>), [[UV24:%[0-9]+]]:_(<7 x s16>), [[UV25:%[0-9]+]]:_(<7 x s16>), [[UV26:%[0-9]+]]:_(<7 x s16>), [[UV27:%[0-9]+]]:_(<7 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS5]](<56 x s16>)
-    ; SI: [[INSERT6:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[DEF3]], [[UV20]](<7 x s16>), 0
-    ; SI: [[INSERT7:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[INSERT6]], [[UV8]](<3 x s16>), 64
+    ; SI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
+    ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; SI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C7]](s32)
+    ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C7]](s32)
+    ; SI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
+    ; SI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+    ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C7]](s32)
+    ; SI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+    ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C7]](s32)
+    ; SI: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
+    ; SI: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
+    ; SI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C7]](s32)
+    ; SI: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>)
+    ; SI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C7]](s32)
+    ; SI: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
+    ; SI: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
+    ; SI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C7]](s32)
+    ; SI: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
+    ; SI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C7]](s32)
+    ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; SI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C7]](s32)
+    ; SI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL7]]
+    ; SI: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32)
+    ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; SI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
+    ; SI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C7]](s32)
+    ; SI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL8]]
+    ; SI: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32)
+    ; SI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16)
+    ; SI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR5]](s16)
+    ; SI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C7]](s32)
+    ; SI: [[OR9:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL9]]
+    ; SI: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR9]](s32)
+    ; SI: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16)
+    ; SI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; SI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; SI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[COPY14]], [[C7]](s32)
+    ; SI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL10]]
+    ; SI: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR10]](s32)
+    ; SI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; SI: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; SI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; SI: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C9]]
+    ; SI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C7]](s32)
+    ; SI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL11]]
+    ; SI: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR11]](s32)
+    ; SI: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; SI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY17]], [[C9]]
+    ; SI: [[COPY18:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; SI: [[AND16:%[0-9]+]]:_(s32) = G_AND [[COPY18]], [[C9]]
+    ; SI: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND16]], [[C7]](s32)
+    ; SI: [[OR12:%[0-9]+]]:_(s32) = G_OR [[AND15]], [[SHL12]]
+    ; SI: [[BITCAST13:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR12]](s32)
+    ; SI: [[COPY19:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+    ; SI: [[AND17:%[0-9]+]]:_(s32) = G_AND [[COPY19]], [[C9]]
+    ; SI: [[COPY20:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
+    ; SI: [[AND18:%[0-9]+]]:_(s32) = G_AND [[COPY20]], [[C9]]
+    ; SI: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND18]], [[C7]](s32)
+    ; SI: [[OR13:%[0-9]+]]:_(s32) = G_OR [[AND17]], [[SHL13]]
+    ; SI: [[BITCAST14:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR13]](s32)
+    ; SI: [[COPY21:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+    ; SI: [[AND19:%[0-9]+]]:_(s32) = G_AND [[COPY21]], [[C9]]
+    ; SI: [[COPY22:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32)
+    ; SI: [[AND20:%[0-9]+]]:_(s32) = G_AND [[COPY22]], [[C9]]
+    ; SI: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND20]], [[C7]](s32)
+    ; SI: [[OR14:%[0-9]+]]:_(s32) = G_OR [[AND19]], [[SHL14]]
+    ; SI: [[BITCAST15:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR14]](s32)
+    ; SI: [[COPY23:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32)
+    ; SI: [[AND21:%[0-9]+]]:_(s32) = G_AND [[COPY23]], [[C9]]
+    ; SI: [[COPY24:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32)
+    ; SI: [[AND22:%[0-9]+]]:_(s32) = G_AND [[COPY24]], [[C9]]
+    ; SI: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND22]], [[C7]](s32)
+    ; SI: [[OR15:%[0-9]+]]:_(s32) = G_OR [[AND21]], [[SHL15]]
+    ; SI: [[BITCAST16:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR15]](s32)
+    ; SI: [[COPY25:%[0-9]+]]:_(s32) = COPY [[BITCAST6]](s32)
+    ; SI: [[AND23:%[0-9]+]]:_(s32) = G_AND [[COPY25]], [[C9]]
+    ; SI: [[COPY26:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32)
+    ; SI: [[AND24:%[0-9]+]]:_(s32) = G_AND [[COPY26]], [[C9]]
+    ; SI: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[AND24]], [[C7]](s32)
+    ; SI: [[OR16:%[0-9]+]]:_(s32) = G_OR [[AND23]], [[SHL16]]
+    ; SI: [[BITCAST17:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR16]](s32)
+    ; SI: [[COPY27:%[0-9]+]]:_(s32) = COPY [[BITCAST7]](s32)
+    ; SI: [[AND25:%[0-9]+]]:_(s32) = G_AND [[COPY27]], [[C9]]
+    ; SI: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[C8]], [[C7]](s32)
+    ; SI: [[OR17:%[0-9]+]]:_(s32) = G_OR [[AND25]], [[SHL17]]
+    ; SI: [[BITCAST18:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR17]](s32)
+    ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<42 x s16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>), [[BITCAST10]](<2 x s16>), [[BITCAST11]](<2 x s16>), [[BITCAST12]](<2 x s16>), [[BITCAST13]](<2 x s16>), [[BITCAST14]](<2 x s16>), [[BITCAST15]](<2 x s16>), [[BITCAST16]](<2 x s16>), [[BITCAST17]](<2 x s16>), [[BITCAST18]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>)
+    ; SI: [[UV8:%[0-9]+]]:_(<21 x s16>), [[UV9:%[0-9]+]]:_(<21 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<42 x s16>)
+    ; SI: [[DEF3:%[0-9]+]]:_(<22 x s16>) = G_IMPLICIT_DEF
+    ; SI: [[INSERT:%[0-9]+]]:_(<22 x s16>) = G_INSERT [[DEF3]], [[UV8]](<21 x s16>), 0
     ; SI: [[DEF4:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
-    ; SI: [[UV28:%[0-9]+]]:_(<2 x s16>), [[UV29:%[0-9]+]]:_(<2 x s16>), [[UV30:%[0-9]+]]:_(<2 x s16>), [[UV31:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT7]](<8 x s16>)
-    ; SI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV28]](<2 x s16>)
-    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C5]](s32)
-    ; SI: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV29]](<2 x s16>)
-    ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C5]](s32)
-    ; SI: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV30]](<2 x s16>)
-    ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C5]](s32)
-    ; SI: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV31]](<2 x s16>)
-    ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C5]](s32)
-    ; SI: [[UV32:%[0-9]+]]:_(<2 x s16>), [[UV33:%[0-9]+]]:_(<2 x s16>), [[UV34:%[0-9]+]]:_(<2 x s16>), [[UV35:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF4]](<8 x s16>)
-    ; SI: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV32]](<2 x s16>)
-    ; SI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C5]](s32)
-    ; SI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; SI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
-    ; SI: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C8]]
-    ; SI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
-    ; SI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C8]]
-    ; SI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C5]](s32)
-    ; SI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[AND14]], [[SHL10]]
-    ; SI: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR10]](s32)
-    ; SI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32)
-    ; SI: [[AND16:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C8]]
-    ; SI: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
-    ; SI: [[AND17:%[0-9]+]]:_(s32) = G_AND [[COPY17]], [[C8]]
-    ; SI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[C5]](s32)
-    ; SI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[AND16]], [[SHL11]]
-    ; SI: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR11]](s32)
-    ; SI: [[COPY18:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32)
-    ; SI: [[AND18:%[0-9]+]]:_(s32) = G_AND [[COPY18]], [[C8]]
-    ; SI: [[COPY19:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
-    ; SI: [[AND19:%[0-9]+]]:_(s32) = G_AND [[COPY19]], [[C8]]
-    ; SI: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[C5]](s32)
-    ; SI: [[OR12:%[0-9]+]]:_(s32) = G_OR [[AND18]], [[SHL12]]
-    ; SI: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR12]](s32)
-    ; SI: [[COPY20:%[0-9]+]]:_(s32) = COPY [[BITCAST6]](s32)
-    ; SI: [[AND20:%[0-9]+]]:_(s32) = G_AND [[COPY20]], [[C8]]
-    ; SI: [[COPY21:%[0-9]+]]:_(s32) = COPY [[BITCAST7]](s32)
-    ; SI: [[AND21:%[0-9]+]]:_(s32) = G_AND [[COPY21]], [[C8]]
-    ; SI: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[C5]](s32)
-    ; SI: [[OR13:%[0-9]+]]:_(s32) = G_OR [[AND20]], [[SHL13]]
-    ; SI: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR13]](s32)
-    ; SI: $vgpr0 = COPY [[BITCAST8]](<2 x s16>)
-    ; SI: $vgpr1 = COPY [[BITCAST9]](<2 x s16>)
-    ; SI: $vgpr2 = COPY [[BITCAST10]](<2 x s16>)
-    ; SI: $vgpr3 = COPY [[BITCAST11]](<2 x s16>)
+    ; SI: [[DEF5:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
+    ; SI: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>), [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>), [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>), [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>), [[UV20:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<22 x s16>)
+    ; SI: [[BITCAST19:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>)
+    ; SI: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST19]], [[C7]](s32)
+    ; SI: [[BITCAST20:%[0-9]+]]:_(s32) = G_BITCAST [[UV11]](<2 x s16>)
+    ; SI: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST20]], [[C7]](s32)
+    ; SI: [[BITCAST21:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>)
+    ; SI: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST21]], [[C7]](s32)
+    ; SI: [[BITCAST22:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>)
+    ; SI: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST22]], [[C7]](s32)
+    ; SI: [[UV21:%[0-9]+]]:_(<2 x s16>), [[UV22:%[0-9]+]]:_(<2 x s16>), [[UV23:%[0-9]+]]:_(<2 x s16>), [[UV24:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF4]](<8 x s16>)
+    ; SI: [[BITCAST23:%[0-9]+]]:_(s32) = G_BITCAST [[UV21]](<2 x s16>)
+    ; SI: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST23]], [[C7]](s32)
+    ; SI: [[COPY28:%[0-9]+]]:_(s32) = COPY [[BITCAST19]](s32)
+    ; SI: [[AND26:%[0-9]+]]:_(s32) = G_AND [[COPY28]], [[C9]]
+    ; SI: [[COPY29:%[0-9]+]]:_(s32) = COPY [[LSHR8]](s32)
+    ; SI: [[AND27:%[0-9]+]]:_(s32) = G_AND [[COPY29]], [[C9]]
+    ; SI: [[SHL18:%[0-9]+]]:_(s32) = G_SHL [[AND27]], [[C7]](s32)
+    ; SI: [[OR18:%[0-9]+]]:_(s32) = G_OR [[AND26]], [[SHL18]]
+    ; SI: [[BITCAST24:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR18]](s32)
+    ; SI: [[COPY30:%[0-9]+]]:_(s32) = COPY [[BITCAST20]](s32)
+    ; SI: [[AND28:%[0-9]+]]:_(s32) = G_AND [[COPY30]], [[C9]]
+    ; SI: [[COPY31:%[0-9]+]]:_(s32) = COPY [[LSHR9]](s32)
+    ; SI: [[AND29:%[0-9]+]]:_(s32) = G_AND [[COPY31]], [[C9]]
+    ; SI: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[AND29]], [[C7]](s32)
+    ; SI: [[OR19:%[0-9]+]]:_(s32) = G_OR [[AND28]], [[SHL19]]
+    ; SI: [[BITCAST25:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR19]](s32)
+    ; SI: [[COPY32:%[0-9]+]]:_(s32) = COPY [[BITCAST21]](s32)
+    ; SI: [[AND30:%[0-9]+]]:_(s32) = G_AND [[COPY32]], [[C9]]
+    ; SI: [[COPY33:%[0-9]+]]:_(s32) = COPY [[LSHR10]](s32)
+    ; SI: [[AND31:%[0-9]+]]:_(s32) = G_AND [[COPY33]], [[C9]]
+    ; SI: [[SHL20:%[0-9]+]]:_(s32) = G_SHL [[AND31]], [[C7]](s32)
+    ; SI: [[OR20:%[0-9]+]]:_(s32) = G_OR [[AND30]], [[SHL20]]
+    ; SI: [[BITCAST26:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR20]](s32)
+    ; SI: [[COPY34:%[0-9]+]]:_(s32) = COPY [[BITCAST22]](s32)
+    ; SI: [[AND32:%[0-9]+]]:_(s32) = G_AND [[COPY34]], [[C9]]
+    ; SI: [[COPY35:%[0-9]+]]:_(s32) = COPY [[BITCAST23]](s32)
+    ; SI: [[AND33:%[0-9]+]]:_(s32) = G_AND [[COPY35]], [[C9]]
+    ; SI: [[SHL21:%[0-9]+]]:_(s32) = G_SHL [[AND33]], [[C7]](s32)
+    ; SI: [[OR21:%[0-9]+]]:_(s32) = G_OR [[AND32]], [[SHL21]]
+    ; SI: [[BITCAST27:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR21]](s32)
+    ; SI: $vgpr0 = COPY [[BITCAST24]](<2 x s16>)
+    ; SI: $vgpr1 = COPY [[BITCAST25]](<2 x s16>)
+    ; SI: $vgpr2 = COPY [[BITCAST26]](<2 x s16>)
+    ; SI: $vgpr3 = COPY [[BITCAST27]](<2 x s16>)
     ; CI-HSA-LABEL: name: test_load_global_v7s16_align1
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; CI-HSA: [[LOAD:%[0-9]+]]:_(<7 x s16>) = G_LOAD [[COPY]](p1) :: (load 14, align 1, addrspace 1)
@@ -10350,14 +10493,8 @@ body: |
     ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32)
     ; CI-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
     ; CI-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
-    ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; CI-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; CI-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
-    ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; CI-MESA: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; CI-MESA: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; CI-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
+    ; CI-MESA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CI-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
     ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 1 from unknown-address + 4, addrspace 1)
     ; CI-MESA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
     ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load 1 from unknown-address + 5, addrspace 1)
@@ -10366,9 +10503,9 @@ body: |
     ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
     ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32)
     ; CI-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]]
-    ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32)
-    ; CI-MESA: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
-    ; CI-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
+    ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32)
+    ; CI-MESA: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
+    ; CI-MESA: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
     ; CI-MESA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64)
     ; CI-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load 1 from unknown-address + 6, addrspace 1)
     ; CI-MESA: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64)
@@ -10378,17 +10515,11 @@ body: |
     ; CI-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
     ; CI-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32)
     ; CI-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]]
-    ; CI-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY6]](s32)
-    ; CI-MESA: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32)
-    ; CI-MESA: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]]
-    ; CI-MESA: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
-    ; CI-MESA: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16)
-    ; CI-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C5]](s32)
-    ; CI-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
-    ; CI-MESA: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
-    ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>)
-    ; CI-MESA: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; CI-MESA: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
+    ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY6]](s32)
+    ; CI-MESA: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
+    ; CI-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]]
+    ; CI-MESA: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; CI-MESA: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
     ; CI-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load 1 from unknown-address + 8, addrspace 1)
     ; CI-MESA: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
     ; CI-MESA: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load 1 from unknown-address + 9, addrspace 1)
@@ -10397,9 +10528,9 @@ body: |
     ; CI-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
     ; CI-MESA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32)
     ; CI-MESA: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]]
-    ; CI-MESA: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY8]](s32)
-    ; CI-MESA: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32)
-    ; CI-MESA: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]]
+    ; CI-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY8]](s32)
+    ; CI-MESA: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32)
+    ; CI-MESA: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]]
     ; CI-MESA: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
     ; CI-MESA: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load 1 from unknown-address + 10, addrspace 1)
     ; CI-MESA: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64)
@@ -10409,15 +10540,10 @@ body: |
     ; CI-MESA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
     ; CI-MESA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32)
     ; CI-MESA: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C3]]
-    ; CI-MESA: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY10]](s32)
-    ; CI-MESA: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32)
-    ; CI-MESA: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]]
-    ; CI-MESA: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16)
-    ; CI-MESA: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16)
-    ; CI-MESA: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C5]](s32)
-    ; CI-MESA: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL8]]
-    ; CI-MESA: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32)
-    ; CI-MESA: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s64)
+    ; CI-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY10]](s32)
+    ; CI-MESA: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32)
+    ; CI-MESA: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]]
+    ; CI-MESA: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64)
     ; CI-MESA: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p1) :: (load 1 from unknown-address + 12, addrspace 1)
     ; CI-MESA: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64)
     ; CI-MESA: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p1) :: (load 1 from unknown-address + 13, addrspace 1)
@@ -10426,77 +10552,151 @@ body: |
     ; CI-MESA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
     ; CI-MESA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32)
     ; CI-MESA: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]]
-    ; CI-MESA: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY12]](s32)
-    ; CI-MESA: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32)
-    ; CI-MESA: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]]
-    ; CI-MESA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CI-MESA: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY12]](s32)
+    ; CI-MESA: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32)
+    ; CI-MESA: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]]
+    ; CI-MESA: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
     ; CI-MESA: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CI-MESA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; CI-MESA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
-    ; CI-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
-    ; CI-MESA: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BITCAST2]](<2 x s16>), 0
-    ; CI-MESA: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; CI-MESA: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>)
-    ; CI-MESA: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0
-    ; CI-MESA: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[OR9]](s16), 32
-    ; CI-MESA: [[CONCAT_VECTORS3:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT3]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; CI-MESA: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<12 x s16>)
-    ; CI-MESA: [[DEF2:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
-    ; CI-MESA: [[DEF3:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
-    ; CI-MESA: [[CONCAT_VECTORS4:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[DEF2]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>)
-    ; CI-MESA: [[UV12:%[0-9]+]]:_(<7 x s16>), [[UV13:%[0-9]+]]:_(<7 x s16>), [[UV14:%[0-9]+]]:_(<7 x s16>), [[UV15:%[0-9]+]]:_(<7 x s16>), [[UV16:%[0-9]+]]:_(<7 x s16>), [[UV17:%[0-9]+]]:_(<7 x s16>), [[UV18:%[0-9]+]]:_(<7 x s16>), [[UV19:%[0-9]+]]:_(<7 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS4]](<56 x s16>)
-    ; CI-MESA: [[INSERT4:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[DEF3]], [[UV12]](<7 x s16>), 0
-    ; CI-MESA: [[INSERT5:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[INSERT4]], [[CONCAT_VECTORS]](<4 x s16>), 0
-    ; CI-MESA: [[CONCAT_VECTORS5:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[INSERT5]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>)
-    ; CI-MESA: [[UV20:%[0-9]+]]:_(<7 x s16>), [[UV21:%[0-9]+]]:_(<7 x s16>), [[UV22:%[0-9]+]]:_(<7 x s16>), [[UV23:%[0-9]+]]:_(<7 x s16>), [[UV24:%[0-9]+]]:_(<7 x s16>), [[UV25:%[0-9]+]]:_(<7 x s16>), [[UV26:%[0-9]+]]:_(<7 x s16>), [[UV27:%[0-9]+]]:_(<7 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS5]](<56 x s16>)
-    ; CI-MESA: [[INSERT6:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[DEF3]], [[UV20]](<7 x s16>), 0
-    ; CI-MESA: [[INSERT7:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[INSERT6]], [[UV8]](<3 x s16>), 64
+    ; CI-MESA: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CI-MESA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
+    ; CI-MESA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; CI-MESA: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C7]](s32)
+    ; CI-MESA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; CI-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C7]](s32)
+    ; CI-MESA: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
+    ; CI-MESA: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+    ; CI-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C7]](s32)
+    ; CI-MESA: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+    ; CI-MESA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C7]](s32)
+    ; CI-MESA: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
+    ; CI-MESA: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
+    ; CI-MESA: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C7]](s32)
+    ; CI-MESA: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>)
+    ; CI-MESA: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C7]](s32)
+    ; CI-MESA: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
+    ; CI-MESA: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
+    ; CI-MESA: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C7]](s32)
+    ; CI-MESA: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
+    ; CI-MESA: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C7]](s32)
+    ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; CI-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; CI-MESA: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C7]](s32)
+    ; CI-MESA: [[OR7:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL7]]
+    ; CI-MESA: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32)
+    ; CI-MESA: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; CI-MESA: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
+    ; CI-MESA: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C7]](s32)
+    ; CI-MESA: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL8]]
+    ; CI-MESA: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32)
+    ; CI-MESA: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16)
+    ; CI-MESA: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR5]](s16)
+    ; CI-MESA: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C7]](s32)
+    ; CI-MESA: [[OR9:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL9]]
+    ; CI-MESA: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR9]](s32)
+    ; CI-MESA: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16)
+    ; CI-MESA: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CI-MESA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-MESA: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[COPY14]], [[C7]](s32)
+    ; CI-MESA: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL10]]
+    ; CI-MESA: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR10]](s32)
+    ; CI-MESA: [[COPY15:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI-MESA: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-MESA: [[COPY16:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; CI-MESA: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C9]]
+    ; CI-MESA: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C7]](s32)
+    ; CI-MESA: [[OR11:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL11]]
+    ; CI-MESA: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR11]](s32)
+    ; CI-MESA: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; CI-MESA: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY17]], [[C9]]
+    ; CI-MESA: [[COPY18:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; CI-MESA: [[AND16:%[0-9]+]]:_(s32) = G_AND [[COPY18]], [[C9]]
+    ; CI-MESA: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND16]], [[C7]](s32)
+    ; CI-MESA: [[OR12:%[0-9]+]]:_(s32) = G_OR [[AND15]], [[SHL12]]
+    ; CI-MESA: [[BITCAST13:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR12]](s32)
+    ; CI-MESA: [[COPY19:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+    ; CI-MESA: [[AND17:%[0-9]+]]:_(s32) = G_AND [[COPY19]], [[C9]]
+    ; CI-MESA: [[COPY20:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
+    ; CI-MESA: [[AND18:%[0-9]+]]:_(s32) = G_AND [[COPY20]], [[C9]]
+    ; CI-MESA: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND18]], [[C7]](s32)
+    ; CI-MESA: [[OR13:%[0-9]+]]:_(s32) = G_OR [[AND17]], [[SHL13]]
+    ; CI-MESA: [[BITCAST14:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR13]](s32)
+    ; CI-MESA: [[COPY21:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+    ; CI-MESA: [[AND19:%[0-9]+]]:_(s32) = G_AND [[COPY21]], [[C9]]
+    ; CI-MESA: [[COPY22:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32)
+    ; CI-MESA: [[AND20:%[0-9]+]]:_(s32) = G_AND [[COPY22]], [[C9]]
+    ; CI-MESA: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND20]], [[C7]](s32)
+    ; CI-MESA: [[OR14:%[0-9]+]]:_(s32) = G_OR [[AND19]], [[SHL14]]
+    ; CI-MESA: [[BITCAST15:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR14]](s32)
+    ; CI-MESA: [[COPY23:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32)
+    ; CI-MESA: [[AND21:%[0-9]+]]:_(s32) = G_AND [[COPY23]], [[C9]]
+    ; CI-MESA: [[COPY24:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32)
+    ; CI-MESA: [[AND22:%[0-9]+]]:_(s32) = G_AND [[COPY24]], [[C9]]
+    ; CI-MESA: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND22]], [[C7]](s32)
+    ; CI-MESA: [[OR15:%[0-9]+]]:_(s32) = G_OR [[AND21]], [[SHL15]]
+    ; CI-MESA: [[BITCAST16:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR15]](s32)
+    ; CI-MESA: [[COPY25:%[0-9]+]]:_(s32) = COPY [[BITCAST6]](s32)
+    ; CI-MESA: [[AND23:%[0-9]+]]:_(s32) = G_AND [[COPY25]], [[C9]]
+    ; CI-MESA: [[COPY26:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32)
+    ; CI-MESA: [[AND24:%[0-9]+]]:_(s32) = G_AND [[COPY26]], [[C9]]
+    ; CI-MESA: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[AND24]], [[C7]](s32)
+    ; CI-MESA: [[OR16:%[0-9]+]]:_(s32) = G_OR [[AND23]], [[SHL16]]
+    ; CI-MESA: [[BITCAST17:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR16]](s32)
+    ; CI-MESA: [[COPY27:%[0-9]+]]:_(s32) = COPY [[BITCAST7]](s32)
+    ; CI-MESA: [[AND25:%[0-9]+]]:_(s32) = G_AND [[COPY27]], [[C9]]
+    ; CI-MESA: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[C8]], [[C7]](s32)
+    ; CI-MESA: [[OR17:%[0-9]+]]:_(s32) = G_OR [[AND25]], [[SHL17]]
+    ; CI-MESA: [[BITCAST18:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR17]](s32)
+    ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<42 x s16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>), [[BITCAST10]](<2 x s16>), [[BITCAST11]](<2 x s16>), [[BITCAST12]](<2 x s16>), [[BITCAST13]](<2 x s16>), [[BITCAST14]](<2 x s16>), [[BITCAST15]](<2 x s16>), [[BITCAST16]](<2 x s16>), [[BITCAST17]](<2 x s16>), [[BITCAST18]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>)
+    ; CI-MESA: [[UV8:%[0-9]+]]:_(<21 x s16>), [[UV9:%[0-9]+]]:_(<21 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<42 x s16>)
+    ; CI-MESA: [[DEF3:%[0-9]+]]:_(<22 x s16>) = G_IMPLICIT_DEF
+    ; CI-MESA: [[INSERT:%[0-9]+]]:_(<22 x s16>) = G_INSERT [[DEF3]], [[UV8]](<21 x s16>), 0
     ; CI-MESA: [[DEF4:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
-    ; CI-MESA: [[UV28:%[0-9]+]]:_(<2 x s16>), [[UV29:%[0-9]+]]:_(<2 x s16>), [[UV30:%[0-9]+]]:_(<2 x s16>), [[UV31:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT7]](<8 x s16>)
-    ; CI-MESA: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV28]](<2 x s16>)
-    ; CI-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C5]](s32)
-    ; CI-MESA: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV29]](<2 x s16>)
-    ; CI-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C5]](s32)
-    ; CI-MESA: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV30]](<2 x s16>)
-    ; CI-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C5]](s32)
-    ; CI-MESA: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV31]](<2 x s16>)
-    ; CI-MESA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C5]](s32)
-    ; CI-MESA: [[UV32:%[0-9]+]]:_(<2 x s16>), [[UV33:%[0-9]+]]:_(<2 x s16>), [[UV34:%[0-9]+]]:_(<2 x s16>), [[UV35:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF4]](<8 x s16>)
-    ; CI-MESA: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV32]](<2 x s16>)
-    ; CI-MESA: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C5]](s32)
-    ; CI-MESA: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CI-MESA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
-    ; CI-MESA: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C8]]
-    ; CI-MESA: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
-    ; CI-MESA: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C8]]
-    ; CI-MESA: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C5]](s32)
-    ; CI-MESA: [[OR10:%[0-9]+]]:_(s32) = G_OR [[AND14]], [[SHL10]]
-    ; CI-MESA: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR10]](s32)
-    ; CI-MESA: [[COPY16:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32)
-    ; CI-MESA: [[AND16:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C8]]
-    ; CI-MESA: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
-    ; CI-MESA: [[AND17:%[0-9]+]]:_(s32) = G_AND [[COPY17]], [[C8]]
-    ; CI-MESA: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[C5]](s32)
-    ; CI-MESA: [[OR11:%[0-9]+]]:_(s32) = G_OR [[AND16]], [[SHL11]]
-    ; CI-MESA: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR11]](s32)
-    ; CI-MESA: [[COPY18:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32)
-    ; CI-MESA: [[AND18:%[0-9]+]]:_(s32) = G_AND [[COPY18]], [[C8]]
-    ; CI-MESA: [[COPY19:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
-    ; CI-MESA: [[AND19:%[0-9]+]]:_(s32) = G_AND [[COPY19]], [[C8]]
-    ; CI-MESA: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[C5]](s32)
-    ; CI-MESA: [[OR12:%[0-9]+]]:_(s32) = G_OR [[AND18]], [[SHL12]]
-    ; CI-MESA: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR12]](s32)
-    ; CI-MESA: [[COPY20:%[0-9]+]]:_(s32) = COPY [[BITCAST6]](s32)
-    ; CI-MESA: [[AND20:%[0-9]+]]:_(s32) = G_AND [[COPY20]], [[C8]]
-    ; CI-MESA: [[COPY21:%[0-9]+]]:_(s32) = COPY [[BITCAST7]](s32)
-    ; CI-MESA: [[AND21:%[0-9]+]]:_(s32) = G_AND [[COPY21]], [[C8]]
-    ; CI-MESA: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[C5]](s32)
-    ; CI-MESA: [[OR13:%[0-9]+]]:_(s32) = G_OR [[AND20]], [[SHL13]]
-    ; CI-MESA: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR13]](s32)
-    ; CI-MESA: $vgpr0 = COPY [[BITCAST8]](<2 x s16>)
-    ; CI-MESA: $vgpr1 = COPY [[BITCAST9]](<2 x s16>)
-    ; CI-MESA: $vgpr2 = COPY [[BITCAST10]](<2 x s16>)
-    ; CI-MESA: $vgpr3 = COPY [[BITCAST11]](<2 x s16>)
+    ; CI-MESA: [[DEF5:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
+    ; CI-MESA: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>), [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>), [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>), [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>), [[UV20:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<22 x s16>)
+    ; CI-MESA: [[BITCAST19:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>)
+    ; CI-MESA: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST19]], [[C7]](s32)
+    ; CI-MESA: [[BITCAST20:%[0-9]+]]:_(s32) = G_BITCAST [[UV11]](<2 x s16>)
+    ; CI-MESA: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST20]], [[C7]](s32)
+    ; CI-MESA: [[BITCAST21:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>)
+    ; CI-MESA: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST21]], [[C7]](s32)
+    ; CI-MESA: [[BITCAST22:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>)
+    ; CI-MESA: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST22]], [[C7]](s32)
+    ; CI-MESA: [[UV21:%[0-9]+]]:_(<2 x s16>), [[UV22:%[0-9]+]]:_(<2 x s16>), [[UV23:%[0-9]+]]:_(<2 x s16>), [[UV24:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF4]](<8 x s16>)
+    ; CI-MESA: [[BITCAST23:%[0-9]+]]:_(s32) = G_BITCAST [[UV21]](<2 x s16>)
+    ; CI-MESA: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST23]], [[C7]](s32)
+    ; CI-MESA: [[COPY28:%[0-9]+]]:_(s32) = COPY [[BITCAST19]](s32)
+    ; CI-MESA: [[AND26:%[0-9]+]]:_(s32) = G_AND [[COPY28]], [[C9]]
+    ; CI-MESA: [[COPY29:%[0-9]+]]:_(s32) = COPY [[LSHR8]](s32)
+    ; CI-MESA: [[AND27:%[0-9]+]]:_(s32) = G_AND [[COPY29]], [[C9]]
+    ; CI-MESA: [[SHL18:%[0-9]+]]:_(s32) = G_SHL [[AND27]], [[C7]](s32)
+    ; CI-MESA: [[OR18:%[0-9]+]]:_(s32) = G_OR [[AND26]], [[SHL18]]
+    ; CI-MESA: [[BITCAST24:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR18]](s32)
+    ; CI-MESA: [[COPY30:%[0-9]+]]:_(s32) = COPY [[BITCAST20]](s32)
+    ; CI-MESA: [[AND28:%[0-9]+]]:_(s32) = G_AND [[COPY30]], [[C9]]
+    ; CI-MESA: [[COPY31:%[0-9]+]]:_(s32) = COPY [[LSHR9]](s32)
+    ; CI-MESA: [[AND29:%[0-9]+]]:_(s32) = G_AND [[COPY31]], [[C9]]
+    ; CI-MESA: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[AND29]], [[C7]](s32)
+    ; CI-MESA: [[OR19:%[0-9]+]]:_(s32) = G_OR [[AND28]], [[SHL19]]
+    ; CI-MESA: [[BITCAST25:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR19]](s32)
+    ; CI-MESA: [[COPY32:%[0-9]+]]:_(s32) = COPY [[BITCAST21]](s32)
+    ; CI-MESA: [[AND30:%[0-9]+]]:_(s32) = G_AND [[COPY32]], [[C9]]
+    ; CI-MESA: [[COPY33:%[0-9]+]]:_(s32) = COPY [[LSHR10]](s32)
+    ; CI-MESA: [[AND31:%[0-9]+]]:_(s32) = G_AND [[COPY33]], [[C9]]
+    ; CI-MESA: [[SHL20:%[0-9]+]]:_(s32) = G_SHL [[AND31]], [[C7]](s32)
+    ; CI-MESA: [[OR20:%[0-9]+]]:_(s32) = G_OR [[AND30]], [[SHL20]]
+    ; CI-MESA: [[BITCAST26:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR20]](s32)
+    ; CI-MESA: [[COPY34:%[0-9]+]]:_(s32) = COPY [[BITCAST22]](s32)
+    ; CI-MESA: [[AND32:%[0-9]+]]:_(s32) = G_AND [[COPY34]], [[C9]]
+    ; CI-MESA: [[COPY35:%[0-9]+]]:_(s32) = COPY [[BITCAST23]](s32)
+    ; CI-MESA: [[AND33:%[0-9]+]]:_(s32) = G_AND [[COPY35]], [[C9]]
+    ; CI-MESA: [[SHL21:%[0-9]+]]:_(s32) = G_SHL [[AND33]], [[C7]](s32)
+    ; CI-MESA: [[OR21:%[0-9]+]]:_(s32) = G_OR [[AND32]], [[SHL21]]
+    ; CI-MESA: [[BITCAST27:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR21]](s32)
+    ; CI-MESA: $vgpr0 = COPY [[BITCAST24]](<2 x s16>)
+    ; CI-MESA: $vgpr1 = COPY [[BITCAST25]](<2 x s16>)
+    ; CI-MESA: $vgpr2 = COPY [[BITCAST26]](<2 x s16>)
+    ; CI-MESA: $vgpr3 = COPY [[BITCAST27]](<2 x s16>)
     ; VI-LABEL: name: test_load_global_v7s16_align1
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1)
@@ -10522,14 +10722,8 @@ body: |
     ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]]
     ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16)
     ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; VI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
     ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 1 from unknown-address + 4, addrspace 1)
     ; VI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
     ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load 1 from unknown-address + 5, addrspace 1)
@@ -10537,8 +10731,8 @@ body: |
     ; VI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]]
     ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
     ; VI: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]]
-    ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16)
-    ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL3]]
+    ; VI: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16)
+    ; VI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
     ; VI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64)
     ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load 1 from unknown-address + 6, addrspace 1)
     ; VI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64)
@@ -10547,16 +10741,10 @@ body: |
     ; VI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C1]]
     ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
     ; VI: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C1]]
-    ; VI: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C2]](s16)
-    ; VI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL4]]
-    ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
-    ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16)
-    ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C4]](s32)
-    ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
-    ; VI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
-    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>)
-    ; VI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; VI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
+    ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C2]](s16)
+    ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
+    ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; VI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
     ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load 1 from unknown-address + 8, addrspace 1)
     ; VI: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
     ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load 1 from unknown-address + 9, addrspace 1)
@@ -10564,8 +10752,8 @@ body: |
     ; VI: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C1]]
     ; VI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32)
     ; VI: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C1]]
-    ; VI: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C2]](s16)
-    ; VI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]]
+    ; VI: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C2]](s16)
+    ; VI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL4]]
     ; VI: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64)
     ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load 1 from unknown-address + 10, addrspace 1)
     ; VI: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64)
@@ -10574,14 +10762,9 @@ body: |
     ; VI: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C1]]
     ; VI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32)
     ; VI: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C1]]
-    ; VI: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C2]](s16)
-    ; VI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]]
-    ; VI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16)
-    ; VI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16)
-    ; VI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C4]](s32)
-    ; VI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL8]]
-    ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32)
-    ; VI: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64)
+    ; VI: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C2]](s16)
+    ; VI: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL5]]
+    ; VI: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
     ; VI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p1) :: (load 1 from unknown-address + 12, addrspace 1)
     ; VI: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64)
     ; VI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p1) :: (load 1 from unknown-address + 13, addrspace 1)
@@ -10589,76 +10772,150 @@ body: |
     ; VI: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C1]]
     ; VI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD13]](s32)
     ; VI: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C1]]
-    ; VI: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C2]](s16)
-    ; VI: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL9]]
-    ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; VI: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C2]](s16)
+    ; VI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL6]]
+    ; VI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
     ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
-    ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
-    ; VI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BITCAST2]](<2 x s16>), 0
-    ; VI: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; VI: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>)
-    ; VI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0
-    ; VI: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[OR9]](s16), 32
-    ; VI: [[CONCAT_VECTORS3:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT3]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; VI: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<12 x s16>)
-    ; VI: [[DEF2:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[DEF3:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[CONCAT_VECTORS4:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[DEF2]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>)
-    ; VI: [[UV12:%[0-9]+]]:_(<7 x s16>), [[UV13:%[0-9]+]]:_(<7 x s16>), [[UV14:%[0-9]+]]:_(<7 x s16>), [[UV15:%[0-9]+]]:_(<7 x s16>), [[UV16:%[0-9]+]]:_(<7 x s16>), [[UV17:%[0-9]+]]:_(<7 x s16>), [[UV18:%[0-9]+]]:_(<7 x s16>), [[UV19:%[0-9]+]]:_(<7 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS4]](<56 x s16>)
-    ; VI: [[INSERT4:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[DEF3]], [[UV12]](<7 x s16>), 0
-    ; VI: [[INSERT5:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[INSERT4]], [[CONCAT_VECTORS]](<4 x s16>), 0
-    ; VI: [[CONCAT_VECTORS5:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[INSERT5]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>)
-    ; VI: [[UV20:%[0-9]+]]:_(<7 x s16>), [[UV21:%[0-9]+]]:_(<7 x s16>), [[UV22:%[0-9]+]]:_(<7 x s16>), [[UV23:%[0-9]+]]:_(<7 x s16>), [[UV24:%[0-9]+]]:_(<7 x s16>), [[UV25:%[0-9]+]]:_(<7 x s16>), [[UV26:%[0-9]+]]:_(<7 x s16>), [[UV27:%[0-9]+]]:_(<7 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS5]](<56 x s16>)
-    ; VI: [[INSERT6:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[DEF3]], [[UV20]](<7 x s16>), 0
-    ; VI: [[INSERT7:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[INSERT6]], [[UV8]](<3 x s16>), 64
+    ; VI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
+    ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C6]](s32)
+    ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C6]](s32)
+    ; VI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
+    ; VI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+    ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C6]](s32)
+    ; VI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+    ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C6]](s32)
+    ; VI: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
+    ; VI: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
+    ; VI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C6]](s32)
+    ; VI: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>)
+    ; VI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C6]](s32)
+    ; VI: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
+    ; VI: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
+    ; VI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C6]](s32)
+    ; VI: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
+    ; VI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C6]](s32)
+    ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; VI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32)
+    ; VI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL7]]
+    ; VI: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32)
+    ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
+    ; VI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C6]](s32)
+    ; VI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL8]]
+    ; VI: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32)
+    ; VI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16)
+    ; VI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR5]](s16)
+    ; VI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C6]](s32)
+    ; VI: [[OR9:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL9]]
+    ; VI: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR9]](s32)
+    ; VI: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16)
+    ; VI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C7]](s32)
+    ; VI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C6]](s32)
+    ; VI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL10]]
+    ; VI: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR10]](s32)
+    ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C7]](s32)
+    ; VI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; VI: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C8]]
+    ; VI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C6]](s32)
+    ; VI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[COPY2]], [[SHL11]]
+    ; VI: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR11]](s32)
+    ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; VI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C8]]
+    ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; VI: [[AND16:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C8]]
+    ; VI: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND16]], [[C6]](s32)
+    ; VI: [[OR12:%[0-9]+]]:_(s32) = G_OR [[AND15]], [[SHL12]]
+    ; VI: [[BITCAST13:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR12]](s32)
+    ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+    ; VI: [[AND17:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C8]]
+    ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
+    ; VI: [[AND18:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C8]]
+    ; VI: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND18]], [[C6]](s32)
+    ; VI: [[OR13:%[0-9]+]]:_(s32) = G_OR [[AND17]], [[SHL13]]
+    ; VI: [[BITCAST14:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR13]](s32)
+    ; VI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+    ; VI: [[AND19:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C8]]
+    ; VI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32)
+    ; VI: [[AND20:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C8]]
+    ; VI: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND20]], [[C6]](s32)
+    ; VI: [[OR14:%[0-9]+]]:_(s32) = G_OR [[AND19]], [[SHL14]]
+    ; VI: [[BITCAST15:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR14]](s32)
+    ; VI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32)
+    ; VI: [[AND21:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C8]]
+    ; VI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32)
+    ; VI: [[AND22:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C8]]
+    ; VI: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND22]], [[C6]](s32)
+    ; VI: [[OR15:%[0-9]+]]:_(s32) = G_OR [[AND21]], [[SHL15]]
+    ; VI: [[BITCAST16:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR15]](s32)
+    ; VI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST6]](s32)
+    ; VI: [[AND23:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C8]]
+    ; VI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32)
+    ; VI: [[AND24:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C8]]
+    ; VI: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[AND24]], [[C6]](s32)
+    ; VI: [[OR16:%[0-9]+]]:_(s32) = G_OR [[AND23]], [[SHL16]]
+    ; VI: [[BITCAST17:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR16]](s32)
+    ; VI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[BITCAST7]](s32)
+    ; VI: [[AND25:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C8]]
+    ; VI: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[C7]], [[C6]](s32)
+    ; VI: [[OR17:%[0-9]+]]:_(s32) = G_OR [[AND25]], [[SHL17]]
+    ; VI: [[BITCAST18:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR17]](s32)
+    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<42 x s16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>), [[BITCAST10]](<2 x s16>), [[BITCAST11]](<2 x s16>), [[BITCAST12]](<2 x s16>), [[BITCAST13]](<2 x s16>), [[BITCAST14]](<2 x s16>), [[BITCAST15]](<2 x s16>), [[BITCAST16]](<2 x s16>), [[BITCAST17]](<2 x s16>), [[BITCAST18]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>)
+    ; VI: [[UV8:%[0-9]+]]:_(<21 x s16>), [[UV9:%[0-9]+]]:_(<21 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<42 x s16>)
+    ; VI: [[DEF3:%[0-9]+]]:_(<22 x s16>) = G_IMPLICIT_DEF
+    ; VI: [[INSERT:%[0-9]+]]:_(<22 x s16>) = G_INSERT [[DEF3]], [[UV8]](<21 x s16>), 0
     ; VI: [[DEF4:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[UV28:%[0-9]+]]:_(<2 x s16>), [[UV29:%[0-9]+]]:_(<2 x s16>), [[UV30:%[0-9]+]]:_(<2 x s16>), [[UV31:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT7]](<8 x s16>)
-    ; VI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV28]](<2 x s16>)
-    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C4]](s32)
-    ; VI: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV29]](<2 x s16>)
-    ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C4]](s32)
-    ; VI: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV30]](<2 x s16>)
-    ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C4]](s32)
-    ; VI: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV31]](<2 x s16>)
-    ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C4]](s32)
-    ; VI: [[UV32:%[0-9]+]]:_(<2 x s16>), [[UV33:%[0-9]+]]:_(<2 x s16>), [[UV34:%[0-9]+]]:_(<2 x s16>), [[UV35:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF4]](<8 x s16>)
-    ; VI: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV32]](<2 x s16>)
-    ; VI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C4]](s32)
-    ; VI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
-    ; VI: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C7]]
-    ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
-    ; VI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C7]]
-    ; VI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C4]](s32)
-    ; VI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[AND14]], [[SHL10]]
-    ; VI: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR10]](s32)
-    ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32)
-    ; VI: [[AND16:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C7]]
-    ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
-    ; VI: [[AND17:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C7]]
-    ; VI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[C4]](s32)
-    ; VI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[AND16]], [[SHL11]]
-    ; VI: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR11]](s32)
-    ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32)
-    ; VI: [[AND18:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C7]]
-    ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
-    ; VI: [[AND19:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C7]]
-    ; VI: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[C4]](s32)
-    ; VI: [[OR12:%[0-9]+]]:_(s32) = G_OR [[AND18]], [[SHL12]]
-    ; VI: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR12]](s32)
-    ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST6]](s32)
-    ; VI: [[AND20:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C7]]
-    ; VI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[BITCAST7]](s32)
-    ; VI: [[AND21:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C7]]
-    ; VI: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[C4]](s32)
-    ; VI: [[OR13:%[0-9]+]]:_(s32) = G_OR [[AND20]], [[SHL13]]
-    ; VI: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR13]](s32)
-    ; VI: $vgpr0 = COPY [[BITCAST8]](<2 x s16>)
-    ; VI: $vgpr1 = COPY [[BITCAST9]](<2 x s16>)
-    ; VI: $vgpr2 = COPY [[BITCAST10]](<2 x s16>)
-    ; VI: $vgpr3 = COPY [[BITCAST11]](<2 x s16>)
+    ; VI: [[DEF5:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
+    ; VI: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>), [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>), [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>), [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>), [[UV20:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<22 x s16>)
+    ; VI: [[BITCAST19:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>)
+    ; VI: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST19]], [[C6]](s32)
+    ; VI: [[BITCAST20:%[0-9]+]]:_(s32) = G_BITCAST [[UV11]](<2 x s16>)
+    ; VI: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST20]], [[C6]](s32)
+    ; VI: [[BITCAST21:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>)
+    ; VI: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST21]], [[C6]](s32)
+    ; VI: [[BITCAST22:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>)
+    ; VI: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST22]], [[C6]](s32)
+    ; VI: [[UV21:%[0-9]+]]:_(<2 x s16>), [[UV22:%[0-9]+]]:_(<2 x s16>), [[UV23:%[0-9]+]]:_(<2 x s16>), [[UV24:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF4]](<8 x s16>)
+    ; VI: [[BITCAST23:%[0-9]+]]:_(s32) = G_BITCAST [[UV21]](<2 x s16>)
+    ; VI: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST23]], [[C6]](s32)
+    ; VI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[BITCAST19]](s32)
+    ; VI: [[AND26:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C8]]
+    ; VI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR8]](s32)
+    ; VI: [[AND27:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C8]]
+    ; VI: [[SHL18:%[0-9]+]]:_(s32) = G_SHL [[AND27]], [[C6]](s32)
+    ; VI: [[OR18:%[0-9]+]]:_(s32) = G_OR [[AND26]], [[SHL18]]
+    ; VI: [[BITCAST24:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR18]](s32)
+    ; VI: [[COPY17:%[0-9]+]]:_(s32) = COPY [[BITCAST20]](s32)
+    ; VI: [[AND28:%[0-9]+]]:_(s32) = G_AND [[COPY17]], [[C8]]
+    ; VI: [[COPY18:%[0-9]+]]:_(s32) = COPY [[LSHR9]](s32)
+    ; VI: [[AND29:%[0-9]+]]:_(s32) = G_AND [[COPY18]], [[C8]]
+    ; VI: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[AND29]], [[C6]](s32)
+    ; VI: [[OR19:%[0-9]+]]:_(s32) = G_OR [[AND28]], [[SHL19]]
+    ; VI: [[BITCAST25:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR19]](s32)
+    ; VI: [[COPY19:%[0-9]+]]:_(s32) = COPY [[BITCAST21]](s32)
+    ; VI: [[AND30:%[0-9]+]]:_(s32) = G_AND [[COPY19]], [[C8]]
+    ; VI: [[COPY20:%[0-9]+]]:_(s32) = COPY [[LSHR10]](s32)
+    ; VI: [[AND31:%[0-9]+]]:_(s32) = G_AND [[COPY20]], [[C8]]
+    ; VI: [[SHL20:%[0-9]+]]:_(s32) = G_SHL [[AND31]], [[C6]](s32)
+    ; VI: [[OR20:%[0-9]+]]:_(s32) = G_OR [[AND30]], [[SHL20]]
+    ; VI: [[BITCAST26:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR20]](s32)
+    ; VI: [[COPY21:%[0-9]+]]:_(s32) = COPY [[BITCAST22]](s32)
+    ; VI: [[AND32:%[0-9]+]]:_(s32) = G_AND [[COPY21]], [[C8]]
+    ; VI: [[COPY22:%[0-9]+]]:_(s32) = COPY [[BITCAST23]](s32)
+    ; VI: [[AND33:%[0-9]+]]:_(s32) = G_AND [[COPY22]], [[C8]]
+    ; VI: [[SHL21:%[0-9]+]]:_(s32) = G_SHL [[AND33]], [[C6]](s32)
+    ; VI: [[OR21:%[0-9]+]]:_(s32) = G_OR [[AND32]], [[SHL21]]
+    ; VI: [[BITCAST27:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR21]](s32)
+    ; VI: $vgpr0 = COPY [[BITCAST24]](<2 x s16>)
+    ; VI: $vgpr1 = COPY [[BITCAST25]](<2 x s16>)
+    ; VI: $vgpr2 = COPY [[BITCAST26]](<2 x s16>)
+    ; VI: $vgpr3 = COPY [[BITCAST27]](<2 x s16>)
     ; GFX9-HSA-LABEL: name: test_load_global_v7s16_align1
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<7 x s16>) = G_LOAD [[COPY]](p1) :: (load 14, align 1, addrspace 1)
@@ -10697,9 +10954,6 @@ body: |
     ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]]
     ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16)
     ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
-    ; GFX9-MESA: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32)
     ; GFX9-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
     ; GFX9-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
     ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 1 from unknown-address + 4, addrspace 1)
@@ -10721,10 +10975,6 @@ body: |
     ; GFX9-MESA: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C1]]
     ; GFX9-MESA: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C2]](s16)
     ; GFX9-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
-    ; GFX9-MESA: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[OR2]](s16)
-    ; GFX9-MESA: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[OR3]](s16)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[ANYEXT3]](s32)
-    ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
     ; GFX9-MESA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
     ; GFX9-MESA: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
     ; GFX9-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load 1 from unknown-address + 8, addrspace 1)
@@ -10746,9 +10996,6 @@ body: |
     ; GFX9-MESA: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C1]]
     ; GFX9-MESA: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C2]](s16)
     ; GFX9-MESA: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL5]]
-    ; GFX9-MESA: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[OR4]](s16)
-    ; GFX9-MESA: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[OR5]](s16)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT4]](s32), [[ANYEXT5]](s32)
     ; GFX9-MESA: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
     ; GFX9-MESA: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p1) :: (load 1 from unknown-address + 12, addrspace 1)
     ; GFX9-MESA: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64)
@@ -10759,58 +11006,97 @@ body: |
     ; GFX9-MESA: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C1]]
     ; GFX9-MESA: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C2]](s16)
     ; GFX9-MESA: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL6]]
-    ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
     ; GFX9-MESA: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-MESA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; GFX9-MESA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
-    ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
-    ; GFX9-MESA: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BUILD_VECTOR_TRUNC2]](<2 x s16>), 0
-    ; GFX9-MESA: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; GFX9-MESA: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>)
-    ; GFX9-MESA: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0
-    ; GFX9-MESA: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[OR6]](s16), 32
-    ; GFX9-MESA: [[CONCAT_VECTORS3:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT3]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; GFX9-MESA: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<12 x s16>)
-    ; GFX9-MESA: [[DEF2:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-MESA: [[DEF3:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-MESA: [[CONCAT_VECTORS4:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[DEF2]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>)
-    ; GFX9-MESA: [[UV12:%[0-9]+]]:_(<7 x s16>), [[UV13:%[0-9]+]]:_(<7 x s16>), [[UV14:%[0-9]+]]:_(<7 x s16>), [[UV15:%[0-9]+]]:_(<7 x s16>), [[UV16:%[0-9]+]]:_(<7 x s16>), [[UV17:%[0-9]+]]:_(<7 x s16>), [[UV18:%[0-9]+]]:_(<7 x s16>), [[UV19:%[0-9]+]]:_(<7 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS4]](<56 x s16>)
-    ; GFX9-MESA: [[INSERT4:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[DEF3]], [[UV12]](<7 x s16>), 0
-    ; GFX9-MESA: [[INSERT5:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[INSERT4]], [[CONCAT_VECTORS]](<4 x s16>), 0
-    ; GFX9-MESA: [[CONCAT_VECTORS5:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[INSERT5]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>)
-    ; GFX9-MESA: [[UV20:%[0-9]+]]:_(<7 x s16>), [[UV21:%[0-9]+]]:_(<7 x s16>), [[UV22:%[0-9]+]]:_(<7 x s16>), [[UV23:%[0-9]+]]:_(<7 x s16>), [[UV24:%[0-9]+]]:_(<7 x s16>), [[UV25:%[0-9]+]]:_(<7 x s16>), [[UV26:%[0-9]+]]:_(<7 x s16>), [[UV27:%[0-9]+]]:_(<7 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS5]](<56 x s16>)
-    ; GFX9-MESA: [[INSERT6:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[DEF3]], [[UV20]](<7 x s16>), 0
-    ; GFX9-MESA: [[INSERT7:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[INSERT6]], [[UV8]](<3 x s16>), 64
-    ; GFX9-MESA: [[DEF4:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-MESA: [[UV28:%[0-9]+]]:_(<2 x s16>), [[UV29:%[0-9]+]]:_(<2 x s16>), [[UV30:%[0-9]+]]:_(<2 x s16>), [[UV31:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT7]](<8 x s16>)
-    ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV28]](<2 x s16>)
+    ; GFX9-MESA: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-MESA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
+    ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
     ; GFX9-MESA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
     ; GFX9-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C6]](s32)
-    ; GFX9-MESA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV29]](<2 x s16>)
+    ; GFX9-MESA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
     ; GFX9-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C6]](s32)
-    ; GFX9-MESA: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV30]](<2 x s16>)
+    ; GFX9-MESA: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
+    ; GFX9-MESA: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
     ; GFX9-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C6]](s32)
-    ; GFX9-MESA: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV31]](<2 x s16>)
+    ; GFX9-MESA: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
     ; GFX9-MESA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C6]](s32)
-    ; GFX9-MESA: [[UV32:%[0-9]+]]:_(<2 x s16>), [[UV33:%[0-9]+]]:_(<2 x s16>), [[UV34:%[0-9]+]]:_(<2 x s16>), [[UV35:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF4]](<8 x s16>)
-    ; GFX9-MESA: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV32]](<2 x s16>)
+    ; GFX9-MESA: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
+    ; GFX9-MESA: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
     ; GFX9-MESA: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C6]](s32)
-    ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
-    ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
-    ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
-    ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32)
-    ; GFX9-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
-    ; GFX9-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32)
-    ; GFX9-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
-    ; GFX9-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY7]](s32), [[COPY8]](s32)
-    ; GFX9-MESA: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC3]](<2 x s16>)
-    ; GFX9-MESA: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC4]](<2 x s16>)
-    ; GFX9-MESA: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC5]](<2 x s16>)
-    ; GFX9-MESA: $vgpr3 = COPY [[BUILD_VECTOR_TRUNC6]](<2 x s16>)
+    ; GFX9-MESA: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>)
+    ; GFX9-MESA: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C6]](s32)
+    ; GFX9-MESA: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
+    ; GFX9-MESA: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
+    ; GFX9-MESA: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C6]](s32)
+    ; GFX9-MESA: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
+    ; GFX9-MESA: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C6]](s32)
+    ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; GFX9-MESA: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16)
+    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32)
+    ; GFX9-MESA: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[OR2]](s16)
+    ; GFX9-MESA: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[OR3]](s16)
+    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[ANYEXT3]](s32)
+    ; GFX9-MESA: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[OR4]](s16)
+    ; GFX9-MESA: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[OR5]](s16)
+    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT4]](s32), [[ANYEXT5]](s32)
+    ; GFX9-MESA: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[OR6]](s16)
+    ; GFX9-MESA: [[DEF3:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[DEF3]](s32)
+    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT6]](s32), [[COPY1]](s32)
+    ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[DEF3]](s32)
+    ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY2]](s32), [[COPY3]](s32)
+    ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; GFX9-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY4]](s32), [[COPY5]](s32)
+    ; GFX9-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+    ; GFX9-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
+    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY6]](s32), [[COPY7]](s32)
+    ; GFX9-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+    ; GFX9-MESA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32)
+    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC7:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY8]](s32), [[COPY9]](s32)
+    ; GFX9-MESA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32)
+    ; GFX9-MESA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32)
+    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC8:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[COPY11]](s32)
+    ; GFX9-MESA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST6]](s32)
+    ; GFX9-MESA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32)
+    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC9:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[COPY13]](s32)
+    ; GFX9-MESA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[BITCAST7]](s32)
+    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC10:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[DEF3]](s32)
+    ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<42 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>), [[BUILD_VECTOR_TRUNC4]](<2 x s16>), [[BUILD_VECTOR_TRUNC5]](<2 x s16>), [[BUILD_VECTOR_TRUNC6]](<2 x s16>), [[BUILD_VECTOR_TRUNC7]](<2 x s16>), [[BUILD_VECTOR_TRUNC8]](<2 x s16>), [[BUILD_VECTOR_TRUNC9]](<2 x s16>), [[BUILD_VECTOR_TRUNC10]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>)
+    ; GFX9-MESA: [[UV8:%[0-9]+]]:_(<21 x s16>), [[UV9:%[0-9]+]]:_(<21 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<42 x s16>)
+    ; GFX9-MESA: [[DEF4:%[0-9]+]]:_(<22 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<22 x s16>) = G_INSERT [[DEF4]], [[UV8]](<21 x s16>), 0
+    ; GFX9-MESA: [[DEF5:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-MESA: [[DEF6:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-MESA: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>), [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>), [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>), [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>), [[UV20:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<22 x s16>)
+    ; GFX9-MESA: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>)
+    ; GFX9-MESA: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C6]](s32)
+    ; GFX9-MESA: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV11]](<2 x s16>)
+    ; GFX9-MESA: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST9]], [[C6]](s32)
+    ; GFX9-MESA: [[BITCAST10:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>)
+    ; GFX9-MESA: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST10]], [[C6]](s32)
+    ; GFX9-MESA: [[BITCAST11:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>)
+    ; GFX9-MESA: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST11]], [[C6]](s32)
+    ; GFX9-MESA: [[UV21:%[0-9]+]]:_(<2 x s16>), [[UV22:%[0-9]+]]:_(<2 x s16>), [[UV23:%[0-9]+]]:_(<2 x s16>), [[UV24:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF5]](<8 x s16>)
+    ; GFX9-MESA: [[BITCAST12:%[0-9]+]]:_(s32) = G_BITCAST [[UV21]](<2 x s16>)
+    ; GFX9-MESA: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST12]], [[C6]](s32)
+    ; GFX9-MESA: [[COPY15:%[0-9]+]]:_(s32) = COPY [[BITCAST8]](s32)
+    ; GFX9-MESA: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR8]](s32)
+    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC11:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[COPY16]](s32)
+    ; GFX9-MESA: [[COPY17:%[0-9]+]]:_(s32) = COPY [[BITCAST9]](s32)
+    ; GFX9-MESA: [[COPY18:%[0-9]+]]:_(s32) = COPY [[LSHR9]](s32)
+    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC12:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY17]](s32), [[COPY18]](s32)
+    ; GFX9-MESA: [[COPY19:%[0-9]+]]:_(s32) = COPY [[BITCAST10]](s32)
+    ; GFX9-MESA: [[COPY20:%[0-9]+]]:_(s32) = COPY [[LSHR10]](s32)
+    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC13:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY19]](s32), [[COPY20]](s32)
+    ; GFX9-MESA: [[COPY21:%[0-9]+]]:_(s32) = COPY [[BITCAST11]](s32)
+    ; GFX9-MESA: [[COPY22:%[0-9]+]]:_(s32) = COPY [[BITCAST12]](s32)
+    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC14:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY21]](s32), [[COPY22]](s32)
+    ; GFX9-MESA: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC11]](<2 x s16>)
+    ; GFX9-MESA: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC12]](<2 x s16>)
+    ; GFX9-MESA: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC13]](<2 x s16>)
+    ; GFX9-MESA: $vgpr3 = COPY [[BUILD_VECTOR_TRUNC14]](<2 x s16>)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<7 x s16>) = G_LOAD %0 :: (load 14, align 1, addrspace 1)
     %2:_(<7 x s16>) = G_IMPLICIT_DEF
@@ -11389,10 +11675,9 @@ body: |
     ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
     ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
     ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 4 from unknown-address + 8, addrspace 1)
-    ; SI: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; SI: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[LOAD]](<2 x s32>), 0
-    ; SI: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[LOAD1]](s32), 64
-    ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[INSERT1]](<3 x s32>)
+    ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
+    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32)
+    ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
     ; CI-HSA-LABEL: name: test_load_global_v3s32_align4
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; CI-HSA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load 12, align 4, addrspace 1)
@@ -12578,72 +12863,66 @@ body: |
     ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
     ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
     ; SI: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p1) :: (load 8 from unknown-address + 16, addrspace 1)
-    ; SI: [[DEF:%[0-9]+]]:_(<3 x s64>) = G_IMPLICIT_DEF
-    ; SI: [[INSERT:%[0-9]+]]:_(<3 x s64>) = G_INSERT [[DEF]], [[LOAD]](<2 x s64>), 0
-    ; SI: [[INSERT1:%[0-9]+]]:_(<3 x s64>) = G_INSERT [[INSERT]], [[LOAD1]](s64), 128
-    ; SI: [[DEF1:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
-    ; SI: [[INSERT2:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF1]], [[INSERT1]](<3 x s64>), 0
-    ; SI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT2]](<4 x s64>)
+    ; SI: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
+    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64)
+    ; SI: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
+    ; SI: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
+    ; SI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
     ; CI-HSA-LABEL: name: test_load_global_v3s64_align8
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; CI-HSA: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load 16, align 8, addrspace 1)
     ; CI-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
     ; CI-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
     ; CI-HSA: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p1) :: (load 8 from unknown-address + 16, addrspace 1)
-    ; CI-HSA: [[DEF:%[0-9]+]]:_(<3 x s64>) = G_IMPLICIT_DEF
-    ; CI-HSA: [[INSERT:%[0-9]+]]:_(<3 x s64>) = G_INSERT [[DEF]], [[LOAD]](<2 x s64>), 0
-    ; CI-HSA: [[INSERT1:%[0-9]+]]:_(<3 x s64>) = G_INSERT [[INSERT]], [[LOAD1]](s64), 128
-    ; CI-HSA: [[DEF1:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
-    ; CI-HSA: [[INSERT2:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF1]], [[INSERT1]](<3 x s64>), 0
-    ; CI-HSA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT2]](<4 x s64>)
+    ; CI-HSA: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
+    ; CI-HSA: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64)
+    ; CI-HSA: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
+    ; CI-HSA: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
+    ; CI-HSA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
     ; CI-MESA-LABEL: name: test_load_global_v3s64_align8
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; CI-MESA: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load 16, align 8, addrspace 1)
     ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
     ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
     ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p1) :: (load 8 from unknown-address + 16, addrspace 1)
-    ; CI-MESA: [[DEF:%[0-9]+]]:_(<3 x s64>) = G_IMPLICIT_DEF
-    ; CI-MESA: [[INSERT:%[0-9]+]]:_(<3 x s64>) = G_INSERT [[DEF]], [[LOAD]](<2 x s64>), 0
-    ; CI-MESA: [[INSERT1:%[0-9]+]]:_(<3 x s64>) = G_INSERT [[INSERT]], [[LOAD1]](s64), 128
-    ; CI-MESA: [[DEF1:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
-    ; CI-MESA: [[INSERT2:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF1]], [[INSERT1]](<3 x s64>), 0
-    ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT2]](<4 x s64>)
+    ; CI-MESA: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
+    ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64)
+    ; CI-MESA: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
+    ; CI-MESA: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
+    ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
     ; VI-LABEL: name: test_load_global_v3s64_align8
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; VI: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load 16, align 8, addrspace 1)
     ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
     ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
     ; VI: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p1) :: (load 8 from unknown-address + 16, addrspace 1)
-    ; VI: [[DEF:%[0-9]+]]:_(<3 x s64>) = G_IMPLICIT_DEF
-    ; VI: [[INSERT:%[0-9]+]]:_(<3 x s64>) = G_INSERT [[DEF]], [[LOAD]](<2 x s64>), 0
-    ; VI: [[INSERT1:%[0-9]+]]:_(<3 x s64>) = G_INSERT [[INSERT]], [[LOAD1]](s64), 128
-    ; VI: [[DEF1:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
-    ; VI: [[INSERT2:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF1]], [[INSERT1]](<3 x s64>), 0
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT2]](<4 x s64>)
+    ; VI: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
+    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64)
+    ; VI: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
+    ; VI: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
+    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
     ; GFX9-HSA-LABEL: name: test_load_global_v3s64_align8
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load 16, align 8, addrspace 1)
     ; GFX9-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
     ; GFX9-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
     ; GFX9-HSA: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p1) :: (load 8 from unknown-address + 16, addrspace 1)
-    ; GFX9-HSA: [[DEF:%[0-9]+]]:_(<3 x s64>) = G_IMPLICIT_DEF
-    ; GFX9-HSA: [[INSERT:%[0-9]+]]:_(<3 x s64>) = G_INSERT [[DEF]], [[LOAD]](<2 x s64>), 0
-    ; GFX9-HSA: [[INSERT1:%[0-9]+]]:_(<3 x s64>) = G_INSERT [[INSERT]], [[LOAD1]](s64), 128
-    ; GFX9-HSA: [[DEF1:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
-    ; GFX9-HSA: [[INSERT2:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF1]], [[INSERT1]](<3 x s64>), 0
-    ; GFX9-HSA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT2]](<4 x s64>)
+    ; GFX9-HSA: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
+    ; GFX9-HSA: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64)
+    ; GFX9-HSA: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
+    ; GFX9-HSA: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
+    ; GFX9-HSA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
     ; GFX9-MESA-LABEL: name: test_load_global_v3s64_align8
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load 16, align 8, addrspace 1)
     ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
     ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
     ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p1) :: (load 8 from unknown-address + 16, addrspace 1)
-    ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<3 x s64>) = G_IMPLICIT_DEF
-    ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<3 x s64>) = G_INSERT [[DEF]], [[LOAD]](<2 x s64>), 0
-    ; GFX9-MESA: [[INSERT1:%[0-9]+]]:_(<3 x s64>) = G_INSERT [[INSERT]], [[LOAD1]](s64), 128
-    ; GFX9-MESA: [[DEF1:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
-    ; GFX9-MESA: [[INSERT2:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF1]], [[INSERT1]](<3 x s64>), 0
-    ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT2]](<4 x s64>)
+    ; GFX9-MESA: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
+    ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64)
+    ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
+    ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
+    ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<3 x s64>) = G_LOAD %0 :: (load 24, align 8, addrspace 1)
     %2:_(<4 x s64>) = G_IMPLICIT_DEF
@@ -12783,7 +13062,6 @@ body: |
     ; SI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C10]](s32)
     ; SI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]]
     ; SI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32)
-    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
     ; SI: [[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
     ; SI: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C12]](s64)
     ; SI: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p1) :: (load 1 from unknown-address + 16, addrspace 1)
@@ -12842,24 +13120,21 @@ body: |
     ; SI: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXT11]], [[C10]](s32)
     ; SI: [[OR17:%[0-9]+]]:_(s32) = G_OR [[ZEXT10]], [[SHL17]]
     ; SI: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR16]](s32), [[OR17]](s32)
-    ; SI: [[DEF:%[0-9]+]]:_(<3 x s64>) = G_IMPLICIT_DEF
-    ; SI: [[INSERT:%[0-9]+]]:_(<3 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<2 x s64>), 0
-    ; SI: [[INSERT1:%[0-9]+]]:_(<3 x s64>) = G_INSERT [[INSERT]], [[MV2]](s64), 128
-    ; SI: [[DEF1:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
-    ; SI: [[INSERT2:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF1]], [[INSERT1]](<3 x s64>), 0
-    ; SI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT2]](<4 x s64>)
+    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64)
+    ; SI: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
+    ; SI: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
+    ; SI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
     ; CI-HSA-LABEL: name: test_load_global_v3s64_align1
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; CI-HSA: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load 16, align 1, addrspace 1)
     ; CI-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
     ; CI-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
     ; CI-HSA: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p1) :: (load 8 from unknown-address + 16, align 1, addrspace 1)
-    ; CI-HSA: [[DEF:%[0-9]+]]:_(<3 x s64>) = G_IMPLICIT_DEF
-    ; CI-HSA: [[INSERT:%[0-9]+]]:_(<3 x s64>) = G_INSERT [[DEF]], [[LOAD]](<2 x s64>), 0
-    ; CI-HSA: [[INSERT1:%[0-9]+]]:_(<3 x s64>) = G_INSERT [[INSERT]], [[LOAD1]](s64), 128
-    ; CI-HSA: [[DEF1:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
-    ; CI-HSA: [[INSERT2:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF1]], [[INSERT1]](<3 x s64>), 0
-    ; CI-HSA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT2]](<4 x s64>)
+    ; CI-HSA: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
+    ; CI-HSA: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64)
+    ; CI-HSA: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
+    ; CI-HSA: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
+    ; CI-HSA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
     ; CI-MESA-LABEL: name: test_load_global_v3s64_align1
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1)
@@ -12986,7 +13261,6 @@ body: |
     ; CI-MESA: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C10]](s32)
     ; CI-MESA: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]]
     ; CI-MESA: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32)
-    ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
     ; CI-MESA: [[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
     ; CI-MESA: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C12]](s64)
     ; CI-MESA: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p1) :: (load 1 from unknown-address + 16, addrspace 1)
@@ -13045,12 +13319,10 @@ body: |
     ; CI-MESA: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXT11]], [[C10]](s32)
     ; CI-MESA: [[OR17:%[0-9]+]]:_(s32) = G_OR [[ZEXT10]], [[SHL17]]
     ; CI-MESA: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR16]](s32), [[OR17]](s32)
-    ; CI-MESA: [[DEF:%[0-9]+]]:_(<3 x s64>) = G_IMPLICIT_DEF
-    ; CI-MESA: [[INSERT:%[0-9]+]]:_(<3 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<2 x s64>), 0
-    ; CI-MESA: [[INSERT1:%[0-9]+]]:_(<3 x s64>) = G_INSERT [[INSERT]], [[MV2]](s64), 128
-    ; CI-MESA: [[DEF1:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
-    ; CI-MESA: [[INSERT2:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF1]], [[INSERT1]](<3 x s64>), 0
-    ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT2]](<4 x s64>)
+    ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64)
+    ; CI-MESA: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
+    ; CI-MESA: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
+    ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
     ; VI-LABEL: name: test_load_global_v3s64_align1
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1)
@@ -13161,7 +13433,6 @@ body: |
     ; VI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C9]](s32)
     ; VI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]]
     ; VI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32)
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
     ; VI: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
     ; VI: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C11]](s64)
     ; VI: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p1) :: (load 1 from unknown-address + 16, addrspace 1)
@@ -13212,24 +13483,21 @@ body: |
     ; VI: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXT11]], [[C9]](s32)
     ; VI: [[OR17:%[0-9]+]]:_(s32) = G_OR [[ZEXT10]], [[SHL17]]
     ; VI: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR16]](s32), [[OR17]](s32)
-    ; VI: [[DEF:%[0-9]+]]:_(<3 x s64>) = G_IMPLICIT_DEF
-    ; VI: [[INSERT:%[0-9]+]]:_(<3 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<2 x s64>), 0
-    ; VI: [[INSERT1:%[0-9]+]]:_(<3 x s64>) = G_INSERT [[INSERT]], [[MV2]](s64), 128
-    ; VI: [[DEF1:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
-    ; VI: [[INSERT2:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF1]], [[INSERT1]](<3 x s64>), 0
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT2]](<4 x s64>)
+    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64)
+    ; VI: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
+    ; VI: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
+    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
     ; GFX9-HSA-LABEL: name: test_load_global_v3s64_align1
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load 16, align 1, addrspace 1)
     ; GFX9-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
     ; GFX9-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
     ; GFX9-HSA: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p1) :: (load 8 from unknown-address + 16, align 1, addrspace 1)
-    ; GFX9-HSA: [[DEF:%[0-9]+]]:_(<3 x s64>) = G_IMPLICIT_DEF
-    ; GFX9-HSA: [[INSERT:%[0-9]+]]:_(<3 x s64>) = G_INSERT [[DEF]], [[LOAD]](<2 x s64>), 0
-    ; GFX9-HSA: [[INSERT1:%[0-9]+]]:_(<3 x s64>) = G_INSERT [[INSERT]], [[LOAD1]](s64), 128
-    ; GFX9-HSA: [[DEF1:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
-    ; GFX9-HSA: [[INSERT2:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF1]], [[INSERT1]](<3 x s64>), 0
-    ; GFX9-HSA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT2]](<4 x s64>)
+    ; GFX9-HSA: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
+    ; GFX9-HSA: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64)
+    ; GFX9-HSA: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
+    ; GFX9-HSA: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
+    ; GFX9-HSA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
     ; GFX9-MESA-LABEL: name: test_load_global_v3s64_align1
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1)
@@ -13340,7 +13608,6 @@ body: |
     ; GFX9-MESA: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C9]](s32)
     ; GFX9-MESA: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]]
     ; GFX9-MESA: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32)
-    ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
     ; GFX9-MESA: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
     ; GFX9-MESA: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C11]](s64)
     ; GFX9-MESA: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p1) :: (load 1 from unknown-address + 16, addrspace 1)
@@ -13391,12 +13658,10 @@ body: |
     ; GFX9-MESA: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXT11]], [[C9]](s32)
     ; GFX9-MESA: [[OR17:%[0-9]+]]:_(s32) = G_OR [[ZEXT10]], [[SHL17]]
     ; GFX9-MESA: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR16]](s32), [[OR17]](s32)
-    ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<3 x s64>) = G_IMPLICIT_DEF
-    ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<3 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<2 x s64>), 0
-    ; GFX9-MESA: [[INSERT1:%[0-9]+]]:_(<3 x s64>) = G_INSERT [[INSERT]], [[MV2]](s64), 128
-    ; GFX9-MESA: [[DEF1:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
-    ; GFX9-MESA: [[INSERT2:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF1]], [[INSERT1]](<3 x s64>), 0
-    ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT2]](<4 x s64>)
+    ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64)
+    ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
+    ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
+    ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<3 x s64>) = G_LOAD %0 :: (load 24, align 1, addrspace 1)
     %2:_(<4 x s64>) = G_IMPLICIT_DEF
@@ -15929,7 +16194,6 @@ body: |
     ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]]
     ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
     ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
     ; SI: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
     ; SI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64)
     ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load 1 from unknown-address + 8, addrspace 1)
@@ -15953,11 +16217,8 @@ body: |
     ; SI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C3]]
     ; SI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
     ; SI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; SI: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; SI: [[COPY13:%[0-9]+]]:_(<3 x s32>) = COPY [[DEF]](<3 x s32>)
-    ; SI: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[COPY13]], [[BUILD_VECTOR]](<2 x s32>), 0
-    ; SI: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[OR8]](s32), 64
-    ; SI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT1]](<3 x s32>)
+    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
+    ; SI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
     ; SI: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
     ; SI: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C9]](s64)
     ; SI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p1) :: (load 1 from unknown-address + 12, addrspace 1)
@@ -15967,18 +16228,18 @@ body: |
     ; SI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p1) :: (load 1 from unknown-address + 14, addrspace 1)
     ; SI: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64)
     ; SI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p1) :: (load 1 from unknown-address + 15, addrspace 1)
-    ; SI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32)
-    ; SI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C3]]
-    ; SI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32)
-    ; SI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C3]]
+    ; SI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32)
+    ; SI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]]
+    ; SI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32)
+    ; SI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C3]]
     ; SI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
     ; SI: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
-    ; SI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LOAD14]](s32)
-    ; SI: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C3]]
+    ; SI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LOAD14]](s32)
+    ; SI: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C3]]
     ; SI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
     ; SI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
-    ; SI: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32)
-    ; SI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY17]], [[C3]]
+    ; SI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32)
+    ; SI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C3]]
     ; SI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
     ; SI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
     ; SI: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C7]](s64)
@@ -15989,21 +16250,20 @@ body: |
     ; SI: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p1) :: (load 1 from unknown-address + 18, addrspace 1)
     ; SI: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64)
     ; SI: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p1) :: (load 1 from unknown-address + 19, addrspace 1)
-    ; SI: [[COPY18:%[0-9]+]]:_(s32) = COPY [[LOAD16]](s32)
-    ; SI: [[AND16:%[0-9]+]]:_(s32) = G_AND [[COPY18]], [[C3]]
-    ; SI: [[COPY19:%[0-9]+]]:_(s32) = COPY [[LOAD17]](s32)
-    ; SI: [[AND17:%[0-9]+]]:_(s32) = G_AND [[COPY19]], [[C3]]
+    ; SI: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LOAD16]](s32)
+    ; SI: [[AND16:%[0-9]+]]:_(s32) = G_AND [[COPY17]], [[C3]]
+    ; SI: [[COPY18:%[0-9]+]]:_(s32) = COPY [[LOAD17]](s32)
+    ; SI: [[AND17:%[0-9]+]]:_(s32) = G_AND [[COPY18]], [[C3]]
     ; SI: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[C4]](s32)
     ; SI: [[OR12:%[0-9]+]]:_(s32) = G_OR [[AND16]], [[SHL12]]
-    ; SI: [[COPY20:%[0-9]+]]:_(s32) = COPY [[LOAD18]](s32)
-    ; SI: [[AND18:%[0-9]+]]:_(s32) = G_AND [[COPY20]], [[C3]]
+    ; SI: [[COPY19:%[0-9]+]]:_(s32) = COPY [[LOAD18]](s32)
+    ; SI: [[AND18:%[0-9]+]]:_(s32) = G_AND [[COPY19]], [[C3]]
     ; SI: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND18]], [[C5]](s32)
     ; SI: [[OR13:%[0-9]+]]:_(s32) = G_OR [[OR12]], [[SHL13]]
-    ; SI: [[COPY21:%[0-9]+]]:_(s32) = COPY [[LOAD19]](s32)
-    ; SI: [[AND19:%[0-9]+]]:_(s32) = G_AND [[COPY21]], [[C3]]
+    ; SI: [[COPY20:%[0-9]+]]:_(s32) = COPY [[LOAD19]](s32)
+    ; SI: [[AND19:%[0-9]+]]:_(s32) = G_AND [[COPY20]], [[C3]]
     ; SI: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[C6]](s32)
     ; SI: [[OR14:%[0-9]+]]:_(s32) = G_OR [[OR13]], [[SHL14]]
-    ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32)
     ; SI: [[PTR_ADD19:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C8]](s64)
     ; SI: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p1) :: (load 1 from unknown-address + 20, addrspace 1)
     ; SI: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C]](s64)
@@ -16012,27 +16272,26 @@ body: |
     ; SI: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p1) :: (load 1 from unknown-address + 22, addrspace 1)
     ; SI: [[PTR_ADD22:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C2]](s64)
     ; SI: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p1) :: (load 1 from unknown-address + 23, addrspace 1)
-    ; SI: [[COPY22:%[0-9]+]]:_(s32) = COPY [[LOAD20]](s32)
-    ; SI: [[AND20:%[0-9]+]]:_(s32) = G_AND [[COPY22]], [[C3]]
-    ; SI: [[COPY23:%[0-9]+]]:_(s32) = COPY [[LOAD21]](s32)
-    ; SI: [[AND21:%[0-9]+]]:_(s32) = G_AND [[COPY23]], [[C3]]
+    ; SI: [[COPY21:%[0-9]+]]:_(s32) = COPY [[LOAD20]](s32)
+    ; SI: [[AND20:%[0-9]+]]:_(s32) = G_AND [[COPY21]], [[C3]]
+    ; SI: [[COPY22:%[0-9]+]]:_(s32) = COPY [[LOAD21]](s32)
+    ; SI: [[AND21:%[0-9]+]]:_(s32) = G_AND [[COPY22]], [[C3]]
     ; SI: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[C4]](s32)
     ; SI: [[OR15:%[0-9]+]]:_(s32) = G_OR [[AND20]], [[SHL15]]
-    ; SI: [[COPY24:%[0-9]+]]:_(s32) = COPY [[LOAD22]](s32)
-    ; SI: [[AND22:%[0-9]+]]:_(s32) = G_AND [[COPY24]], [[C3]]
+    ; SI: [[COPY23:%[0-9]+]]:_(s32) = COPY [[LOAD22]](s32)
+    ; SI: [[AND22:%[0-9]+]]:_(s32) = G_AND [[COPY23]], [[C3]]
     ; SI: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[AND22]], [[C5]](s32)
     ; SI: [[OR16:%[0-9]+]]:_(s32) = G_OR [[OR15]], [[SHL16]]
-    ; SI: [[COPY25:%[0-9]+]]:_(s32) = COPY [[LOAD23]](s32)
-    ; SI: [[AND23:%[0-9]+]]:_(s32) = G_AND [[COPY25]], [[C3]]
+    ; SI: [[COPY24:%[0-9]+]]:_(s32) = COPY [[LOAD23]](s32)
+    ; SI: [[AND23:%[0-9]+]]:_(s32) = G_AND [[COPY24]], [[C3]]
     ; SI: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[C6]](s32)
     ; SI: [[OR17:%[0-9]+]]:_(s32) = G_OR [[OR16]], [[SHL17]]
-    ; SI: [[INSERT2:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[BUILD_VECTOR1]](<2 x s32>), 0
-    ; SI: [[INSERT3:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT2]], [[OR17]](s32), 64
-    ; SI: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT3]](<3 x s32>)
-    ; SI: [[COPY26:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
-    ; SI: [[COPY27:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
-    ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY26]](s96)
-    ; SI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY27]](s96)
+    ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32), [[OR17]](s32)
+    ; SI: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
+    ; SI: [[COPY25:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; SI: [[COPY26:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY25]](s96)
+    ; SI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY26]](s96)
     ; CI-HSA-LABEL: name: test_extload_global_v2s96_from_24_align1
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; CI-HSA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load 12, align 1, addrspace 1)
@@ -16098,7 +16357,6 @@ body: |
     ; CI-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]]
     ; CI-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
     ; CI-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
     ; CI-MESA: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
     ; CI-MESA: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64)
     ; CI-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load 1 from unknown-address + 8, addrspace 1)
@@ -16122,11 +16380,8 @@ body: |
     ; CI-MESA: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C3]]
     ; CI-MESA: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
     ; CI-MESA: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; CI-MESA: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; CI-MESA: [[COPY13:%[0-9]+]]:_(<3 x s32>) = COPY [[DEF]](<3 x s32>)
-    ; CI-MESA: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[COPY13]], [[BUILD_VECTOR]](<2 x s32>), 0
-    ; CI-MESA: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[OR8]](s32), 64
-    ; CI-MESA: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT1]](<3 x s32>)
+    ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
+    ; CI-MESA: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
     ; CI-MESA: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
     ; CI-MESA: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C9]](s64)
     ; CI-MESA: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p1) :: (load 1 from unknown-address + 12, addrspace 1)
@@ -16136,18 +16391,18 @@ body: |
     ; CI-MESA: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p1) :: (load 1 from unknown-address + 14, addrspace 1)
     ; CI-MESA: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64)
     ; CI-MESA: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p1) :: (load 1 from unknown-address + 15, addrspace 1)
-    ; CI-MESA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32)
-    ; CI-MESA: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C3]]
-    ; CI-MESA: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32)
-    ; CI-MESA: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C3]]
+    ; CI-MESA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32)
+    ; CI-MESA: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]]
+    ; CI-MESA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32)
+    ; CI-MESA: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C3]]
     ; CI-MESA: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
     ; CI-MESA: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
-    ; CI-MESA: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LOAD14]](s32)
-    ; CI-MESA: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C3]]
+    ; CI-MESA: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LOAD14]](s32)
+    ; CI-MESA: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C3]]
     ; CI-MESA: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
     ; CI-MESA: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
-    ; CI-MESA: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32)
-    ; CI-MESA: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY17]], [[C3]]
+    ; CI-MESA: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32)
+    ; CI-MESA: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C3]]
     ; CI-MESA: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
     ; CI-MESA: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
     ; CI-MESA: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C7]](s64)
@@ -16158,21 +16413,20 @@ body: |
     ; CI-MESA: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p1) :: (load 1 from unknown-address + 18, addrspace 1)
     ; CI-MESA: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64)
     ; CI-MESA: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p1) :: (load 1 from unknown-address + 19, addrspace 1)
-    ; CI-MESA: [[COPY18:%[0-9]+]]:_(s32) = COPY [[LOAD16]](s32)
-    ; CI-MESA: [[AND16:%[0-9]+]]:_(s32) = G_AND [[COPY18]], [[C3]]
-    ; CI-MESA: [[COPY19:%[0-9]+]]:_(s32) = COPY [[LOAD17]](s32)
-    ; CI-MESA: [[AND17:%[0-9]+]]:_(s32) = G_AND [[COPY19]], [[C3]]
+    ; CI-MESA: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LOAD16]](s32)
+    ; CI-MESA: [[AND16:%[0-9]+]]:_(s32) = G_AND [[COPY17]], [[C3]]
+    ; CI-MESA: [[COPY18:%[0-9]+]]:_(s32) = COPY [[LOAD17]](s32)
+    ; CI-MESA: [[AND17:%[0-9]+]]:_(s32) = G_AND [[COPY18]], [[C3]]
     ; CI-MESA: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[C4]](s32)
     ; CI-MESA: [[OR12:%[0-9]+]]:_(s32) = G_OR [[AND16]], [[SHL12]]
-    ; CI-MESA: [[COPY20:%[0-9]+]]:_(s32) = COPY [[LOAD18]](s32)
-    ; CI-MESA: [[AND18:%[0-9]+]]:_(s32) = G_AND [[COPY20]], [[C3]]
+    ; CI-MESA: [[COPY19:%[0-9]+]]:_(s32) = COPY [[LOAD18]](s32)
+    ; CI-MESA: [[AND18:%[0-9]+]]:_(s32) = G_AND [[COPY19]], [[C3]]
     ; CI-MESA: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND18]], [[C5]](s32)
     ; CI-MESA: [[OR13:%[0-9]+]]:_(s32) = G_OR [[OR12]], [[SHL13]]
-    ; CI-MESA: [[COPY21:%[0-9]+]]:_(s32) = COPY [[LOAD19]](s32)
-    ; CI-MESA: [[AND19:%[0-9]+]]:_(s32) = G_AND [[COPY21]], [[C3]]
+    ; CI-MESA: [[COPY20:%[0-9]+]]:_(s32) = COPY [[LOAD19]](s32)
+    ; CI-MESA: [[AND19:%[0-9]+]]:_(s32) = G_AND [[COPY20]], [[C3]]
     ; CI-MESA: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[C6]](s32)
     ; CI-MESA: [[OR14:%[0-9]+]]:_(s32) = G_OR [[OR13]], [[SHL14]]
-    ; CI-MESA: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32)
     ; CI-MESA: [[PTR_ADD19:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C8]](s64)
     ; CI-MESA: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p1) :: (load 1 from unknown-address + 20, addrspace 1)
     ; CI-MESA: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C]](s64)
@@ -16181,27 +16435,26 @@ body: |
     ; CI-MESA: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p1) :: (load 1 from unknown-address + 22, addrspace 1)
     ; CI-MESA: [[PTR_ADD22:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C2]](s64)
     ; CI-MESA: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p1) :: (load 1 from unknown-address + 23, addrspace 1)
-    ; CI-MESA: [[COPY22:%[0-9]+]]:_(s32) = COPY [[LOAD20]](s32)
-    ; CI-MESA: [[AND20:%[0-9]+]]:_(s32) = G_AND [[COPY22]], [[C3]]
-    ; CI-MESA: [[COPY23:%[0-9]+]]:_(s32) = COPY [[LOAD21]](s32)
-    ; CI-MESA: [[AND21:%[0-9]+]]:_(s32) = G_AND [[COPY23]], [[C3]]
+    ; CI-MESA: [[COPY21:%[0-9]+]]:_(s32) = COPY [[LOAD20]](s32)
+    ; CI-MESA: [[AND20:%[0-9]+]]:_(s32) = G_AND [[COPY21]], [[C3]]
+    ; CI-MESA: [[COPY22:%[0-9]+]]:_(s32) = COPY [[LOAD21]](s32)
+    ; CI-MESA: [[AND21:%[0-9]+]]:_(s32) = G_AND [[COPY22]], [[C3]]
     ; CI-MESA: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[C4]](s32)
     ; CI-MESA: [[OR15:%[0-9]+]]:_(s32) = G_OR [[AND20]], [[SHL15]]
-    ; CI-MESA: [[COPY24:%[0-9]+]]:_(s32) = COPY [[LOAD22]](s32)
-    ; CI-MESA: [[AND22:%[0-9]+]]:_(s32) = G_AND [[COPY24]], [[C3]]
+    ; CI-MESA: [[COPY23:%[0-9]+]]:_(s32) = COPY [[LOAD22]](s32)
+    ; CI-MESA: [[AND22:%[0-9]+]]:_(s32) = G_AND [[COPY23]], [[C3]]
     ; CI-MESA: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[AND22]], [[C5]](s32)
     ; CI-MESA: [[OR16:%[0-9]+]]:_(s32) = G_OR [[OR15]], [[SHL16]]
-    ; CI-MESA: [[COPY25:%[0-9]+]]:_(s32) = COPY [[LOAD23]](s32)
-    ; CI-MESA: [[AND23:%[0-9]+]]:_(s32) = G_AND [[COPY25]], [[C3]]
+    ; CI-MESA: [[COPY24:%[0-9]+]]:_(s32) = COPY [[LOAD23]](s32)
+    ; CI-MESA: [[AND23:%[0-9]+]]:_(s32) = G_AND [[COPY24]], [[C3]]
     ; CI-MESA: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[C6]](s32)
     ; CI-MESA: [[OR17:%[0-9]+]]:_(s32) = G_OR [[OR16]], [[SHL17]]
-    ; CI-MESA: [[INSERT2:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[BUILD_VECTOR1]](<2 x s32>), 0
-    ; CI-MESA: [[INSERT3:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT2]], [[OR17]](s32), 64
-    ; CI-MESA: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT3]](<3 x s32>)
-    ; CI-MESA: [[COPY26:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
-    ; CI-MESA: [[COPY27:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
-    ; CI-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[COPY26]](s96)
-    ; CI-MESA: $vgpr3_vgpr4_vgpr5 = COPY [[COPY27]](s96)
+    ; CI-MESA: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32), [[OR17]](s32)
+    ; CI-MESA: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
+    ; CI-MESA: [[COPY25:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; CI-MESA: [[COPY26:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; CI-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[COPY25]](s96)
+    ; CI-MESA: $vgpr3_vgpr4_vgpr5 = COPY [[COPY26]](s96)
     ; VI-LABEL: name: test_extload_global_v2s96_from_24_align1
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1)
@@ -16255,7 +16508,6 @@ body: |
     ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]]
     ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
     ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
     ; VI: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
     ; VI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64)
     ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load 1 from unknown-address + 8, addrspace 1)
@@ -16279,11 +16531,8 @@ body: |
     ; VI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C3]]
     ; VI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
     ; VI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; VI: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; VI: [[COPY13:%[0-9]+]]:_(<3 x s32>) = COPY [[DEF]](<3 x s32>)
-    ; VI: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[COPY13]], [[BUILD_VECTOR]](<2 x s32>), 0
-    ; VI: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[OR8]](s32), 64
-    ; VI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT1]](<3 x s32>)
+    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
+    ; VI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
     ; VI: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
     ; VI: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C9]](s64)
     ; VI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p1) :: (load 1 from unknown-address + 12, addrspace 1)
@@ -16293,18 +16542,18 @@ body: |
     ; VI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p1) :: (load 1 from unknown-address + 14, addrspace 1)
     ; VI: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64)
     ; VI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p1) :: (load 1 from unknown-address + 15, addrspace 1)
-    ; VI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32)
-    ; VI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C3]]
-    ; VI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32)
-    ; VI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C3]]
+    ; VI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32)
+    ; VI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]]
+    ; VI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32)
+    ; VI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C3]]
     ; VI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
     ; VI: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
-    ; VI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LOAD14]](s32)
-    ; VI: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C3]]
+    ; VI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LOAD14]](s32)
+    ; VI: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C3]]
     ; VI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
     ; VI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
-    ; VI: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32)
-    ; VI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY17]], [[C3]]
+    ; VI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32)
+    ; VI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C3]]
     ; VI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
     ; VI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
     ; VI: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C7]](s64)
@@ -16315,21 +16564,20 @@ body: |
     ; VI: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p1) :: (load 1 from unknown-address + 18, addrspace 1)
     ; VI: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64)
     ; VI: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p1) :: (load 1 from unknown-address + 19, addrspace 1)
-    ; VI: [[COPY18:%[0-9]+]]:_(s32) = COPY [[LOAD16]](s32)
-    ; VI: [[AND16:%[0-9]+]]:_(s32) = G_AND [[COPY18]], [[C3]]
-    ; VI: [[COPY19:%[0-9]+]]:_(s32) = COPY [[LOAD17]](s32)
-    ; VI: [[AND17:%[0-9]+]]:_(s32) = G_AND [[COPY19]], [[C3]]
+    ; VI: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LOAD16]](s32)
+    ; VI: [[AND16:%[0-9]+]]:_(s32) = G_AND [[COPY17]], [[C3]]
+    ; VI: [[COPY18:%[0-9]+]]:_(s32) = COPY [[LOAD17]](s32)
+    ; VI: [[AND17:%[0-9]+]]:_(s32) = G_AND [[COPY18]], [[C3]]
     ; VI: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[C4]](s32)
     ; VI: [[OR12:%[0-9]+]]:_(s32) = G_OR [[AND16]], [[SHL12]]
-    ; VI: [[COPY20:%[0-9]+]]:_(s32) = COPY [[LOAD18]](s32)
-    ; VI: [[AND18:%[0-9]+]]:_(s32) = G_AND [[COPY20]], [[C3]]
+    ; VI: [[COPY19:%[0-9]+]]:_(s32) = COPY [[LOAD18]](s32)
+    ; VI: [[AND18:%[0-9]+]]:_(s32) = G_AND [[COPY19]], [[C3]]
     ; VI: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND18]], [[C5]](s32)
     ; VI: [[OR13:%[0-9]+]]:_(s32) = G_OR [[OR12]], [[SHL13]]
-    ; VI: [[COPY21:%[0-9]+]]:_(s32) = COPY [[LOAD19]](s32)
-    ; VI: [[AND19:%[0-9]+]]:_(s32) = G_AND [[COPY21]], [[C3]]
+    ; VI: [[COPY20:%[0-9]+]]:_(s32) = COPY [[LOAD19]](s32)
+    ; VI: [[AND19:%[0-9]+]]:_(s32) = G_AND [[COPY20]], [[C3]]
     ; VI: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[C6]](s32)
     ; VI: [[OR14:%[0-9]+]]:_(s32) = G_OR [[OR13]], [[SHL14]]
-    ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32)
     ; VI: [[PTR_ADD19:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C8]](s64)
     ; VI: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p1) :: (load 1 from unknown-address + 20, addrspace 1)
     ; VI: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C]](s64)
@@ -16338,27 +16586,26 @@ body: |
     ; VI: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p1) :: (load 1 from unknown-address + 22, addrspace 1)
     ; VI: [[PTR_ADD22:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C2]](s64)
     ; VI: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p1) :: (load 1 from unknown-address + 23, addrspace 1)
-    ; VI: [[COPY22:%[0-9]+]]:_(s32) = COPY [[LOAD20]](s32)
-    ; VI: [[AND20:%[0-9]+]]:_(s32) = G_AND [[COPY22]], [[C3]]
-    ; VI: [[COPY23:%[0-9]+]]:_(s32) = COPY [[LOAD21]](s32)
-    ; VI: [[AND21:%[0-9]+]]:_(s32) = G_AND [[COPY23]], [[C3]]
+    ; VI: [[COPY21:%[0-9]+]]:_(s32) = COPY [[LOAD20]](s32)
+    ; VI: [[AND20:%[0-9]+]]:_(s32) = G_AND [[COPY21]], [[C3]]
+    ; VI: [[COPY22:%[0-9]+]]:_(s32) = COPY [[LOAD21]](s32)
+    ; VI: [[AND21:%[0-9]+]]:_(s32) = G_AND [[COPY22]], [[C3]]
     ; VI: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[C4]](s32)
     ; VI: [[OR15:%[0-9]+]]:_(s32) = G_OR [[AND20]], [[SHL15]]
-    ; VI: [[COPY24:%[0-9]+]]:_(s32) = COPY [[LOAD22]](s32)
-    ; VI: [[AND22:%[0-9]+]]:_(s32) = G_AND [[COPY24]], [[C3]]
+    ; VI: [[COPY23:%[0-9]+]]:_(s32) = COPY [[LOAD22]](s32)
+    ; VI: [[AND22:%[0-9]+]]:_(s32) = G_AND [[COPY23]], [[C3]]
     ; VI: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[AND22]], [[C5]](s32)
     ; VI: [[OR16:%[0-9]+]]:_(s32) = G_OR [[OR15]], [[SHL16]]
-    ; VI: [[COPY25:%[0-9]+]]:_(s32) = COPY [[LOAD23]](s32)
-    ; VI: [[AND23:%[0-9]+]]:_(s32) = G_AND [[COPY25]], [[C3]]
+    ; VI: [[COPY24:%[0-9]+]]:_(s32) = COPY [[LOAD23]](s32)
+    ; VI: [[AND23:%[0-9]+]]:_(s32) = G_AND [[COPY24]], [[C3]]
     ; VI: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[C6]](s32)
     ; VI: [[OR17:%[0-9]+]]:_(s32) = G_OR [[OR16]], [[SHL17]]
-    ; VI: [[INSERT2:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[BUILD_VECTOR1]](<2 x s32>), 0
-    ; VI: [[INSERT3:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT2]], [[OR17]](s32), 64
-    ; VI: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT3]](<3 x s32>)
-    ; VI: [[COPY26:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
-    ; VI: [[COPY27:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
-    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY26]](s96)
-    ; VI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY27]](s96)
+    ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32), [[OR17]](s32)
+    ; VI: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
+    ; VI: [[COPY25:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; VI: [[COPY26:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY25]](s96)
+    ; VI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY26]](s96)
     ; GFX9-HSA-LABEL: name: test_extload_global_v2s96_from_24_align1
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load 12, align 1, addrspace 1)
@@ -16424,7 +16671,6 @@ body: |
     ; GFX9-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]]
     ; GFX9-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
     ; GFX9-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
     ; GFX9-MESA: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
     ; GFX9-MESA: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64)
     ; GFX9-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load 1 from unknown-address + 8, addrspace 1)
@@ -16448,11 +16694,8 @@ body: |
     ; GFX9-MESA: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C3]]
     ; GFX9-MESA: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
     ; GFX9-MESA: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; GFX9-MESA: [[COPY13:%[0-9]+]]:_(<3 x s32>) = COPY [[DEF]](<3 x s32>)
-    ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[COPY13]], [[BUILD_VECTOR]](<2 x s32>), 0
-    ; GFX9-MESA: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[OR8]](s32), 64
-    ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT1]](<3 x s32>)
+    ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
+    ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
     ; GFX9-MESA: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
     ; GFX9-MESA: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C9]](s64)
     ; GFX9-MESA: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p1) :: (load 1 from unknown-address + 12, addrspace 1)
@@ -16462,18 +16705,18 @@ body: |
     ; GFX9-MESA: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p1) :: (load 1 from unknown-address + 14, addrspace 1)
     ; GFX9-MESA: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64)
     ; GFX9-MESA: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p1) :: (load 1 from unknown-address + 15, addrspace 1)
-    ; GFX9-MESA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32)
-    ; GFX9-MESA: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C3]]
-    ; GFX9-MESA: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32)
-    ; GFX9-MESA: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C3]]
+    ; GFX9-MESA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32)
+    ; GFX9-MESA: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]]
+    ; GFX9-MESA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32)
+    ; GFX9-MESA: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C3]]
     ; GFX9-MESA: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
     ; GFX9-MESA: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
-    ; GFX9-MESA: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LOAD14]](s32)
-    ; GFX9-MESA: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C3]]
+    ; GFX9-MESA: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LOAD14]](s32)
+    ; GFX9-MESA: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C3]]
     ; GFX9-MESA: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
     ; GFX9-MESA: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
-    ; GFX9-MESA: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32)
-    ; GFX9-MESA: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY17]], [[C3]]
+    ; GFX9-MESA: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32)
+    ; GFX9-MESA: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C3]]
     ; GFX9-MESA: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
     ; GFX9-MESA: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
     ; GFX9-MESA: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C7]](s64)
@@ -16484,21 +16727,20 @@ body: |
     ; GFX9-MESA: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p1) :: (load 1 from unknown-address + 18, addrspace 1)
     ; GFX9-MESA: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64)
     ; GFX9-MESA: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p1) :: (load 1 from unknown-address + 19, addrspace 1)
-    ; GFX9-MESA: [[COPY18:%[0-9]+]]:_(s32) = COPY [[LOAD16]](s32)
-    ; GFX9-MESA: [[AND16:%[0-9]+]]:_(s32) = G_AND [[COPY18]], [[C3]]
-    ; GFX9-MESA: [[COPY19:%[0-9]+]]:_(s32) = COPY [[LOAD17]](s32)
-    ; GFX9-MESA: [[AND17:%[0-9]+]]:_(s32) = G_AND [[COPY19]], [[C3]]
+    ; GFX9-MESA: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LOAD16]](s32)
+    ; GFX9-MESA: [[AND16:%[0-9]+]]:_(s32) = G_AND [[COPY17]], [[C3]]
+    ; GFX9-MESA: [[COPY18:%[0-9]+]]:_(s32) = COPY [[LOAD17]](s32)
+    ; GFX9-MESA: [[AND17:%[0-9]+]]:_(s32) = G_AND [[COPY18]], [[C3]]
     ; GFX9-MESA: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[C4]](s32)
     ; GFX9-MESA: [[OR12:%[0-9]+]]:_(s32) = G_OR [[AND16]], [[SHL12]]
-    ; GFX9-MESA: [[COPY20:%[0-9]+]]:_(s32) = COPY [[LOAD18]](s32)
-    ; GFX9-MESA: [[AND18:%[0-9]+]]:_(s32) = G_AND [[COPY20]], [[C3]]
+    ; GFX9-MESA: [[COPY19:%[0-9]+]]:_(s32) = COPY [[LOAD18]](s32)
+    ; GFX9-MESA: [[AND18:%[0-9]+]]:_(s32) = G_AND [[COPY19]], [[C3]]
     ; GFX9-MESA: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND18]], [[C5]](s32)
     ; GFX9-MESA: [[OR13:%[0-9]+]]:_(s32) = G_OR [[OR12]], [[SHL13]]
-    ; GFX9-MESA: [[COPY21:%[0-9]+]]:_(s32) = COPY [[LOAD19]](s32)
-    ; GFX9-MESA: [[AND19:%[0-9]+]]:_(s32) = G_AND [[COPY21]], [[C3]]
+    ; GFX9-MESA: [[COPY20:%[0-9]+]]:_(s32) = COPY [[LOAD19]](s32)
+    ; GFX9-MESA: [[AND19:%[0-9]+]]:_(s32) = G_AND [[COPY20]], [[C3]]
     ; GFX9-MESA: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[C6]](s32)
     ; GFX9-MESA: [[OR14:%[0-9]+]]:_(s32) = G_OR [[OR13]], [[SHL14]]
-    ; GFX9-MESA: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32)
     ; GFX9-MESA: [[PTR_ADD19:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C8]](s64)
     ; GFX9-MESA: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p1) :: (load 1 from unknown-address + 20, addrspace 1)
     ; GFX9-MESA: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C]](s64)
@@ -16507,27 +16749,26 @@ body: |
     ; GFX9-MESA: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p1) :: (load 1 from unknown-address + 22, addrspace 1)
     ; GFX9-MESA: [[PTR_ADD22:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C2]](s64)
     ; GFX9-MESA: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p1) :: (load 1 from unknown-address + 23, addrspace 1)
-    ; GFX9-MESA: [[COPY22:%[0-9]+]]:_(s32) = COPY [[LOAD20]](s32)
-    ; GFX9-MESA: [[AND20:%[0-9]+]]:_(s32) = G_AND [[COPY22]], [[C3]]
-    ; GFX9-MESA: [[COPY23:%[0-9]+]]:_(s32) = COPY [[LOAD21]](s32)
-    ; GFX9-MESA: [[AND21:%[0-9]+]]:_(s32) = G_AND [[COPY23]], [[C3]]
+    ; GFX9-MESA: [[COPY21:%[0-9]+]]:_(s32) = COPY [[LOAD20]](s32)
+    ; GFX9-MESA: [[AND20:%[0-9]+]]:_(s32) = G_AND [[COPY21]], [[C3]]
+    ; GFX9-MESA: [[COPY22:%[0-9]+]]:_(s32) = COPY [[LOAD21]](s32)
+    ; GFX9-MESA: [[AND21:%[0-9]+]]:_(s32) = G_AND [[COPY22]], [[C3]]
     ; GFX9-MESA: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[C4]](s32)
     ; GFX9-MESA: [[OR15:%[0-9]+]]:_(s32) = G_OR [[AND20]], [[SHL15]]
-    ; GFX9-MESA: [[COPY24:%[0-9]+]]:_(s32) = COPY [[LOAD22]](s32)
-    ; GFX9-MESA: [[AND22:%[0-9]+]]:_(s32) = G_AND [[COPY24]], [[C3]]
+    ; GFX9-MESA: [[COPY23:%[0-9]+]]:_(s32) = COPY [[LOAD22]](s32)
+    ; GFX9-MESA: [[AND22:%[0-9]+]]:_(s32) = G_AND [[COPY23]], [[C3]]
     ; GFX9-MESA: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[AND22]], [[C5]](s32)
     ; GFX9-MESA: [[OR16:%[0-9]+]]:_(s32) = G_OR [[OR15]], [[SHL16]]
-    ; GFX9-MESA: [[COPY25:%[0-9]+]]:_(s32) = COPY [[LOAD23]](s32)
-    ; GFX9-MESA: [[AND23:%[0-9]+]]:_(s32) = G_AND [[COPY25]], [[C3]]
+    ; GFX9-MESA: [[COPY24:%[0-9]+]]:_(s32) = COPY [[LOAD23]](s32)
+    ; GFX9-MESA: [[AND23:%[0-9]+]]:_(s32) = G_AND [[COPY24]], [[C3]]
     ; GFX9-MESA: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[C6]](s32)
     ; GFX9-MESA: [[OR17:%[0-9]+]]:_(s32) = G_OR [[OR16]], [[SHL17]]
-    ; GFX9-MESA: [[INSERT2:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[BUILD_VECTOR1]](<2 x s32>), 0
-    ; GFX9-MESA: [[INSERT3:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT2]], [[OR17]](s32), 64
-    ; GFX9-MESA: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT3]](<3 x s32>)
-    ; GFX9-MESA: [[COPY26:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
-    ; GFX9-MESA: [[COPY27:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
-    ; GFX9-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[COPY26]](s96)
-    ; GFX9-MESA: $vgpr3_vgpr4_vgpr5 = COPY [[COPY27]](s96)
+    ; GFX9-MESA: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32), [[OR17]](s32)
+    ; GFX9-MESA: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
+    ; GFX9-MESA: [[COPY25:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; GFX9-MESA: [[COPY26:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; GFX9-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[COPY25]](s96)
+    ; GFX9-MESA: $vgpr3_vgpr4_vgpr5 = COPY [[COPY26]](s96)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<2 x s96>) = G_LOAD %0 :: (load 24, align 1, addrspace 1)
     %2:_(s96) = G_EXTRACT %1, 0
@@ -16567,7 +16808,6 @@ body: |
     ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]]
     ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
     ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32)
     ; SI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
     ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
     ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 2 from unknown-address + 8, addrspace 1)
@@ -16579,50 +16819,45 @@ body: |
     ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]]
     ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
     ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; SI: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; SI: [[COPY7:%[0-9]+]]:_(<3 x s32>) = COPY [[DEF]](<3 x s32>)
-    ; SI: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[COPY7]], [[BUILD_VECTOR]](<2 x s32>), 0
-    ; SI: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[OR2]](s32), 64
-    ; SI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT1]](<3 x s32>)
+    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
+    ; SI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
     ; SI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
     ; SI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
     ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load 2 from unknown-address + 12, addrspace 1)
     ; SI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64)
     ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load 2 from unknown-address + 14, addrspace 1)
-    ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32)
-    ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C1]]
-    ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32)
-    ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C1]]
+    ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32)
+    ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]]
+    ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32)
+    ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C1]]
     ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
     ; SI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
     ; SI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C3]](s64)
     ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load 2 from unknown-address + 16, addrspace 1)
     ; SI: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
     ; SI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load 2 from unknown-address + 18, addrspace 1)
-    ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32)
-    ; SI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C1]]
-    ; SI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32)
-    ; SI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]]
+    ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32)
+    ; SI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C1]]
+    ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32)
+    ; SI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C1]]
     ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C2]](s32)
     ; SI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]]
-    ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32)
     ; SI: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C4]](s64)
     ; SI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load 2 from unknown-address + 20, addrspace 1)
     ; SI: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64)
     ; SI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load 2 from unknown-address + 22, addrspace 1)
-    ; SI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD10]](s32)
-    ; SI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C1]]
-    ; SI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32)
-    ; SI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C1]]
+    ; SI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD10]](s32)
+    ; SI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]]
+    ; SI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32)
+    ; SI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C1]]
     ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32)
     ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL5]]
-    ; SI: [[INSERT2:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[BUILD_VECTOR1]](<2 x s32>), 0
-    ; SI: [[INSERT3:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT2]], [[OR5]](s32), 64
-    ; SI: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT3]](<3 x s32>)
-    ; SI: [[COPY14:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
-    ; SI: [[COPY15:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
-    ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY14]](s96)
-    ; SI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY15]](s96)
+    ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32), [[OR5]](s32)
+    ; SI: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
+    ; SI: [[COPY13:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; SI: [[COPY14:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY13]](s96)
+    ; SI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY14]](s96)
     ; CI-HSA-LABEL: name: test_extload_global_v2s96_from_24_align2
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; CI-HSA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load 12, align 2, addrspace 1)
@@ -16660,7 +16895,6 @@ body: |
     ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]]
     ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
     ; CI-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32)
     ; CI-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
     ; CI-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
     ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 2 from unknown-address + 8, addrspace 1)
@@ -16672,50 +16906,45 @@ body: |
     ; CI-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]]
     ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
     ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; CI-MESA: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; CI-MESA: [[COPY7:%[0-9]+]]:_(<3 x s32>) = COPY [[DEF]](<3 x s32>)
-    ; CI-MESA: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[COPY7]], [[BUILD_VECTOR]](<2 x s32>), 0
-    ; CI-MESA: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[OR2]](s32), 64
-    ; CI-MESA: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT1]](<3 x s32>)
+    ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
+    ; CI-MESA: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
     ; CI-MESA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
     ; CI-MESA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
     ; CI-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load 2 from unknown-address + 12, addrspace 1)
     ; CI-MESA: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64)
     ; CI-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load 2 from unknown-address + 14, addrspace 1)
-    ; CI-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32)
-    ; CI-MESA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C1]]
-    ; CI-MESA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32)
-    ; CI-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C1]]
+    ; CI-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32)
+    ; CI-MESA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]]
+    ; CI-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32)
+    ; CI-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C1]]
     ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
     ; CI-MESA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
     ; CI-MESA: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C3]](s64)
     ; CI-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load 2 from unknown-address + 16, addrspace 1)
     ; CI-MESA: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
     ; CI-MESA: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load 2 from unknown-address + 18, addrspace 1)
-    ; CI-MESA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32)
-    ; CI-MESA: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C1]]
-    ; CI-MESA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32)
-    ; CI-MESA: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]]
+    ; CI-MESA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32)
+    ; CI-MESA: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C1]]
+    ; CI-MESA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32)
+    ; CI-MESA: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C1]]
     ; CI-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C2]](s32)
     ; CI-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]]
-    ; CI-MESA: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32)
     ; CI-MESA: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C4]](s64)
     ; CI-MESA: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load 2 from unknown-address + 20, addrspace 1)
     ; CI-MESA: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64)
     ; CI-MESA: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load 2 from unknown-address + 22, addrspace 1)
-    ; CI-MESA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD10]](s32)
-    ; CI-MESA: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C1]]
-    ; CI-MESA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32)
-    ; CI-MESA: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C1]]
+    ; CI-MESA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD10]](s32)
+    ; CI-MESA: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]]
+    ; CI-MESA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32)
+    ; CI-MESA: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C1]]
     ; CI-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32)
     ; CI-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL5]]
-    ; CI-MESA: [[INSERT2:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[BUILD_VECTOR1]](<2 x s32>), 0
-    ; CI-MESA: [[INSERT3:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT2]], [[OR5]](s32), 64
-    ; CI-MESA: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT3]](<3 x s32>)
-    ; CI-MESA: [[COPY14:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
-    ; CI-MESA: [[COPY15:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
-    ; CI-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[COPY14]](s96)
-    ; CI-MESA: $vgpr3_vgpr4_vgpr5 = COPY [[COPY15]](s96)
+    ; CI-MESA: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32), [[OR5]](s32)
+    ; CI-MESA: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
+    ; CI-MESA: [[COPY13:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; CI-MESA: [[COPY14:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; CI-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[COPY13]](s96)
+    ; CI-MESA: $vgpr3_vgpr4_vgpr5 = COPY [[COPY14]](s96)
     ; VI-LABEL: name: test_extload_global_v2s96_from_24_align2
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1)
@@ -16741,7 +16970,6 @@ body: |
     ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]]
     ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
     ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32)
     ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
     ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
     ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 2 from unknown-address + 8, addrspace 1)
@@ -16753,50 +16981,45 @@ body: |
     ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]]
     ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
     ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; VI: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; VI: [[COPY7:%[0-9]+]]:_(<3 x s32>) = COPY [[DEF]](<3 x s32>)
-    ; VI: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[COPY7]], [[BUILD_VECTOR]](<2 x s32>), 0
-    ; VI: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[OR2]](s32), 64
-    ; VI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT1]](<3 x s32>)
+    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
+    ; VI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
     ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
     ; VI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
     ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load 2 from unknown-address + 12, addrspace 1)
     ; VI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64)
     ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load 2 from unknown-address + 14, addrspace 1)
-    ; VI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32)
-    ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C1]]
-    ; VI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32)
-    ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C1]]
+    ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32)
+    ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]]
+    ; VI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32)
+    ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C1]]
     ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
     ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
     ; VI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C3]](s64)
     ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load 2 from unknown-address + 16, addrspace 1)
     ; VI: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
     ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load 2 from unknown-address + 18, addrspace 1)
-    ; VI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32)
-    ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C1]]
-    ; VI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32)
-    ; VI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]]
+    ; VI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32)
+    ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C1]]
+    ; VI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32)
+    ; VI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C1]]
     ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C2]](s32)
     ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]]
-    ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32)
     ; VI: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C4]](s64)
     ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load 2 from unknown-address + 20, addrspace 1)
     ; VI: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64)
     ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load 2 from unknown-address + 22, addrspace 1)
-    ; VI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD10]](s32)
-    ; VI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C1]]
-    ; VI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32)
-    ; VI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C1]]
+    ; VI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD10]](s32)
+    ; VI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]]
+    ; VI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32)
+    ; VI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C1]]
     ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32)
     ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL5]]
-    ; VI: [[INSERT2:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[BUILD_VECTOR1]](<2 x s32>), 0
-    ; VI: [[INSERT3:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT2]], [[OR5]](s32), 64
-    ; VI: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT3]](<3 x s32>)
-    ; VI: [[COPY14:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
-    ; VI: [[COPY15:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
-    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY14]](s96)
-    ; VI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY15]](s96)
+    ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32), [[OR5]](s32)
+    ; VI: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
+    ; VI: [[COPY13:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; VI: [[COPY14:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY13]](s96)
+    ; VI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY14]](s96)
     ; GFX9-HSA-LABEL: name: test_extload_global_v2s96_from_24_align2
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load 12, align 2, addrspace 1)
@@ -16834,7 +17057,6 @@ body: |
     ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]]
     ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
     ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32)
     ; GFX9-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
     ; GFX9-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
     ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 2 from unknown-address + 8, addrspace 1)
@@ -16846,50 +17068,45 @@ body: |
     ; GFX9-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]]
     ; GFX9-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
     ; GFX9-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; GFX9-MESA: [[COPY7:%[0-9]+]]:_(<3 x s32>) = COPY [[DEF]](<3 x s32>)
-    ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[COPY7]], [[BUILD_VECTOR]](<2 x s32>), 0
-    ; GFX9-MESA: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[OR2]](s32), 64
-    ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT1]](<3 x s32>)
+    ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
+    ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
     ; GFX9-MESA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
     ; GFX9-MESA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
     ; GFX9-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load 2 from unknown-address + 12, addrspace 1)
     ; GFX9-MESA: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64)
     ; GFX9-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load 2 from unknown-address + 14, addrspace 1)
-    ; GFX9-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32)
-    ; GFX9-MESA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C1]]
-    ; GFX9-MESA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32)
-    ; GFX9-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C1]]
+    ; GFX9-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32)
+    ; GFX9-MESA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]]
+    ; GFX9-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32)
+    ; GFX9-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C1]]
     ; GFX9-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
     ; GFX9-MESA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
     ; GFX9-MESA: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C3]](s64)
     ; GFX9-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load 2 from unknown-address + 16, addrspace 1)
     ; GFX9-MESA: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64)
     ; GFX9-MESA: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load 2 from unknown-address + 18, addrspace 1)
-    ; GFX9-MESA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32)
-    ; GFX9-MESA: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C1]]
-    ; GFX9-MESA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32)
-    ; GFX9-MESA: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]]
+    ; GFX9-MESA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32)
+    ; GFX9-MESA: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C1]]
+    ; GFX9-MESA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32)
+    ; GFX9-MESA: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C1]]
     ; GFX9-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C2]](s32)
     ; GFX9-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]]
-    ; GFX9-MESA: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32)
     ; GFX9-MESA: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C4]](s64)
     ; GFX9-MESA: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load 2 from unknown-address + 20, addrspace 1)
     ; GFX9-MESA: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64)
     ; GFX9-MESA: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load 2 from unknown-address + 22, addrspace 1)
-    ; GFX9-MESA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD10]](s32)
-    ; GFX9-MESA: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C1]]
-    ; GFX9-MESA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32)
-    ; GFX9-MESA: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C1]]
+    ; GFX9-MESA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD10]](s32)
+    ; GFX9-MESA: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]]
+    ; GFX9-MESA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32)
+    ; GFX9-MESA: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C1]]
     ; GFX9-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32)
     ; GFX9-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL5]]
-    ; GFX9-MESA: [[INSERT2:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[BUILD_VECTOR1]](<2 x s32>), 0
-    ; GFX9-MESA: [[INSERT3:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT2]], [[OR5]](s32), 64
-    ; GFX9-MESA: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT3]](<3 x s32>)
-    ; GFX9-MESA: [[COPY14:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
-    ; GFX9-MESA: [[COPY15:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
-    ; GFX9-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[COPY14]](s96)
-    ; GFX9-MESA: $vgpr3_vgpr4_vgpr5 = COPY [[COPY15]](s96)
+    ; GFX9-MESA: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32), [[OR5]](s32)
+    ; GFX9-MESA: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
+    ; GFX9-MESA: [[COPY13:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; GFX9-MESA: [[COPY14:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; GFX9-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[COPY13]](s96)
+    ; GFX9-MESA: $vgpr3_vgpr4_vgpr5 = COPY [[COPY14]](s96)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<2 x s96>) = G_LOAD %0 :: (load 24, align 2, addrspace 1)
     %2:_(s96) = G_EXTRACT %1, 0
@@ -16910,23 +17127,21 @@ body: |
     ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
     ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
     ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 4 from unknown-address + 8, addrspace 1)
-    ; SI: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; SI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY [[DEF]](<3 x s32>)
-    ; SI: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[COPY1]], [[LOAD]](<2 x s32>), 0
-    ; SI: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[LOAD1]](s32), 64
-    ; SI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT1]](<3 x s32>)
+    ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
+    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32)
+    ; SI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
     ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
     ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
     ; SI: [[LOAD2:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p1) :: (load 8 from unknown-address + 12, align 4, addrspace 1)
     ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
     ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 4 from unknown-address + 20, addrspace 1)
-    ; SI: [[INSERT2:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[LOAD2]](<2 x s32>), 0
-    ; SI: [[INSERT3:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT2]], [[LOAD3]](s32), 64
-    ; SI: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT3]](<3 x s32>)
-    ; SI: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
-    ; SI: [[COPY3:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
-    ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY2]](s96)
-    ; SI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY3]](s96)
+    ; SI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD2]](<2 x s32>)
+    ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[UV3]](s32), [[LOAD3]](s32)
+    ; SI: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
+    ; SI: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; SI: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
+    ; SI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
     ; CI-HSA-LABEL: name: test_extload_global_v2s96_from_24_align4
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; CI-HSA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load 12, align 4, addrspace 1)
@@ -17012,10 +17227,9 @@ body: |
     ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
     ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C1]](s64)
     ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 4 from unknown-address + 20, addrspace 1)
-    ; SI: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; SI: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[LOAD1]](<2 x s32>), 0
-    ; SI: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[LOAD2]](s32), 64
-    ; SI: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT1]](<3 x s32>)
+    ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<2 x s32>)
+    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD2]](s32)
+    ; SI: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
     ; SI: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
     ; SI: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
     ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir
index 839bf649863f..1dd4347e1728 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir
@@ -994,59 +994,181 @@ body: |
 
     ; SI-LABEL: name: test_load_local_s24_align2
     ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3)
+    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3)
     ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
     ; SI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 2, align 2, addrspace 3)
-    ; SI: [[DEF:%[0-9]+]]:_(s24) = G_IMPLICIT_DEF
-    ; SI: [[INSERT:%[0-9]+]]:_(s24) = G_INSERT [[DEF]], [[LOAD]](s16), 0
-    ; SI: [[INSERT1:%[0-9]+]]:_(s24) = G_INSERT [[INSERT]], [[LOAD1]](s8), 16
-    ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INSERT1]](s24)
-    ; SI: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 2, align 2, addrspace 3)
+    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
+    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C3]](s32)
+    ; SI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; SI: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; SI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]]
+    ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
+    ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C5]]
+    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+    ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; SI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; SI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C4]]
+    ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
+    ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[DEF]](s32)
+    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C5]]
+    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY3]](s32)
+    ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; SI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C2]](s32)
+    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[OR2]](s32)
+    ; SI: $vgpr0 = COPY [[COPY5]](s32)
     ; CI-LABEL: name: test_load_local_s24_align2
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3)
+    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3)
     ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
     ; CI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 2, align 2, addrspace 3)
-    ; CI: [[DEF:%[0-9]+]]:_(s24) = G_IMPLICIT_DEF
-    ; CI: [[INSERT:%[0-9]+]]:_(s24) = G_INSERT [[DEF]], [[LOAD]](s16), 0
-    ; CI: [[INSERT1:%[0-9]+]]:_(s24) = G_INSERT [[INSERT]], [[LOAD1]](s8), 16
-    ; CI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INSERT1]](s24)
-    ; CI: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 2, align 2, addrspace 3)
+    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
+    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C3]](s32)
+    ; CI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CI: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; CI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]]
+    ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
+    ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C5]]
+    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+    ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; CI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; CI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C4]]
+    ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
+    ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[DEF]](s32)
+    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C5]]
+    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY3]](s32)
+    ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; CI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C2]](s32)
+    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[OR2]](s32)
+    ; CI: $vgpr0 = COPY [[COPY5]](s32)
     ; CI-DS128-LABEL: name: test_load_local_s24_align2
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI-DS128: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3)
+    ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3)
     ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
     ; CI-DS128: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 2, align 2, addrspace 3)
-    ; CI-DS128: [[DEF:%[0-9]+]]:_(s24) = G_IMPLICIT_DEF
-    ; CI-DS128: [[INSERT:%[0-9]+]]:_(s24) = G_INSERT [[DEF]], [[LOAD]](s16), 0
-    ; CI-DS128: [[INSERT1:%[0-9]+]]:_(s24) = G_INSERT [[INSERT]], [[LOAD1]](s8), 16
-    ; CI-DS128: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INSERT1]](s24)
-    ; CI-DS128: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 2, align 2, addrspace 3)
+    ; CI-DS128: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-DS128: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; CI-DS128: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-DS128: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
+    ; CI-DS128: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CI-DS128: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C3]](s32)
+    ; CI-DS128: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CI-DS128: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CI-DS128: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; CI-DS128: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]]
+    ; CI-DS128: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
+    ; CI-DS128: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-DS128: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; CI-DS128: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C5]]
+    ; CI-DS128: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+    ; CI-DS128: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI-DS128: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; CI-DS128: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; CI-DS128: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C4]]
+    ; CI-DS128: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
+    ; CI-DS128: [[COPY4:%[0-9]+]]:_(s32) = COPY [[DEF]](s32)
+    ; CI-DS128: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C5]]
+    ; CI-DS128: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY3]](s32)
+    ; CI-DS128: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; CI-DS128: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; CI-DS128: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; CI-DS128: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; CI-DS128: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C2]](s32)
+    ; CI-DS128: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; CI-DS128: [[COPY5:%[0-9]+]]:_(s32) = COPY [[OR2]](s32)
+    ; CI-DS128: $vgpr0 = COPY [[COPY5]](s32)
     ; VI-LABEL: name: test_load_local_s24_align2
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3)
+    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3)
     ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
     ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 2, align 2, addrspace 3)
-    ; VI: [[DEF:%[0-9]+]]:_(s24) = G_IMPLICIT_DEF
-    ; VI: [[INSERT:%[0-9]+]]:_(s24) = G_INSERT [[DEF]], [[LOAD]](s16), 0
-    ; VI: [[INSERT1:%[0-9]+]]:_(s24) = G_INSERT [[INSERT]], [[LOAD1]](s8), 16
-    ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INSERT1]](s24)
-    ; VI: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 2, align 2, addrspace 3)
+    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
+    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C3]](s32)
+    ; VI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; VI: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]]
+    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+    ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C4]]
+    ; VI: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C5]](s16)
+    ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; VI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C4]]
+    ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
+    ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C4]]
+    ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C5]](s16)
+    ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C2]](s32)
+    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[OR2]](s32)
+    ; VI: $vgpr0 = COPY [[COPY1]](s32)
     ; GFX9-LABEL: name: test_load_local_s24_align2
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3)
+    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3)
     ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
     ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 2, align 2, addrspace 3)
-    ; GFX9: [[DEF:%[0-9]+]]:_(s24) = G_IMPLICIT_DEF
-    ; GFX9: [[INSERT:%[0-9]+]]:_(s24) = G_INSERT [[DEF]], [[LOAD]](s16), 0
-    ; GFX9: [[INSERT1:%[0-9]+]]:_(s24) = G_INSERT [[INSERT]], [[LOAD1]](s8), 16
-    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INSERT1]](s24)
-    ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 2, align 2, addrspace 3)
+    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
+    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C3]](s32)
+    ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; GFX9: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]]
+    ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+    ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C4]]
+    ; GFX9: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C5]](s16)
+    ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; GFX9: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C4]]
+    ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
+    ; GFX9: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C4]]
+    ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C5]](s16)
+    ; GFX9: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C2]](s32)
+    ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[OR2]](s32)
+    ; GFX9: $vgpr0 = COPY [[COPY1]](s32)
     ; GFX9-UNALIGNED-LABEL: name: test_load_local_s24_align2
     ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; GFX9-UNALIGNED: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load 2, addrspace 3)
@@ -1060,26 +1182,70 @@ body: |
     ; GFX9-UNALIGNED: $vgpr0 = COPY [[COPY1]](s32)
     ; GFX10-LABEL: name: test_load_local_s24_align2
     ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3)
+    ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3)
     ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
     ; GFX10: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 2, align 2, addrspace 3)
-    ; GFX10: [[DEF:%[0-9]+]]:_(s24) = G_IMPLICIT_DEF
-    ; GFX10: [[INSERT:%[0-9]+]]:_(s24) = G_INSERT [[DEF]], [[LOAD]](s16), 0
-    ; GFX10: [[INSERT1:%[0-9]+]]:_(s24) = G_INSERT [[INSERT]], [[LOAD1]](s8), 16
-    ; GFX10: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INSERT1]](s24)
-    ; GFX10: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 2, align 2, addrspace 3)
+    ; GFX10: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX10: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; GFX10: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX10: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
+    ; GFX10: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX10: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C3]](s32)
+    ; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; GFX10: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX10: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX10: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]]
+    ; GFX10: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+    ; GFX10: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C4]]
+    ; GFX10: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX10: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C5]](s16)
+    ; GFX10: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX10: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; GFX10: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C4]]
+    ; GFX10: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
+    ; GFX10: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C4]]
+    ; GFX10: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C5]](s16)
+    ; GFX10: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; GFX10: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; GFX10: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; GFX10: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C2]](s32)
+    ; GFX10: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY [[OR2]](s32)
+    ; GFX10: $vgpr0 = COPY [[COPY1]](s32)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_s24_align2
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3)
+    ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3)
     ; GFX10-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
     ; GFX10-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 2, align 2, addrspace 3)
-    ; GFX10-UNALIGNED: [[DEF:%[0-9]+]]:_(s24) = G_IMPLICIT_DEF
-    ; GFX10-UNALIGNED: [[INSERT:%[0-9]+]]:_(s24) = G_INSERT [[DEF]], [[LOAD]](s16), 0
-    ; GFX10-UNALIGNED: [[INSERT1:%[0-9]+]]:_(s24) = G_INSERT [[INSERT]], [[LOAD1]](s8), 16
-    ; GFX10-UNALIGNED: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INSERT1]](s24)
-    ; GFX10-UNALIGNED: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 2, align 2, addrspace 3)
+    ; GFX10-UNALIGNED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX10-UNALIGNED: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; GFX10-UNALIGNED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX10-UNALIGNED: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
+    ; GFX10-UNALIGNED: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX10-UNALIGNED: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C3]](s32)
+    ; GFX10-UNALIGNED: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; GFX10-UNALIGNED: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX10-UNALIGNED: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX10-UNALIGNED: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]]
+    ; GFX10-UNALIGNED: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+    ; GFX10-UNALIGNED: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C4]]
+    ; GFX10-UNALIGNED: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX10-UNALIGNED: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C5]](s16)
+    ; GFX10-UNALIGNED: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX10-UNALIGNED: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; GFX10-UNALIGNED: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C4]]
+    ; GFX10-UNALIGNED: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
+    ; GFX10-UNALIGNED: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C4]]
+    ; GFX10-UNALIGNED: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C5]](s16)
+    ; GFX10-UNALIGNED: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; GFX10-UNALIGNED: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; GFX10-UNALIGNED: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; GFX10-UNALIGNED: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C2]](s32)
+    ; GFX10-UNALIGNED: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; GFX10-UNALIGNED: [[COPY1:%[0-9]+]]:_(s32) = COPY [[OR2]](s32)
+    ; GFX10-UNALIGNED: $vgpr0 = COPY [[COPY1]](s32)
     %0:_(p3) = COPY $vgpr0
     %1:_(s24) = G_LOAD %0 :: (load 3, align 2, addrspace 3)
     %2:_(s32) = G_ANYEXT %1
@@ -1094,59 +1260,171 @@ body: |
 
     ; SI-LABEL: name: test_load_local_s24_align1
     ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[COPY]](p3) :: (load 2, align 1, addrspace 3)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3)
+    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
     ; SI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 2, addrspace 3)
-    ; SI: [[DEF:%[0-9]+]]:_(s24) = G_IMPLICIT_DEF
-    ; SI: [[INSERT:%[0-9]+]]:_(s24) = G_INSERT [[DEF]], [[LOAD]](s16), 0
-    ; SI: [[INSERT1:%[0-9]+]]:_(s24) = G_INSERT [[INSERT]], [[LOAD1]](s8), 16
-    ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INSERT1]](s24)
-    ; SI: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3)
+    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3)
+    ; SI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; SI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; SI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
+    ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C4]]
+    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+    ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; SI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; SI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[DEF]](s32)
+    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C4]]
+    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32)
+    ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; SI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
+    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[OR2]](s32)
+    ; SI: $vgpr0 = COPY [[COPY4]](s32)
     ; CI-LABEL: name: test_load_local_s24_align1
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[COPY]](p3) :: (load 2, align 1, addrspace 3)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3)
+    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
     ; CI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 2, addrspace 3)
-    ; CI: [[DEF:%[0-9]+]]:_(s24) = G_IMPLICIT_DEF
-    ; CI: [[INSERT:%[0-9]+]]:_(s24) = G_INSERT [[DEF]], [[LOAD]](s16), 0
-    ; CI: [[INSERT1:%[0-9]+]]:_(s24) = G_INSERT [[INSERT]], [[LOAD1]](s8), 16
-    ; CI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INSERT1]](s24)
-    ; CI: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3)
+    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3)
+    ; CI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; CI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
+    ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C4]]
+    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+    ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; CI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; CI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[DEF]](s32)
+    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C4]]
+    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32)
+    ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; CI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
+    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[OR2]](s32)
+    ; CI: $vgpr0 = COPY [[COPY4]](s32)
     ; CI-DS128-LABEL: name: test_load_local_s24_align1
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI-DS128: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[COPY]](p3) :: (load 2, align 1, addrspace 3)
-    ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3)
+    ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
     ; CI-DS128: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 2, addrspace 3)
-    ; CI-DS128: [[DEF:%[0-9]+]]:_(s24) = G_IMPLICIT_DEF
-    ; CI-DS128: [[INSERT:%[0-9]+]]:_(s24) = G_INSERT [[DEF]], [[LOAD]](s16), 0
-    ; CI-DS128: [[INSERT1:%[0-9]+]]:_(s24) = G_INSERT [[INSERT]], [[LOAD1]](s8), 16
-    ; CI-DS128: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INSERT1]](s24)
-    ; CI-DS128: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3)
+    ; CI-DS128: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CI-DS128: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; CI-DS128: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3)
+    ; CI-DS128: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CI-DS128: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CI-DS128: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; CI-DS128: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; CI-DS128: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-DS128: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
+    ; CI-DS128: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-DS128: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+    ; CI-DS128: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C4]]
+    ; CI-DS128: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+    ; CI-DS128: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI-DS128: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; CI-DS128: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; CI-DS128: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; CI-DS128: [[COPY3:%[0-9]+]]:_(s32) = COPY [[DEF]](s32)
+    ; CI-DS128: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C4]]
+    ; CI-DS128: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32)
+    ; CI-DS128: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; CI-DS128: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; CI-DS128: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; CI-DS128: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; CI-DS128: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-DS128: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
+    ; CI-DS128: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; CI-DS128: [[COPY4:%[0-9]+]]:_(s32) = COPY [[OR2]](s32)
+    ; CI-DS128: $vgpr0 = COPY [[COPY4]](s32)
     ; VI-LABEL: name: test_load_local_s24_align1
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[COPY]](p3) :: (load 2, align 1, addrspace 3)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3)
+    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
     ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 2, addrspace 3)
-    ; VI: [[DEF:%[0-9]+]]:_(s24) = G_IMPLICIT_DEF
-    ; VI: [[INSERT:%[0-9]+]]:_(s24) = G_INSERT [[DEF]], [[LOAD]](s16), 0
-    ; VI: [[INSERT1:%[0-9]+]]:_(s24) = G_INSERT [[INSERT]], [[LOAD1]](s8), 16
-    ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INSERT1]](s24)
-    ; VI: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3)
+    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3)
+    ; VI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; VI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
+    ; VI: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
+    ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; VI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
+    ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]]
+    ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
+    ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
+    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[OR2]](s32)
+    ; VI: $vgpr0 = COPY [[COPY1]](s32)
     ; GFX9-LABEL: name: test_load_local_s24_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[COPY]](p3) :: (load 2, align 1, addrspace 3)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3)
+    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
     ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 2, addrspace 3)
-    ; GFX9: [[DEF:%[0-9]+]]:_(s24) = G_IMPLICIT_DEF
-    ; GFX9: [[INSERT:%[0-9]+]]:_(s24) = G_INSERT [[DEF]], [[LOAD]](s16), 0
-    ; GFX9: [[INSERT1:%[0-9]+]]:_(s24) = G_INSERT [[INSERT]], [[LOAD1]](s8), 16
-    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INSERT1]](s24)
-    ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3)
+    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3)
+    ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; GFX9: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
+    ; GFX9: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
+    ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; GFX9: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
+    ; GFX9: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]]
+    ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
+    ; GFX9: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
+    ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[OR2]](s32)
+    ; GFX9: $vgpr0 = COPY [[COPY1]](s32)
     ; GFX9-UNALIGNED-LABEL: name: test_load_local_s24_align1
     ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; GFX9-UNALIGNED: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load 2, align 1, addrspace 3)
@@ -1160,26 +1438,66 @@ body: |
     ; GFX9-UNALIGNED: $vgpr0 = COPY [[COPY1]](s32)
     ; GFX10-LABEL: name: test_load_local_s24_align1
     ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[COPY]](p3) :: (load 2, align 1, addrspace 3)
-    ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3)
+    ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
     ; GFX10: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 2, addrspace 3)
-    ; GFX10: [[DEF:%[0-9]+]]:_(s24) = G_IMPLICIT_DEF
-    ; GFX10: [[INSERT:%[0-9]+]]:_(s24) = G_INSERT [[DEF]], [[LOAD]](s16), 0
-    ; GFX10: [[INSERT1:%[0-9]+]]:_(s24) = G_INSERT [[INSERT]], [[LOAD1]](s8), 16
-    ; GFX10: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INSERT1]](s24)
-    ; GFX10: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3)
+    ; GFX10: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX10: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX10: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3)
+    ; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; GFX10: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX10: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX10: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; GFX10: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; GFX10: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
+    ; GFX10: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX10: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
+    ; GFX10: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX10: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; GFX10: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; GFX10: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
+    ; GFX10: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]]
+    ; GFX10: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
+    ; GFX10: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; GFX10: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; GFX10: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; GFX10: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX10: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
+    ; GFX10: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY [[OR2]](s32)
+    ; GFX10: $vgpr0 = COPY [[COPY1]](s32)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_s24_align1
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[COPY]](p3) :: (load 2, align 1, addrspace 3)
-    ; GFX10-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3)
+    ; GFX10-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
     ; GFX10-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 2, addrspace 3)
-    ; GFX10-UNALIGNED: [[DEF:%[0-9]+]]:_(s24) = G_IMPLICIT_DEF
-    ; GFX10-UNALIGNED: [[INSERT:%[0-9]+]]:_(s24) = G_INSERT [[DEF]], [[LOAD]](s16), 0
-    ; GFX10-UNALIGNED: [[INSERT1:%[0-9]+]]:_(s24) = G_INSERT [[INSERT]], [[LOAD1]](s8), 16
-    ; GFX10-UNALIGNED: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INSERT1]](s24)
-    ; GFX10-UNALIGNED: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3)
+    ; GFX10-UNALIGNED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX10-UNALIGNED: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX10-UNALIGNED: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3)
+    ; GFX10-UNALIGNED: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; GFX10-UNALIGNED: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX10-UNALIGNED: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX10-UNALIGNED: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; GFX10-UNALIGNED: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; GFX10-UNALIGNED: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
+    ; GFX10-UNALIGNED: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX10-UNALIGNED: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
+    ; GFX10-UNALIGNED: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX10-UNALIGNED: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; GFX10-UNALIGNED: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; GFX10-UNALIGNED: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
+    ; GFX10-UNALIGNED: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]]
+    ; GFX10-UNALIGNED: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
+    ; GFX10-UNALIGNED: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; GFX10-UNALIGNED: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; GFX10-UNALIGNED: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; GFX10-UNALIGNED: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX10-UNALIGNED: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
+    ; GFX10-UNALIGNED: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; GFX10-UNALIGNED: [[COPY1:%[0-9]+]]:_(s32) = COPY [[OR2]](s32)
+    ; GFX10-UNALIGNED: $vgpr0 = COPY [[COPY1]](s32)
     %0:_(p3) = COPY $vgpr0
     %1:_(s24) = G_LOAD %0 :: (load 3, align 1, addrspace 3)
     %2:_(s32) = G_ANYEXT %1
@@ -2060,7 +2378,6 @@ body: |
     ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]]
     ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
     ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
     ; SI: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
     ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1 from unknown-address + 8, addrspace 3)
     ; SI: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
@@ -2083,10 +2400,8 @@ body: |
     ; SI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C3]]
     ; SI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
     ; SI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; SI: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; SI: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<2 x s32>), 0
-    ; SI: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[OR8]](s32), 64
-    ; SI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT1]](<3 x s32>)
+    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
+    ; SI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
     ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; CI-LABEL: name: test_load_local_s96_align16
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
@@ -2141,7 +2456,6 @@ body: |
     ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]]
     ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
     ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
     ; CI: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
     ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1 from unknown-address + 8, addrspace 3)
     ; CI: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
@@ -2164,10 +2478,8 @@ body: |
     ; CI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C3]]
     ; CI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
     ; CI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; CI: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; CI: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<2 x s32>), 0
-    ; CI: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[OR8]](s32), 64
-    ; CI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT1]](<3 x s32>)
+    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
+    ; CI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
     ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; CI-DS128-LABEL: name: test_load_local_s96_align16
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
@@ -2222,7 +2534,6 @@ body: |
     ; CI-DS128: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]]
     ; CI-DS128: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
     ; CI-DS128: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; CI-DS128: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
     ; CI-DS128: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
     ; CI-DS128: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1 from unknown-address + 8, addrspace 3)
     ; CI-DS128: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
@@ -2245,10 +2556,8 @@ body: |
     ; CI-DS128: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C3]]
     ; CI-DS128: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
     ; CI-DS128: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; CI-DS128: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; CI-DS128: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<2 x s32>), 0
-    ; CI-DS128: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[OR8]](s32), 64
-    ; CI-DS128: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT1]](<3 x s32>)
+    ; CI-DS128: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
+    ; CI-DS128: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
     ; CI-DS128: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; VI-LABEL: name: test_load_local_s96_align16
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
@@ -2303,7 +2612,6 @@ body: |
     ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]]
     ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
     ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
     ; VI: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
     ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1 from unknown-address + 8, addrspace 3)
     ; VI: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
@@ -2326,10 +2634,8 @@ body: |
     ; VI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C3]]
     ; VI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
     ; VI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; VI: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; VI: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<2 x s32>), 0
-    ; VI: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[OR8]](s32), 64
-    ; VI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT1]](<3 x s32>)
+    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
+    ; VI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
     ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; GFX9-LABEL: name: test_load_local_s96_align16
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
@@ -2384,7 +2690,6 @@ body: |
     ; GFX9: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]]
     ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
     ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
     ; GFX9: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
     ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1 from unknown-address + 8, addrspace 3)
     ; GFX9: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
@@ -2407,10 +2712,8 @@ body: |
     ; GFX9: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C3]]
     ; GFX9: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
     ; GFX9: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; GFX9: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; GFX9: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<2 x s32>), 0
-    ; GFX9: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[OR8]](s32), 64
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT1]](<3 x s32>)
+    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
+    ; GFX9: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
     ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; GFX9-UNALIGNED-LABEL: name: test_load_local_s96_align16
     ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
@@ -2470,7 +2773,6 @@ body: |
     ; GFX10: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]]
     ; GFX10: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
     ; GFX10: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
     ; GFX10: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
     ; GFX10: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1 from unknown-address + 8, addrspace 3)
     ; GFX10: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
@@ -2493,10 +2795,8 @@ body: |
     ; GFX10: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C3]]
     ; GFX10: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
     ; GFX10: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; GFX10: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; GFX10: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<2 x s32>), 0
-    ; GFX10: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[OR8]](s32), 64
-    ; GFX10: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT1]](<3 x s32>)
+    ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
+    ; GFX10: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
     ; GFX10: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_s96_align16
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
@@ -2551,7 +2851,6 @@ body: |
     ; GFX10-UNALIGNED: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]]
     ; GFX10-UNALIGNED: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
     ; GFX10-UNALIGNED: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; GFX10-UNALIGNED: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
     ; GFX10-UNALIGNED: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
     ; GFX10-UNALIGNED: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1 from unknown-address + 8, addrspace 3)
     ; GFX10-UNALIGNED: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
@@ -2574,10 +2873,8 @@ body: |
     ; GFX10-UNALIGNED: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C3]]
     ; GFX10-UNALIGNED: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
     ; GFX10-UNALIGNED: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; GFX10-UNALIGNED: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; GFX10-UNALIGNED: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<2 x s32>), 0
-    ; GFX10-UNALIGNED: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[OR8]](s32), 64
-    ; GFX10-UNALIGNED: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT1]](<3 x s32>)
+    ; GFX10-UNALIGNED: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
+    ; GFX10-UNALIGNED: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
     ; GFX10-UNALIGNED: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     %0:_(p3) = COPY $vgpr0
     %1:_(s96) = G_LOAD %0 :: (load 12, align 1, addrspace 3)
@@ -2596,10 +2893,9 @@ body: |
     ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
     ; SI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
     ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 4 from unknown-address + 8, align 8, addrspace 3)
-    ; SI: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; SI: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[LOAD]](<2 x s32>), 0
-    ; SI: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[LOAD1]](s32), 64
-    ; SI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT1]](<3 x s32>)
+    ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
+    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32)
+    ; SI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
     ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; CI-LABEL: name: test_load_local_s96_align8
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
@@ -2607,10 +2903,9 @@ body: |
     ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
     ; CI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
     ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 4 from unknown-address + 8, align 8, addrspace 3)
-    ; CI: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; CI: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[LOAD]](<2 x s32>), 0
-    ; CI: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[LOAD1]](s32), 64
-    ; CI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT1]](<3 x s32>)
+    ; CI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
+    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32)
+    ; CI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
     ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; CI-DS128-LABEL: name: test_load_local_s96_align8
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
@@ -2618,10 +2913,9 @@ body: |
     ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
     ; CI-DS128: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
     ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 4 from unknown-address + 8, align 8, addrspace 3)
-    ; CI-DS128: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; CI-DS128: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[LOAD]](<2 x s32>), 0
-    ; CI-DS128: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[LOAD1]](s32), 64
-    ; CI-DS128: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT1]](<3 x s32>)
+    ; CI-DS128: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
+    ; CI-DS128: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32)
+    ; CI-DS128: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
     ; CI-DS128: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; VI-LABEL: name: test_load_local_s96_align8
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
@@ -2629,10 +2923,9 @@ body: |
     ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
     ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
     ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 4 from unknown-address + 8, align 8, addrspace 3)
-    ; VI: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; VI: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[LOAD]](<2 x s32>), 0
-    ; VI: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[LOAD1]](s32), 64
-    ; VI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT1]](<3 x s32>)
+    ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
+    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32)
+    ; VI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
     ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; GFX9-LABEL: name: test_load_local_s96_align8
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
@@ -2640,10 +2933,9 @@ body: |
     ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
     ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
     ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 4 from unknown-address + 8, align 8, addrspace 3)
-    ; GFX9: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; GFX9: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[LOAD]](<2 x s32>), 0
-    ; GFX9: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[LOAD1]](s32), 64
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT1]](<3 x s32>)
+    ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
+    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32)
+    ; GFX9: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
     ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; GFX9-UNALIGNED-LABEL: name: test_load_local_s96_align8
     ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
@@ -2656,10 +2948,9 @@ body: |
     ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
     ; GFX10: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
     ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 4 from unknown-address + 8, align 8, addrspace 3)
-    ; GFX10: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; GFX10: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[LOAD]](<2 x s32>), 0
-    ; GFX10: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[LOAD1]](s32), 64
-    ; GFX10: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT1]](<3 x s32>)
+    ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
+    ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32)
+    ; GFX10: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
     ; GFX10: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_s96_align8
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
@@ -2667,10 +2958,9 @@ body: |
     ; GFX10-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
     ; GFX10-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
     ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 4 from unknown-address + 8, align 8, addrspace 3)
-    ; GFX10-UNALIGNED: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; GFX10-UNALIGNED: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[LOAD]](<2 x s32>), 0
-    ; GFX10-UNALIGNED: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[LOAD1]](s32), 64
-    ; GFX10-UNALIGNED: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT1]](<3 x s32>)
+    ; GFX10-UNALIGNED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
+    ; GFX10-UNALIGNED: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32)
+    ; GFX10-UNALIGNED: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
     ; GFX10-UNALIGNED: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     %0:_(p3) = COPY $vgpr0
     %1:_(s96) = G_LOAD %0 :: (load 12, align 8, addrspace 3)
@@ -2689,10 +2979,9 @@ body: |
     ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
     ; SI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
     ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 4 from unknown-address + 8, addrspace 3)
-    ; SI: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; SI: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[LOAD]](<2 x s32>), 0
-    ; SI: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[LOAD1]](s32), 64
-    ; SI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT1]](<3 x s32>)
+    ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
+    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32)
+    ; SI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
     ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; CI-LABEL: name: test_load_local_s96_align4
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
@@ -2700,10 +2989,9 @@ body: |
     ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
     ; CI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
     ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 4 from unknown-address + 8, addrspace 3)
-    ; CI: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; CI: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[LOAD]](<2 x s32>), 0
-    ; CI: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[LOAD1]](s32), 64
-    ; CI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT1]](<3 x s32>)
+    ; CI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
+    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32)
+    ; CI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
     ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; CI-DS128-LABEL: name: test_load_local_s96_align4
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
@@ -2711,10 +2999,9 @@ body: |
     ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
     ; CI-DS128: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
     ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 4 from unknown-address + 8, addrspace 3)
-    ; CI-DS128: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; CI-DS128: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[LOAD]](<2 x s32>), 0
-    ; CI-DS128: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[LOAD1]](s32), 64
-    ; CI-DS128: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT1]](<3 x s32>)
+    ; CI-DS128: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
+    ; CI-DS128: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32)
+    ; CI-DS128: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
     ; CI-DS128: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; VI-LABEL: name: test_load_local_s96_align4
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
@@ -2722,10 +3009,9 @@ body: |
     ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
     ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
     ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 4 from unknown-address + 8, addrspace 3)
-    ; VI: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; VI: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[LOAD]](<2 x s32>), 0
-    ; VI: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[LOAD1]](s32), 64
-    ; VI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT1]](<3 x s32>)
+    ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
+    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32)
+    ; VI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
     ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; GFX9-LABEL: name: test_load_local_s96_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
@@ -2733,10 +3019,9 @@ body: |
     ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
     ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
     ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 4 from unknown-address + 8, addrspace 3)
-    ; GFX9: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; GFX9: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[LOAD]](<2 x s32>), 0
-    ; GFX9: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[LOAD1]](s32), 64
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT1]](<3 x s32>)
+    ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
+    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32)
+    ; GFX9: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
     ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; GFX9-UNALIGNED-LABEL: name: test_load_local_s96_align4
     ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
@@ -2749,10 +3034,9 @@ body: |
     ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
     ; GFX10: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
     ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 4 from unknown-address + 8, addrspace 3)
-    ; GFX10: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; GFX10: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[LOAD]](<2 x s32>), 0
-    ; GFX10: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[LOAD1]](s32), 64
-    ; GFX10: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT1]](<3 x s32>)
+    ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
+    ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32)
+    ; GFX10: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
     ; GFX10: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_s96_align4
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
@@ -2760,10 +3044,9 @@ body: |
     ; GFX10-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
     ; GFX10-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
     ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 4 from unknown-address + 8, addrspace 3)
-    ; GFX10-UNALIGNED: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; GFX10-UNALIGNED: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[LOAD]](<2 x s32>), 0
-    ; GFX10-UNALIGNED: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[LOAD1]](s32), 64
-    ; GFX10-UNALIGNED: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT1]](<3 x s32>)
+    ; GFX10-UNALIGNED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
+    ; GFX10-UNALIGNED: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32)
+    ; GFX10-UNALIGNED: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
     ; GFX10-UNALIGNED: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     %0:_(p3) = COPY $vgpr0
     %1:_(s96) = G_LOAD %0 :: (load 12, align 4, addrspace 3)
@@ -2801,7 +3084,6 @@ body: |
     ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]]
     ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
     ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32)
     ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
     ; SI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
     ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 2 from unknown-address + 8, addrspace 3)
@@ -2813,10 +3095,8 @@ body: |
     ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]]
     ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
     ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; SI: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; SI: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<2 x s32>), 0
-    ; SI: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[OR2]](s32), 64
-    ; SI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT1]](<3 x s32>)
+    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
+    ; SI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
     ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; CI-LABEL: name: test_load_local_s96_align2
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
@@ -2843,7 +3123,6 @@ body: |
     ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]]
     ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
     ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32)
     ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
     ; CI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
     ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 2 from unknown-address + 8, addrspace 3)
@@ -2855,10 +3134,8 @@ body: |
     ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]]
     ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
     ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; CI: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; CI: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<2 x s32>), 0
-    ; CI: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[OR2]](s32), 64
-    ; CI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT1]](<3 x s32>)
+    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
+    ; CI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
     ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; CI-DS128-LABEL: name: test_load_local_s96_align2
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
@@ -2885,7 +3162,6 @@ body: |
     ; CI-DS128: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]]
     ; CI-DS128: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
     ; CI-DS128: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; CI-DS128: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32)
     ; CI-DS128: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
     ; CI-DS128: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
     ; CI-DS128: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 2 from unknown-address + 8, addrspace 3)
@@ -2897,10 +3173,8 @@ body: |
     ; CI-DS128: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]]
     ; CI-DS128: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
     ; CI-DS128: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; CI-DS128: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; CI-DS128: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<2 x s32>), 0
-    ; CI-DS128: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[OR2]](s32), 64
-    ; CI-DS128: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT1]](<3 x s32>)
+    ; CI-DS128: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
+    ; CI-DS128: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
     ; CI-DS128: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; VI-LABEL: name: test_load_local_s96_align2
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
@@ -2927,7 +3201,6 @@ body: |
     ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]]
     ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
     ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32)
     ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
     ; VI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
     ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 2 from unknown-address + 8, addrspace 3)
@@ -2939,10 +3212,8 @@ body: |
     ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]]
     ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
     ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; VI: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; VI: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<2 x s32>), 0
-    ; VI: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[OR2]](s32), 64
-    ; VI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT1]](<3 x s32>)
+    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
+    ; VI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
     ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; GFX9-LABEL: name: test_load_local_s96_align2
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
@@ -2969,7 +3240,6 @@ body: |
     ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]]
     ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
     ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32)
     ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
     ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
     ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 2 from unknown-address + 8, addrspace 3)
@@ -2981,10 +3251,8 @@ body: |
     ; GFX9: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]]
     ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
     ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; GFX9: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; GFX9: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<2 x s32>), 0
-    ; GFX9: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[OR2]](s32), 64
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT1]](<3 x s32>)
+    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
+    ; GFX9: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
     ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; GFX9-UNALIGNED-LABEL: name: test_load_local_s96_align2
     ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
@@ -3016,7 +3284,6 @@ body: |
     ; GFX10: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]]
     ; GFX10: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
     ; GFX10: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32)
     ; GFX10: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
     ; GFX10: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
     ; GFX10: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 2 from unknown-address + 8, addrspace 3)
@@ -3028,10 +3295,8 @@ body: |
     ; GFX10: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]]
     ; GFX10: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
     ; GFX10: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; GFX10: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; GFX10: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<2 x s32>), 0
-    ; GFX10: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[OR2]](s32), 64
-    ; GFX10: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT1]](<3 x s32>)
+    ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
+    ; GFX10: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
     ; GFX10: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_s96_align2
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
@@ -3058,7 +3323,6 @@ body: |
     ; GFX10-UNALIGNED: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]]
     ; GFX10-UNALIGNED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
     ; GFX10-UNALIGNED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; GFX10-UNALIGNED: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32)
     ; GFX10-UNALIGNED: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
     ; GFX10-UNALIGNED: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
     ; GFX10-UNALIGNED: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 2 from unknown-address + 8, addrspace 3)
@@ -3070,10 +3334,8 @@ body: |
     ; GFX10-UNALIGNED: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]]
     ; GFX10-UNALIGNED: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
     ; GFX10-UNALIGNED: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; GFX10-UNALIGNED: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; GFX10-UNALIGNED: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<2 x s32>), 0
-    ; GFX10-UNALIGNED: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[OR2]](s32), 64
-    ; GFX10-UNALIGNED: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT1]](<3 x s32>)
+    ; GFX10-UNALIGNED: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
+    ; GFX10-UNALIGNED: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
     ; GFX10-UNALIGNED: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     %0:_(p3) = COPY $vgpr0
     %1:_(s96) = G_LOAD %0 :: (load 12, align 2, addrspace 3)
@@ -3139,7 +3401,6 @@ body: |
     ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]]
     ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
     ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
     ; SI: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
     ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1 from unknown-address + 8, addrspace 3)
     ; SI: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
@@ -3162,10 +3423,8 @@ body: |
     ; SI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C3]]
     ; SI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
     ; SI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; SI: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; SI: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<2 x s32>), 0
-    ; SI: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[OR8]](s32), 64
-    ; SI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT1]](<3 x s32>)
+    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
+    ; SI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
     ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; CI-LABEL: name: test_load_local_s96_align1
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
@@ -3220,7 +3479,6 @@ body: |
     ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]]
     ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
     ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
     ; CI: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
     ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1 from unknown-address + 8, addrspace 3)
     ; CI: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
@@ -3243,10 +3501,8 @@ body: |
     ; CI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C3]]
     ; CI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
     ; CI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; CI: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; CI: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<2 x s32>), 0
-    ; CI: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[OR8]](s32), 64
-    ; CI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT1]](<3 x s32>)
+    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
+    ; CI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
     ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; CI-DS128-LABEL: name: test_load_local_s96_align1
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
@@ -3301,7 +3557,6 @@ body: |
     ; CI-DS128: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]]
     ; CI-DS128: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
     ; CI-DS128: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; CI-DS128: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
     ; CI-DS128: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
     ; CI-DS128: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1 from unknown-address + 8, addrspace 3)
     ; CI-DS128: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
@@ -3324,10 +3579,8 @@ body: |
     ; CI-DS128: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C3]]
     ; CI-DS128: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
     ; CI-DS128: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; CI-DS128: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; CI-DS128: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<2 x s32>), 0
-    ; CI-DS128: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[OR8]](s32), 64
-    ; CI-DS128: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT1]](<3 x s32>)
+    ; CI-DS128: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
+    ; CI-DS128: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
     ; CI-DS128: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; VI-LABEL: name: test_load_local_s96_align1
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
@@ -3382,7 +3635,6 @@ body: |
     ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]]
     ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
     ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
     ; VI: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
     ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1 from unknown-address + 8, addrspace 3)
     ; VI: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
@@ -3405,10 +3657,8 @@ body: |
     ; VI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C3]]
     ; VI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
     ; VI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; VI: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; VI: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<2 x s32>), 0
-    ; VI: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[OR8]](s32), 64
-    ; VI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT1]](<3 x s32>)
+    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
+    ; VI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
     ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; GFX9-LABEL: name: test_load_local_s96_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
@@ -3463,7 +3713,6 @@ body: |
     ; GFX9: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]]
     ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
     ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
     ; GFX9: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
     ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1 from unknown-address + 8, addrspace 3)
     ; GFX9: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
@@ -3486,10 +3735,8 @@ body: |
     ; GFX9: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C3]]
     ; GFX9: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
     ; GFX9: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; GFX9: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; GFX9: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<2 x s32>), 0
-    ; GFX9: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[OR8]](s32), 64
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT1]](<3 x s32>)
+    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
+    ; GFX9: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
     ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; GFX9-UNALIGNED-LABEL: name: test_load_local_s96_align1
     ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
@@ -3549,7 +3796,6 @@ body: |
     ; GFX10: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]]
     ; GFX10: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
     ; GFX10: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
     ; GFX10: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
     ; GFX10: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1 from unknown-address + 8, addrspace 3)
     ; GFX10: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
@@ -3572,10 +3818,8 @@ body: |
     ; GFX10: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C3]]
     ; GFX10: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
     ; GFX10: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; GFX10: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; GFX10: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<2 x s32>), 0
-    ; GFX10: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[OR8]](s32), 64
-    ; GFX10: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT1]](<3 x s32>)
+    ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
+    ; GFX10: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
     ; GFX10: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_s96_align1
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
@@ -3630,7 +3874,6 @@ body: |
     ; GFX10-UNALIGNED: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]]
     ; GFX10-UNALIGNED: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
     ; GFX10-UNALIGNED: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; GFX10-UNALIGNED: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
     ; GFX10-UNALIGNED: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
     ; GFX10-UNALIGNED: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1 from unknown-address + 8, addrspace 3)
     ; GFX10-UNALIGNED: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
@@ -3653,10 +3896,8 @@ body: |
     ; GFX10-UNALIGNED: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C3]]
     ; GFX10-UNALIGNED: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
     ; GFX10-UNALIGNED: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; GFX10-UNALIGNED: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; GFX10-UNALIGNED: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<2 x s32>), 0
-    ; GFX10-UNALIGNED: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[OR8]](s32), 64
-    ; GFX10-UNALIGNED: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT1]](<3 x s32>)
+    ; GFX10-UNALIGNED: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
+    ; GFX10-UNALIGNED: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
     ; GFX10-UNALIGNED: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
     %0:_(p3) = COPY $vgpr0
     %1:_(s96) = G_LOAD %0 :: (load 12, align 1, addrspace 3)
@@ -9045,159 +9286,141 @@ body: |
     ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
     ; SI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
     ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 2 from unknown-address + 2, addrspace 3)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 2 from unknown-address + 4, addrspace 3)
+    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
     ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]]
+    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]]
     ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]]
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]]
+    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32)
     ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
     ; SI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 2 from unknown-address + 4, addrspace 3)
-    ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+    ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]]
+    ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32)
+    ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; SI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; SI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[DEF]](<2 x s16>)
+    ; SI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
     ; SI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; SI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
-    ; SI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BITCAST]](<2 x s16>), 0
-    ; SI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; SI: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
-    ; SI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0
-    ; SI: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[TRUNC]](s16), 32
-    ; SI: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT3]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; SI: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>)
-    ; SI: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV8]](<3 x s16>), 0
-    ; SI: $vgpr0_vgpr1 = COPY [[INSERT4]](<4 x s16>)
+    ; SI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
     ; CI-LABEL: name: test_load_local_v3s16_align2
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3)
     ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
     ; CI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
     ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 2 from unknown-address + 2, addrspace 3)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 2 from unknown-address + 4, addrspace 3)
+    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
     ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]]
+    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]]
     ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]]
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]]
+    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32)
     ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
     ; CI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 2 from unknown-address + 4, addrspace 3)
-    ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; CI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]]
+    ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32)
+    ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; CI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[DEF]](<2 x s16>)
+    ; CI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
     ; CI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; CI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; CI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
-    ; CI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BITCAST]](<2 x s16>), 0
-    ; CI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; CI: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
-    ; CI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0
-    ; CI: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[TRUNC]](s16), 32
-    ; CI: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT3]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; CI: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>)
-    ; CI: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV8]](<3 x s16>), 0
-    ; CI: $vgpr0_vgpr1 = COPY [[INSERT4]](<4 x s16>)
+    ; CI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
     ; CI-DS128-LABEL: name: test_load_local_v3s16_align2
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3)
     ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
     ; CI-DS128: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
     ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 2 from unknown-address + 2, addrspace 3)
-    ; CI-DS128: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-DS128: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CI-DS128: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; CI-DS128: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 2 from unknown-address + 4, addrspace 3)
+    ; CI-DS128: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
     ; CI-DS128: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
-    ; CI-DS128: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]]
+    ; CI-DS128: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]]
     ; CI-DS128: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
-    ; CI-DS128: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]]
-    ; CI-DS128: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-DS128: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; CI-DS128: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]]
+    ; CI-DS128: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-DS128: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32)
     ; CI-DS128: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
     ; CI-DS128: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; CI-DS128: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; CI-DS128: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; CI-DS128: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 2 from unknown-address + 4, addrspace 3)
-    ; CI-DS128: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; CI-DS128: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CI-DS128: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+    ; CI-DS128: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]]
+    ; CI-DS128: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CI-DS128: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32)
+    ; CI-DS128: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CI-DS128: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; CI-DS128: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; CI-DS128: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[DEF]](<2 x s16>)
+    ; CI-DS128: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
     ; CI-DS128: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CI-DS128: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; CI-DS128: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; CI-DS128: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
-    ; CI-DS128: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BITCAST]](<2 x s16>), 0
-    ; CI-DS128: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; CI-DS128: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
-    ; CI-DS128: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0
-    ; CI-DS128: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[TRUNC]](s16), 32
-    ; CI-DS128: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT3]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; CI-DS128: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>)
-    ; CI-DS128: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV8]](<3 x s16>), 0
-    ; CI-DS128: $vgpr0_vgpr1 = COPY [[INSERT4]](<4 x s16>)
+    ; CI-DS128: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
     ; VI-LABEL: name: test_load_local_v3s16_align2
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3)
     ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
     ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
     ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 2 from unknown-address + 2, addrspace 3)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 2 from unknown-address + 4, addrspace 3)
+    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
     ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]]
+    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]]
     ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]]
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]]
+    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32)
     ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
     ; VI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 2 from unknown-address + 4, addrspace 3)
-    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]]
+    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32)
+    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; VI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; VI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[DEF]](<2 x s16>)
+    ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
     ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
-    ; VI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BITCAST]](<2 x s16>), 0
-    ; VI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; VI: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
-    ; VI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0
-    ; VI: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[TRUNC]](s16), 32
-    ; VI: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT3]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; VI: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>)
-    ; VI: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV8]](<3 x s16>), 0
-    ; VI: $vgpr0_vgpr1 = COPY [[INSERT4]](<4 x s16>)
+    ; VI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
     ; GFX9-LABEL: name: test_load_local_v3s16_align2
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3)
     ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
     ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
     ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 2 from unknown-address + 2, addrspace 3)
-    ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
-    ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
     ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
     ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
     ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 2 from unknown-address + 4, addrspace 3)
-    ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
-    ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
-    ; GFX9: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BUILD_VECTOR_TRUNC]](<2 x s16>), 0
-    ; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; GFX9: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
-    ; GFX9: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0
-    ; GFX9: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[TRUNC]](s16), 32
-    ; GFX9: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT3]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; GFX9: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>)
-    ; GFX9: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV8]](<3 x s16>), 0
-    ; GFX9: $vgpr0_vgpr1 = COPY [[INSERT4]](<4 x s16>)
+    ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+    ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
+    ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+    ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[DEF]](s32)
+    ; GFX9: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[DEF1]](<2 x s16>)
+    ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
+    ; GFX9: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[UV]](<3 x s16>), 0
+    ; GFX9: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
     ; GFX9-UNALIGNED-LABEL: name: test_load_local_v3s16_align2
     ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[COPY]](p3) :: (load 6, align 2, addrspace 3)
@@ -9210,54 +9433,42 @@ body: |
     ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
     ; GFX10: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
     ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 2 from unknown-address + 2, addrspace 3)
-    ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
-    ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
-    ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
     ; GFX10: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
     ; GFX10: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
     ; GFX10: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 2 from unknown-address + 4, addrspace 3)
-    ; GFX10: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; GFX10: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX10: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; GFX10: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
-    ; GFX10: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
-    ; GFX10: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BUILD_VECTOR_TRUNC]](<2 x s16>), 0
-    ; GFX10: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; GFX10: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
-    ; GFX10: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0
-    ; GFX10: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[TRUNC]](s16), 32
-    ; GFX10: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT3]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; GFX10: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>)
-    ; GFX10: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV8]](<3 x s16>), 0
-    ; GFX10: $vgpr0_vgpr1 = COPY [[INSERT4]](<4 x s16>)
+    ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+    ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
+    ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+    ; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[DEF]](s32)
+    ; GFX10: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[DEF1]](<2 x s16>)
+    ; GFX10: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
+    ; GFX10: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX10: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[UV]](<3 x s16>), 0
+    ; GFX10: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_v3s16_align2
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3)
     ; GFX10-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
     ; GFX10-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
     ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 2 from unknown-address + 2, addrspace 3)
-    ; GFX10-UNALIGNED: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
-    ; GFX10-UNALIGNED: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
-    ; GFX10-UNALIGNED: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
     ; GFX10-UNALIGNED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
     ; GFX10-UNALIGNED: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
     ; GFX10-UNALIGNED: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 2 from unknown-address + 4, addrspace 3)
-    ; GFX10-UNALIGNED: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; GFX10-UNALIGNED: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX10-UNALIGNED: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX10-UNALIGNED: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; GFX10-UNALIGNED: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
-    ; GFX10-UNALIGNED: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
-    ; GFX10-UNALIGNED: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BUILD_VECTOR_TRUNC]](<2 x s16>), 0
-    ; GFX10-UNALIGNED: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; GFX10-UNALIGNED: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
-    ; GFX10-UNALIGNED: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0
-    ; GFX10-UNALIGNED: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[TRUNC]](s16), 32
-    ; GFX10-UNALIGNED: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT3]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; GFX10-UNALIGNED: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>)
-    ; GFX10-UNALIGNED: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV8]](<3 x s16>), 0
-    ; GFX10-UNALIGNED: $vgpr0_vgpr1 = COPY [[INSERT4]](<4 x s16>)
+    ; GFX10-UNALIGNED: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; GFX10-UNALIGNED: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+    ; GFX10-UNALIGNED: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
+    ; GFX10-UNALIGNED: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+    ; GFX10-UNALIGNED: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; GFX10-UNALIGNED: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[DEF]](s32)
+    ; GFX10-UNALIGNED: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; GFX10-UNALIGNED: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[DEF1]](<2 x s16>)
+    ; GFX10-UNALIGNED: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
+    ; GFX10-UNALIGNED: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX10-UNALIGNED: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[UV]](<3 x s16>), 0
+    ; GFX10-UNALIGNED: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
     %0:_(p3) = COPY $vgpr0
     %1:_(<3 x s16>) = G_LOAD %0 :: (load 6, align 2, addrspace 3)
     %2:_(<4 x s16>) = G_IMPLICIT_DEF
@@ -9300,14 +9511,8 @@ body: |
     ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32)
     ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
     ; SI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
-    ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
-    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; SI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; SI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C6]](s32)
+    ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; SI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32)
     ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 1 from unknown-address + 4, addrspace 3)
     ; SI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
     ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 1 from unknown-address + 5, addrspace 3)
@@ -9316,23 +9521,26 @@ body: |
     ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
     ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32)
     ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]]
-    ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32)
-    ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
-    ; SI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
-    ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32)
+    ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
+    ; SI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
+    ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32)
+    ; SI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]]
+    ; SI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+    ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; SI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C7]], [[C6]](s32)
+    ; SI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]]
+    ; SI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+    ; SI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[DEF]](<2 x s16>)
+    ; SI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
     ; SI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; SI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
-    ; SI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BITCAST]](<2 x s16>), 0
-    ; SI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; SI: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
-    ; SI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0
-    ; SI: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[OR3]](s16), 32
-    ; SI: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT3]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; SI: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>)
-    ; SI: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV8]](<3 x s16>), 0
-    ; SI: $vgpr0_vgpr1 = COPY [[INSERT4]](<4 x s16>)
+    ; SI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
     ; CI-LABEL: name: test_load_local_v3s16_align1
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3)
@@ -9357,19 +9565,13 @@ body: |
     ; CI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
     ; CI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
     ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
-    ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32)
-    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]]
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32)
-    ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
-    ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
-    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; CI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; CI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C6]](s32)
+    ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32)
+    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]]
+    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32)
+    ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; CI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32)
     ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 1 from unknown-address + 4, addrspace 3)
     ; CI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
     ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 1 from unknown-address + 5, addrspace 3)
@@ -9378,23 +9580,26 @@ body: |
     ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
     ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32)
     ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]]
-    ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32)
-    ; CI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
-    ; CI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
-    ; CI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32)
+    ; CI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
+    ; CI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
+    ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32)
+    ; CI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]]
+    ; CI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+    ; CI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; CI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C7]], [[C6]](s32)
+    ; CI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]]
+    ; CI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+    ; CI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[DEF]](<2 x s16>)
+    ; CI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
     ; CI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; CI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; CI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
-    ; CI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BITCAST]](<2 x s16>), 0
-    ; CI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; CI: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
-    ; CI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0
-    ; CI: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[OR3]](s16), 32
-    ; CI: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT3]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; CI: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>)
-    ; CI: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV8]](<3 x s16>), 0
-    ; CI: $vgpr0_vgpr1 = COPY [[INSERT4]](<4 x s16>)
+    ; CI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
     ; CI-DS128-LABEL: name: test_load_local_v3s16_align1
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3)
@@ -9424,14 +9629,8 @@ body: |
     ; CI-DS128: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32)
     ; CI-DS128: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
     ; CI-DS128: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
-    ; CI-DS128: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; CI-DS128: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; CI-DS128: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-DS128: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
-    ; CI-DS128: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; CI-DS128: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; CI-DS128: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; CI-DS128: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C6]](s32)
+    ; CI-DS128: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CI-DS128: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32)
     ; CI-DS128: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 1 from unknown-address + 4, addrspace 3)
     ; CI-DS128: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
     ; CI-DS128: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 1 from unknown-address + 5, addrspace 3)
@@ -9440,23 +9639,26 @@ body: |
     ; CI-DS128: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
     ; CI-DS128: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32)
     ; CI-DS128: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]]
-    ; CI-DS128: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32)
-    ; CI-DS128: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
-    ; CI-DS128: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
-    ; CI-DS128: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CI-DS128: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32)
+    ; CI-DS128: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
+    ; CI-DS128: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
+    ; CI-DS128: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; CI-DS128: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; CI-DS128: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-DS128: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32)
+    ; CI-DS128: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]]
+    ; CI-DS128: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+    ; CI-DS128: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; CI-DS128: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CI-DS128: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C7]], [[C6]](s32)
+    ; CI-DS128: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]]
+    ; CI-DS128: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+    ; CI-DS128: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; CI-DS128: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[DEF]](<2 x s16>)
+    ; CI-DS128: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
     ; CI-DS128: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CI-DS128: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; CI-DS128: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; CI-DS128: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
-    ; CI-DS128: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BITCAST]](<2 x s16>), 0
-    ; CI-DS128: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; CI-DS128: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
-    ; CI-DS128: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0
-    ; CI-DS128: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[OR3]](s16), 32
-    ; CI-DS128: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT3]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; CI-DS128: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>)
-    ; CI-DS128: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV8]](<3 x s16>), 0
-    ; CI-DS128: $vgpr0_vgpr1 = COPY [[INSERT4]](<4 x s16>)
+    ; CI-DS128: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
     ; VI-LABEL: name: test_load_local_v3s16_align1
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3)
@@ -9482,14 +9684,8 @@ body: |
     ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]]
     ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16)
     ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; VI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32)
+    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
     ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 1 from unknown-address + 4, addrspace 3)
     ; VI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
     ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 1 from unknown-address + 5, addrspace 3)
@@ -9497,22 +9693,25 @@ body: |
     ; VI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]]
     ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
     ; VI: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]]
-    ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16)
-    ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL3]]
-    ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; VI: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16)
+    ; VI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
+    ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
+    ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]]
+    ; VI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+    ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[C5]](s32)
+    ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]]
+    ; VI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+    ; VI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[DEF]](<2 x s16>)
+    ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
     ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
-    ; VI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BITCAST]](<2 x s16>), 0
-    ; VI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; VI: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
-    ; VI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0
-    ; VI: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[OR3]](s16), 32
-    ; VI: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT3]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; VI: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>)
-    ; VI: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV8]](<3 x s16>), 0
-    ; VI: $vgpr0_vgpr1 = COPY [[INSERT4]](<4 x s16>)
+    ; VI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
     ; GFX9-LABEL: name: test_load_local_v3s16_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3)
@@ -9538,9 +9737,6 @@ body: |
     ; GFX9: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]]
     ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16)
     ; GFX9: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
-    ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16)
-    ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32)
     ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
     ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
     ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 1 from unknown-address + 4, addrspace 3)
@@ -9552,20 +9748,18 @@ body: |
     ; GFX9: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]]
     ; GFX9: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16)
     ; GFX9: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
-    ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
-    ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
-    ; GFX9: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BUILD_VECTOR_TRUNC]](<2 x s16>), 0
-    ; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; GFX9: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
-    ; GFX9: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0
-    ; GFX9: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[OR2]](s16), 32
-    ; GFX9: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT3]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; GFX9: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>)
-    ; GFX9: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV8]](<3 x s16>), 0
-    ; GFX9: $vgpr0_vgpr1 = COPY [[INSERT4]](<4 x s16>)
+    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16)
+    ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32)
+    ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[OR2]](s16)
+    ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[DEF]](s32)
+    ; GFX9: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[DEF1]](<2 x s16>)
+    ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
+    ; GFX9: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[UV]](<3 x s16>), 0
+    ; GFX9: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
     ; GFX9-UNALIGNED-LABEL: name: test_load_local_v3s16_align1
     ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[COPY]](p3) :: (load 6, align 1, addrspace 3)
@@ -9597,9 +9791,6 @@ body: |
     ; GFX10: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]]
     ; GFX10: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16)
     ; GFX10: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; GFX10: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
-    ; GFX10: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16)
-    ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32)
     ; GFX10: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
     ; GFX10: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
     ; GFX10: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 1 from unknown-address + 4, addrspace 3)
@@ -9611,20 +9802,18 @@ body: |
     ; GFX10: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]]
     ; GFX10: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16)
     ; GFX10: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
-    ; GFX10: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX10: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; GFX10: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
-    ; GFX10: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
-    ; GFX10: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BUILD_VECTOR_TRUNC]](<2 x s16>), 0
-    ; GFX10: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; GFX10: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
-    ; GFX10: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0
-    ; GFX10: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[OR2]](s16), 32
-    ; GFX10: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT3]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; GFX10: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>)
-    ; GFX10: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV8]](<3 x s16>), 0
-    ; GFX10: $vgpr0_vgpr1 = COPY [[INSERT4]](<4 x s16>)
+    ; GFX10: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; GFX10: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16)
+    ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32)
+    ; GFX10: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[OR2]](s16)
+    ; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[DEF]](s32)
+    ; GFX10: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[DEF1]](<2 x s16>)
+    ; GFX10: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
+    ; GFX10: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX10: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[UV]](<3 x s16>), 0
+    ; GFX10: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_v3s16_align1
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3)
@@ -9650,9 +9839,6 @@ body: |
     ; GFX10-UNALIGNED: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]]
     ; GFX10-UNALIGNED: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16)
     ; GFX10-UNALIGNED: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; GFX10-UNALIGNED: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
-    ; GFX10-UNALIGNED: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16)
-    ; GFX10-UNALIGNED: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32)
     ; GFX10-UNALIGNED: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
     ; GFX10-UNALIGNED: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
     ; GFX10-UNALIGNED: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 1 from unknown-address + 4, addrspace 3)
@@ -9664,20 +9850,18 @@ body: |
     ; GFX10-UNALIGNED: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]]
     ; GFX10-UNALIGNED: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16)
     ; GFX10-UNALIGNED: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
-    ; GFX10-UNALIGNED: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX10-UNALIGNED: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX10-UNALIGNED: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; GFX10-UNALIGNED: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
-    ; GFX10-UNALIGNED: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
-    ; GFX10-UNALIGNED: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BUILD_VECTOR_TRUNC]](<2 x s16>), 0
-    ; GFX10-UNALIGNED: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; GFX10-UNALIGNED: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
-    ; GFX10-UNALIGNED: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0
-    ; GFX10-UNALIGNED: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[OR2]](s16), 32
-    ; GFX10-UNALIGNED: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT3]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; GFX10-UNALIGNED: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>)
-    ; GFX10-UNALIGNED: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV8]](<3 x s16>), 0
-    ; GFX10-UNALIGNED: $vgpr0_vgpr1 = COPY [[INSERT4]](<4 x s16>)
+    ; GFX10-UNALIGNED: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; GFX10-UNALIGNED: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16)
+    ; GFX10-UNALIGNED: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32)
+    ; GFX10-UNALIGNED: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[OR2]](s16)
+    ; GFX10-UNALIGNED: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; GFX10-UNALIGNED: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[DEF]](s32)
+    ; GFX10-UNALIGNED: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; GFX10-UNALIGNED: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[DEF1]](<2 x s16>)
+    ; GFX10-UNALIGNED: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
+    ; GFX10-UNALIGNED: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX10-UNALIGNED: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[UV]](<3 x s16>), 0
+    ; GFX10-UNALIGNED: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
     %0:_(p3) = COPY $vgpr0
     %1:_(<3 x s16>) = G_LOAD %0 :: (load 6, align 1, addrspace 3)
     %2:_(<4 x s16>) = G_IMPLICIT_DEF
@@ -11150,7 +11334,6 @@ body: |
     ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]]
     ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
     ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
     ; SI: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
     ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1 from unknown-address + 8, addrspace 3)
     ; SI: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
@@ -11173,10 +11356,8 @@ body: |
     ; SI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C3]]
     ; SI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
     ; SI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; SI: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; SI: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<2 x s32>), 0
-    ; SI: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[OR8]](s32), 64
-    ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[INSERT1]](<3 x s32>)
+    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
+    ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
     ; CI-LABEL: name: test_load_local_v3s32_align16
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3)
@@ -11230,7 +11411,6 @@ body: |
     ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]]
     ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
     ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
     ; CI: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
     ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1 from unknown-address + 8, addrspace 3)
     ; CI: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
@@ -11253,10 +11433,8 @@ body: |
     ; CI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C3]]
     ; CI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
     ; CI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; CI: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; CI: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<2 x s32>), 0
-    ; CI: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[OR8]](s32), 64
-    ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[INSERT1]](<3 x s32>)
+    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
+    ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
     ; CI-DS128-LABEL: name: test_load_local_v3s32_align16
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3)
@@ -11310,7 +11488,6 @@ body: |
     ; CI-DS128: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]]
     ; CI-DS128: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
     ; CI-DS128: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; CI-DS128: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
     ; CI-DS128: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
     ; CI-DS128: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1 from unknown-address + 8, addrspace 3)
     ; CI-DS128: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
@@ -11333,10 +11510,8 @@ body: |
     ; CI-DS128: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C3]]
     ; CI-DS128: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
     ; CI-DS128: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; CI-DS128: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; CI-DS128: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<2 x s32>), 0
-    ; CI-DS128: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[OR8]](s32), 64
-    ; CI-DS128: $vgpr0_vgpr1_vgpr2 = COPY [[INSERT1]](<3 x s32>)
+    ; CI-DS128: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
+    ; CI-DS128: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
     ; VI-LABEL: name: test_load_local_v3s32_align16
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3)
@@ -11390,7 +11565,6 @@ body: |
     ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]]
     ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
     ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
     ; VI: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
     ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1 from unknown-address + 8, addrspace 3)
     ; VI: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
@@ -11413,10 +11587,8 @@ body: |
     ; VI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C3]]
     ; VI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
     ; VI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; VI: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; VI: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<2 x s32>), 0
-    ; VI: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[OR8]](s32), 64
-    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[INSERT1]](<3 x s32>)
+    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
+    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
     ; GFX9-LABEL: name: test_load_local_v3s32_align16
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3)
@@ -11470,7 +11642,6 @@ body: |
     ; GFX9: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]]
     ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
     ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
     ; GFX9: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
     ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1 from unknown-address + 8, addrspace 3)
     ; GFX9: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
@@ -11493,10 +11664,8 @@ body: |
     ; GFX9: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C3]]
     ; GFX9: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
     ; GFX9: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; GFX9: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; GFX9: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<2 x s32>), 0
-    ; GFX9: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[OR8]](s32), 64
-    ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[INSERT1]](<3 x s32>)
+    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
+    ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
     ; GFX9-UNALIGNED-LABEL: name: test_load_local_v3s32_align16
     ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load 12, align 1, addrspace 3)
@@ -11554,7 +11723,6 @@ body: |
     ; GFX10: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]]
     ; GFX10: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
     ; GFX10: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
     ; GFX10: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
     ; GFX10: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1 from unknown-address + 8, addrspace 3)
     ; GFX10: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
@@ -11577,10 +11745,8 @@ body: |
     ; GFX10: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C3]]
     ; GFX10: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
     ; GFX10: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; GFX10: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; GFX10: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<2 x s32>), 0
-    ; GFX10: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[OR8]](s32), 64
-    ; GFX10: $vgpr0_vgpr1_vgpr2 = COPY [[INSERT1]](<3 x s32>)
+    ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
+    ; GFX10: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_v3s32_align16
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3)
@@ -11634,7 +11800,6 @@ body: |
     ; GFX10-UNALIGNED: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]]
     ; GFX10-UNALIGNED: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
     ; GFX10-UNALIGNED: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; GFX10-UNALIGNED: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
     ; GFX10-UNALIGNED: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
     ; GFX10-UNALIGNED: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1 from unknown-address + 8, addrspace 3)
     ; GFX10-UNALIGNED: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
@@ -11657,10 +11822,8 @@ body: |
     ; GFX10-UNALIGNED: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C3]]
     ; GFX10-UNALIGNED: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
     ; GFX10-UNALIGNED: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; GFX10-UNALIGNED: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; GFX10-UNALIGNED: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<2 x s32>), 0
-    ; GFX10-UNALIGNED: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[OR8]](s32), 64
-    ; GFX10-UNALIGNED: $vgpr0_vgpr1_vgpr2 = COPY [[INSERT1]](<3 x s32>)
+    ; GFX10-UNALIGNED: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
+    ; GFX10-UNALIGNED: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
     %0:_(p3) = COPY $vgpr0
     %1:_(<3 x s32>) = G_LOAD %0 :: (load 12, align 1, addrspace 3)
     $vgpr0_vgpr1_vgpr2 = COPY %1
@@ -11678,50 +11841,45 @@ body: |
     ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
     ; SI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
     ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 4 from unknown-address + 8, addrspace 3)
-    ; SI: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; SI: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[LOAD]](<2 x s32>), 0
-    ; SI: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[LOAD1]](s32), 64
-    ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[INSERT1]](<3 x s32>)
+    ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
+    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32)
+    ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
     ; CI-LABEL: name: test_load_local_v3s32_align4
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; CI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3)
     ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
     ; CI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
     ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 4 from unknown-address + 8, addrspace 3)
-    ; CI: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; CI: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[LOAD]](<2 x s32>), 0
-    ; CI: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[LOAD1]](s32), 64
-    ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[INSERT1]](<3 x s32>)
+    ; CI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
+    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32)
+    ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
     ; CI-DS128-LABEL: name: test_load_local_v3s32_align4
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; CI-DS128: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3)
     ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
     ; CI-DS128: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
     ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 4 from unknown-address + 8, addrspace 3)
-    ; CI-DS128: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; CI-DS128: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[LOAD]](<2 x s32>), 0
-    ; CI-DS128: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[LOAD1]](s32), 64
-    ; CI-DS128: $vgpr0_vgpr1_vgpr2 = COPY [[INSERT1]](<3 x s32>)
+    ; CI-DS128: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
+    ; CI-DS128: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32)
+    ; CI-DS128: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
     ; VI-LABEL: name: test_load_local_v3s32_align4
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; VI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3)
     ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
     ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
     ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 4 from unknown-address + 8, addrspace 3)
-    ; VI: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; VI: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[LOAD]](<2 x s32>), 0
-    ; VI: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[LOAD1]](s32), 64
-    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[INSERT1]](<3 x s32>)
+    ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
+    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32)
+    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
     ; GFX9-LABEL: name: test_load_local_v3s32_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3)
     ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
     ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
     ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 4 from unknown-address + 8, addrspace 3)
-    ; GFX9: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; GFX9: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[LOAD]](<2 x s32>), 0
-    ; GFX9: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[LOAD1]](s32), 64
-    ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[INSERT1]](<3 x s32>)
+    ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
+    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32)
+    ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
     ; GFX9-UNALIGNED-LABEL: name: test_load_local_v3s32_align4
     ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load 12, align 4, addrspace 3)
@@ -11732,20 +11890,18 @@ body: |
     ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
     ; GFX10: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
     ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 4 from unknown-address + 8, addrspace 3)
-    ; GFX10: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; GFX10: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[LOAD]](<2 x s32>), 0
-    ; GFX10: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[LOAD1]](s32), 64
-    ; GFX10: $vgpr0_vgpr1_vgpr2 = COPY [[INSERT1]](<3 x s32>)
+    ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
+    ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32)
+    ; GFX10: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_v3s32_align4
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3)
     ; GFX10-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
     ; GFX10-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
     ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 4 from unknown-address + 8, addrspace 3)
-    ; GFX10-UNALIGNED: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; GFX10-UNALIGNED: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[LOAD]](<2 x s32>), 0
-    ; GFX10-UNALIGNED: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[LOAD1]](s32), 64
-    ; GFX10-UNALIGNED: $vgpr0_vgpr1_vgpr2 = COPY [[INSERT1]](<3 x s32>)
+    ; GFX10-UNALIGNED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
+    ; GFX10-UNALIGNED: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32)
+    ; GFX10-UNALIGNED: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
     %0:_(p3) = COPY $vgpr0
     %1:_(<3 x s32>) = G_LOAD %0 :: (load 12, align 4, addrspace 3)
     $vgpr0_vgpr1_vgpr2 = COPY %1
@@ -14122,72 +14278,66 @@ body: |
     ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
     ; CI-DS128: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
     ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load 8 from unknown-address + 16, align 16, addrspace 3)
-    ; CI-DS128: [[DEF:%[0-9]+]]:_(<3 x s64>) = G_IMPLICIT_DEF
-    ; CI-DS128: [[INSERT:%[0-9]+]]:_(<3 x s64>) = G_INSERT [[DEF]], [[LOAD]](<2 x s64>), 0
-    ; CI-DS128: [[INSERT1:%[0-9]+]]:_(<3 x s64>) = G_INSERT [[INSERT]], [[LOAD1]](s64), 128
-    ; CI-DS128: [[DEF1:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
-    ; CI-DS128: [[INSERT2:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF1]], [[INSERT1]](<3 x s64>), 0
-    ; CI-DS128: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT2]](<4 x s64>)
+    ; CI-DS128: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
+    ; CI-DS128: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64)
+    ; CI-DS128: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
+    ; CI-DS128: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
+    ; CI-DS128: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
     ; VI-LABEL: name: test_load_local_v3s64_align32
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; VI: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load 16, align 32, addrspace 3)
     ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
     ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
     ; VI: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load 8 from unknown-address + 16, align 16, addrspace 3)
-    ; VI: [[DEF:%[0-9]+]]:_(<3 x s64>) = G_IMPLICIT_DEF
-    ; VI: [[INSERT:%[0-9]+]]:_(<3 x s64>) = G_INSERT [[DEF]], [[LOAD]](<2 x s64>), 0
-    ; VI: [[INSERT1:%[0-9]+]]:_(<3 x s64>) = G_INSERT [[INSERT]], [[LOAD1]](s64), 128
-    ; VI: [[DEF1:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
-    ; VI: [[INSERT2:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF1]], [[INSERT1]](<3 x s64>), 0
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT2]](<4 x s64>)
+    ; VI: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
+    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64)
+    ; VI: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
+    ; VI: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
+    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
     ; GFX9-LABEL: name: test_load_local_v3s64_align32
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load 16, align 32, addrspace 3)
     ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
     ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
     ; GFX9: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load 8 from unknown-address + 16, align 16, addrspace 3)
-    ; GFX9: [[DEF:%[0-9]+]]:_(<3 x s64>) = G_IMPLICIT_DEF
-    ; GFX9: [[INSERT:%[0-9]+]]:_(<3 x s64>) = G_INSERT [[DEF]], [[LOAD]](<2 x s64>), 0
-    ; GFX9: [[INSERT1:%[0-9]+]]:_(<3 x s64>) = G_INSERT [[INSERT]], [[LOAD1]](s64), 128
-    ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
-    ; GFX9: [[INSERT2:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF1]], [[INSERT1]](<3 x s64>), 0
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT2]](<4 x s64>)
+    ; GFX9: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
+    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64)
+    ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
+    ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
+    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
     ; GFX9-UNALIGNED-LABEL: name: test_load_local_v3s64_align32
     ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load 16, align 32, addrspace 3)
     ; GFX9-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
     ; GFX9-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
     ; GFX9-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load 8 from unknown-address + 16, align 16, addrspace 3)
-    ; GFX9-UNALIGNED: [[DEF:%[0-9]+]]:_(<3 x s64>) = G_IMPLICIT_DEF
-    ; GFX9-UNALIGNED: [[INSERT:%[0-9]+]]:_(<3 x s64>) = G_INSERT [[DEF]], [[LOAD]](<2 x s64>), 0
-    ; GFX9-UNALIGNED: [[INSERT1:%[0-9]+]]:_(<3 x s64>) = G_INSERT [[INSERT]], [[LOAD1]](s64), 128
-    ; GFX9-UNALIGNED: [[DEF1:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
-    ; GFX9-UNALIGNED: [[INSERT2:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF1]], [[INSERT1]](<3 x s64>), 0
-    ; GFX9-UNALIGNED: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT2]](<4 x s64>)
+    ; GFX9-UNALIGNED: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
+    ; GFX9-UNALIGNED: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64)
+    ; GFX9-UNALIGNED: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
+    ; GFX9-UNALIGNED: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
+    ; GFX9-UNALIGNED: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
     ; GFX10-LABEL: name: test_load_local_v3s64_align32
     ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; GFX10: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load 16, align 32, addrspace 3)
     ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
     ; GFX10: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
     ; GFX10: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load 8 from unknown-address + 16, align 16, addrspace 3)
-    ; GFX10: [[DEF:%[0-9]+]]:_(<3 x s64>) = G_IMPLICIT_DEF
-    ; GFX10: [[INSERT:%[0-9]+]]:_(<3 x s64>) = G_INSERT [[DEF]], [[LOAD]](<2 x s64>), 0
-    ; GFX10: [[INSERT1:%[0-9]+]]:_(<3 x s64>) = G_INSERT [[INSERT]], [[LOAD1]](s64), 128
-    ; GFX10: [[DEF1:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
-    ; GFX10: [[INSERT2:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF1]], [[INSERT1]](<3 x s64>), 0
-    ; GFX10: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT2]](<4 x s64>)
+    ; GFX10: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
+    ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64)
+    ; GFX10: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
+    ; GFX10: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
+    ; GFX10: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_v3s64_align32
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load 16, align 32, addrspace 3)
     ; GFX10-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
     ; GFX10-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
     ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load 8 from unknown-address + 16, align 16, addrspace 3)
-    ; GFX10-UNALIGNED: [[DEF:%[0-9]+]]:_(<3 x s64>) = G_IMPLICIT_DEF
-    ; GFX10-UNALIGNED: [[INSERT:%[0-9]+]]:_(<3 x s64>) = G_INSERT [[DEF]], [[LOAD]](<2 x s64>), 0
-    ; GFX10-UNALIGNED: [[INSERT1:%[0-9]+]]:_(<3 x s64>) = G_INSERT [[INSERT]], [[LOAD1]](s64), 128
-    ; GFX10-UNALIGNED: [[DEF1:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
-    ; GFX10-UNALIGNED: [[INSERT2:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF1]], [[INSERT1]](<3 x s64>), 0
-    ; GFX10-UNALIGNED: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT2]](<4 x s64>)
+    ; GFX10-UNALIGNED: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
+    ; GFX10-UNALIGNED: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64)
+    ; GFX10-UNALIGNED: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
+    ; GFX10-UNALIGNED: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
+    ; GFX10-UNALIGNED: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
     %0:_(p3) = COPY $vgpr0
     %1:_(<3 x s64>) = G_LOAD %0 :: (load 24, align 32, addrspace 3)
     %2:_(<4 x s64>) = G_IMPLICIT_DEF
@@ -15125,7 +15275,6 @@ body: |
     ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]]
     ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
     ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
     ; SI: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
     ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1 from unknown-address + 8, addrspace 3)
     ; SI: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
@@ -15148,11 +15297,8 @@ body: |
     ; SI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C3]]
     ; SI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
     ; SI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; SI: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; SI: [[COPY13:%[0-9]+]]:_(<3 x s32>) = COPY [[DEF]](<3 x s32>)
-    ; SI: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[COPY13]], [[BUILD_VECTOR]](<2 x s32>), 0
-    ; SI: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[OR8]](s32), 64
-    ; SI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT1]](<3 x s32>)
+    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
+    ; SI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
     ; SI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
     ; SI: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C8]](s32)
     ; SI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load 1 from unknown-address + 12, addrspace 3)
@@ -15162,18 +15308,18 @@ body: |
     ; SI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load 1 from unknown-address + 14, addrspace 3)
     ; SI: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
     ; SI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load 1 from unknown-address + 15, addrspace 3)
-    ; SI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32)
-    ; SI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C3]]
-    ; SI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32)
-    ; SI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C3]]
+    ; SI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32)
+    ; SI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]]
+    ; SI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32)
+    ; SI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C3]]
     ; SI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
     ; SI: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
-    ; SI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LOAD14]](s32)
-    ; SI: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C3]]
+    ; SI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LOAD14]](s32)
+    ; SI: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C3]]
     ; SI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
     ; SI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
-    ; SI: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32)
-    ; SI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY17]], [[C3]]
+    ; SI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32)
+    ; SI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C3]]
     ; SI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
     ; SI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
     ; SI: [[PTR_ADD15:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C7]](s32)
@@ -15184,21 +15330,20 @@ body: |
     ; SI: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p3) :: (load 1 from unknown-address + 18, addrspace 3)
     ; SI: [[PTR_ADD18:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s32)
     ; SI: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p3) :: (load 1 from unknown-address + 19, addrspace 3)
-    ; SI: [[COPY18:%[0-9]+]]:_(s32) = COPY [[LOAD16]](s32)
-    ; SI: [[AND16:%[0-9]+]]:_(s32) = G_AND [[COPY18]], [[C3]]
-    ; SI: [[COPY19:%[0-9]+]]:_(s32) = COPY [[LOAD17]](s32)
-    ; SI: [[AND17:%[0-9]+]]:_(s32) = G_AND [[COPY19]], [[C3]]
+    ; SI: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LOAD16]](s32)
+    ; SI: [[AND16:%[0-9]+]]:_(s32) = G_AND [[COPY17]], [[C3]]
+    ; SI: [[COPY18:%[0-9]+]]:_(s32) = COPY [[LOAD17]](s32)
+    ; SI: [[AND17:%[0-9]+]]:_(s32) = G_AND [[COPY18]], [[C3]]
     ; SI: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[C4]](s32)
     ; SI: [[OR12:%[0-9]+]]:_(s32) = G_OR [[AND16]], [[SHL12]]
-    ; SI: [[COPY20:%[0-9]+]]:_(s32) = COPY [[LOAD18]](s32)
-    ; SI: [[AND18:%[0-9]+]]:_(s32) = G_AND [[COPY20]], [[C3]]
+    ; SI: [[COPY19:%[0-9]+]]:_(s32) = COPY [[LOAD18]](s32)
+    ; SI: [[AND18:%[0-9]+]]:_(s32) = G_AND [[COPY19]], [[C3]]
     ; SI: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND18]], [[C5]](s32)
     ; SI: [[OR13:%[0-9]+]]:_(s32) = G_OR [[OR12]], [[SHL13]]
-    ; SI: [[COPY21:%[0-9]+]]:_(s32) = COPY [[LOAD19]](s32)
-    ; SI: [[AND19:%[0-9]+]]:_(s32) = G_AND [[COPY21]], [[C3]]
+    ; SI: [[COPY20:%[0-9]+]]:_(s32) = COPY [[LOAD19]](s32)
+    ; SI: [[AND19:%[0-9]+]]:_(s32) = G_AND [[COPY20]], [[C3]]
     ; SI: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[C6]](s32)
     ; SI: [[OR14:%[0-9]+]]:_(s32) = G_OR [[OR13]], [[SHL14]]
-    ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32)
     ; SI: [[PTR_ADD19:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s32)
     ; SI: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p3) :: (load 1 from unknown-address + 20, addrspace 3)
     ; SI: [[PTR_ADD20:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD19]], [[C]](s32)
@@ -15207,27 +15352,26 @@ body: |
     ; SI: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p3) :: (load 1 from unknown-address + 22, addrspace 3)
     ; SI: [[PTR_ADD22:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD19]], [[C2]](s32)
     ; SI: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p3) :: (load 1 from unknown-address + 23, addrspace 3)
-    ; SI: [[COPY22:%[0-9]+]]:_(s32) = COPY [[LOAD20]](s32)
-    ; SI: [[AND20:%[0-9]+]]:_(s32) = G_AND [[COPY22]], [[C3]]
-    ; SI: [[COPY23:%[0-9]+]]:_(s32) = COPY [[LOAD21]](s32)
-    ; SI: [[AND21:%[0-9]+]]:_(s32) = G_AND [[COPY23]], [[C3]]
+    ; SI: [[COPY21:%[0-9]+]]:_(s32) = COPY [[LOAD20]](s32)
+    ; SI: [[AND20:%[0-9]+]]:_(s32) = G_AND [[COPY21]], [[C3]]
+    ; SI: [[COPY22:%[0-9]+]]:_(s32) = COPY [[LOAD21]](s32)
+    ; SI: [[AND21:%[0-9]+]]:_(s32) = G_AND [[COPY22]], [[C3]]
     ; SI: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[C4]](s32)
     ; SI: [[OR15:%[0-9]+]]:_(s32) = G_OR [[AND20]], [[SHL15]]
-    ; SI: [[COPY24:%[0-9]+]]:_(s32) = COPY [[LOAD22]](s32)
-    ; SI: [[AND22:%[0-9]+]]:_(s32) = G_AND [[COPY24]], [[C3]]
+    ; SI: [[COPY23:%[0-9]+]]:_(s32) = COPY [[LOAD22]](s32)
+    ; SI: [[AND22:%[0-9]+]]:_(s32) = G_AND [[COPY23]], [[C3]]
     ; SI: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[AND22]], [[C5]](s32)
     ; SI: [[OR16:%[0-9]+]]:_(s32) = G_OR [[OR15]], [[SHL16]]
-    ; SI: [[COPY25:%[0-9]+]]:_(s32) = COPY [[LOAD23]](s32)
-    ; SI: [[AND23:%[0-9]+]]:_(s32) = G_AND [[COPY25]], [[C3]]
+    ; SI: [[COPY24:%[0-9]+]]:_(s32) = COPY [[LOAD23]](s32)
+    ; SI: [[AND23:%[0-9]+]]:_(s32) = G_AND [[COPY24]], [[C3]]
     ; SI: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[C6]](s32)
     ; SI: [[OR17:%[0-9]+]]:_(s32) = G_OR [[OR16]], [[SHL17]]
-    ; SI: [[INSERT2:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[BUILD_VECTOR1]](<2 x s32>), 0
-    ; SI: [[INSERT3:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT2]], [[OR17]](s32), 64
-    ; SI: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT3]](<3 x s32>)
-    ; SI: [[COPY26:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
-    ; SI: [[COPY27:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
-    ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY26]](s96)
-    ; SI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY27]](s96)
+    ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32), [[OR17]](s32)
+    ; SI: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
+    ; SI: [[COPY25:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; SI: [[COPY26:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY25]](s96)
+    ; SI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY26]](s96)
     ; CI-LABEL: name: test_extload_local_v2s96_from_24_align1
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3)
@@ -15281,7 +15425,6 @@ body: |
     ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]]
     ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
     ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
     ; CI: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
     ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1 from unknown-address + 8, addrspace 3)
     ; CI: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
@@ -15304,11 +15447,8 @@ body: |
     ; CI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C3]]
     ; CI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
     ; CI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; CI: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; CI: [[COPY13:%[0-9]+]]:_(<3 x s32>) = COPY [[DEF]](<3 x s32>)
-    ; CI: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[COPY13]], [[BUILD_VECTOR]](<2 x s32>), 0
-    ; CI: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[OR8]](s32), 64
-    ; CI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT1]](<3 x s32>)
+    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
+    ; CI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
     ; CI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
     ; CI: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C8]](s32)
     ; CI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load 1 from unknown-address + 12, addrspace 3)
@@ -15318,18 +15458,18 @@ body: |
     ; CI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load 1 from unknown-address + 14, addrspace 3)
     ; CI: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
     ; CI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load 1 from unknown-address + 15, addrspace 3)
-    ; CI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32)
-    ; CI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C3]]
-    ; CI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32)
-    ; CI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C3]]
+    ; CI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32)
+    ; CI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]]
+    ; CI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32)
+    ; CI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C3]]
     ; CI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
     ; CI: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
-    ; CI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LOAD14]](s32)
-    ; CI: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C3]]
+    ; CI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LOAD14]](s32)
+    ; CI: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C3]]
     ; CI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
     ; CI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
-    ; CI: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32)
-    ; CI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY17]], [[C3]]
+    ; CI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32)
+    ; CI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C3]]
     ; CI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
     ; CI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
     ; CI: [[PTR_ADD15:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C7]](s32)
@@ -15340,21 +15480,20 @@ body: |
     ; CI: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p3) :: (load 1 from unknown-address + 18, addrspace 3)
     ; CI: [[PTR_ADD18:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s32)
     ; CI: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p3) :: (load 1 from unknown-address + 19, addrspace 3)
-    ; CI: [[COPY18:%[0-9]+]]:_(s32) = COPY [[LOAD16]](s32)
-    ; CI: [[AND16:%[0-9]+]]:_(s32) = G_AND [[COPY18]], [[C3]]
-    ; CI: [[COPY19:%[0-9]+]]:_(s32) = COPY [[LOAD17]](s32)
-    ; CI: [[AND17:%[0-9]+]]:_(s32) = G_AND [[COPY19]], [[C3]]
+    ; CI: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LOAD16]](s32)
+    ; CI: [[AND16:%[0-9]+]]:_(s32) = G_AND [[COPY17]], [[C3]]
+    ; CI: [[COPY18:%[0-9]+]]:_(s32) = COPY [[LOAD17]](s32)
+    ; CI: [[AND17:%[0-9]+]]:_(s32) = G_AND [[COPY18]], [[C3]]
     ; CI: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[C4]](s32)
     ; CI: [[OR12:%[0-9]+]]:_(s32) = G_OR [[AND16]], [[SHL12]]
-    ; CI: [[COPY20:%[0-9]+]]:_(s32) = COPY [[LOAD18]](s32)
-    ; CI: [[AND18:%[0-9]+]]:_(s32) = G_AND [[COPY20]], [[C3]]
+    ; CI: [[COPY19:%[0-9]+]]:_(s32) = COPY [[LOAD18]](s32)
+    ; CI: [[AND18:%[0-9]+]]:_(s32) = G_AND [[COPY19]], [[C3]]
     ; CI: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND18]], [[C5]](s32)
     ; CI: [[OR13:%[0-9]+]]:_(s32) = G_OR [[OR12]], [[SHL13]]
-    ; CI: [[COPY21:%[0-9]+]]:_(s32) = COPY [[LOAD19]](s32)
-    ; CI: [[AND19:%[0-9]+]]:_(s32) = G_AND [[COPY21]], [[C3]]
+    ; CI: [[COPY20:%[0-9]+]]:_(s32) = COPY [[LOAD19]](s32)
+    ; CI: [[AND19:%[0-9]+]]:_(s32) = G_AND [[COPY20]], [[C3]]
     ; CI: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[C6]](s32)
     ; CI: [[OR14:%[0-9]+]]:_(s32) = G_OR [[OR13]], [[SHL14]]
-    ; CI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32)
     ; CI: [[PTR_ADD19:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s32)
     ; CI: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p3) :: (load 1 from unknown-address + 20, addrspace 3)
     ; CI: [[PTR_ADD20:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD19]], [[C]](s32)
@@ -15363,27 +15502,26 @@ body: |
     ; CI: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p3) :: (load 1 from unknown-address + 22, addrspace 3)
     ; CI: [[PTR_ADD22:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD19]], [[C2]](s32)
     ; CI: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p3) :: (load 1 from unknown-address + 23, addrspace 3)
-    ; CI: [[COPY22:%[0-9]+]]:_(s32) = COPY [[LOAD20]](s32)
-    ; CI: [[AND20:%[0-9]+]]:_(s32) = G_AND [[COPY22]], [[C3]]
-    ; CI: [[COPY23:%[0-9]+]]:_(s32) = COPY [[LOAD21]](s32)
-    ; CI: [[AND21:%[0-9]+]]:_(s32) = G_AND [[COPY23]], [[C3]]
+    ; CI: [[COPY21:%[0-9]+]]:_(s32) = COPY [[LOAD20]](s32)
+    ; CI: [[AND20:%[0-9]+]]:_(s32) = G_AND [[COPY21]], [[C3]]
+    ; CI: [[COPY22:%[0-9]+]]:_(s32) = COPY [[LOAD21]](s32)
+    ; CI: [[AND21:%[0-9]+]]:_(s32) = G_AND [[COPY22]], [[C3]]
     ; CI: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[C4]](s32)
     ; CI: [[OR15:%[0-9]+]]:_(s32) = G_OR [[AND20]], [[SHL15]]
-    ; CI: [[COPY24:%[0-9]+]]:_(s32) = COPY [[LOAD22]](s32)
-    ; CI: [[AND22:%[0-9]+]]:_(s32) = G_AND [[COPY24]], [[C3]]
+    ; CI: [[COPY23:%[0-9]+]]:_(s32) = COPY [[LOAD22]](s32)
+    ; CI: [[AND22:%[0-9]+]]:_(s32) = G_AND [[COPY23]], [[C3]]
     ; CI: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[AND22]], [[C5]](s32)
     ; CI: [[OR16:%[0-9]+]]:_(s32) = G_OR [[OR15]], [[SHL16]]
-    ; CI: [[COPY25:%[0-9]+]]:_(s32) = COPY [[LOAD23]](s32)
-    ; CI: [[AND23:%[0-9]+]]:_(s32) = G_AND [[COPY25]], [[C3]]
+    ; CI: [[COPY24:%[0-9]+]]:_(s32) = COPY [[LOAD23]](s32)
+    ; CI: [[AND23:%[0-9]+]]:_(s32) = G_AND [[COPY24]], [[C3]]
     ; CI: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[C6]](s32)
     ; CI: [[OR17:%[0-9]+]]:_(s32) = G_OR [[OR16]], [[SHL17]]
-    ; CI: [[INSERT2:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[BUILD_VECTOR1]](<2 x s32>), 0
-    ; CI: [[INSERT3:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT2]], [[OR17]](s32), 64
-    ; CI: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT3]](<3 x s32>)
-    ; CI: [[COPY26:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
-    ; CI: [[COPY27:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
-    ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY26]](s96)
-    ; CI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY27]](s96)
+    ; CI: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32), [[OR17]](s32)
+    ; CI: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
+    ; CI: [[COPY25:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; CI: [[COPY26:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY25]](s96)
+    ; CI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY26]](s96)
     ; CI-DS128-LABEL: name: test_extload_local_v2s96_from_24_align1
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3)
@@ -15437,7 +15575,6 @@ body: |
     ; CI-DS128: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]]
     ; CI-DS128: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
     ; CI-DS128: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; CI-DS128: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
     ; CI-DS128: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
     ; CI-DS128: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1 from unknown-address + 8, addrspace 3)
     ; CI-DS128: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
@@ -15460,11 +15597,8 @@ body: |
     ; CI-DS128: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C3]]
     ; CI-DS128: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
     ; CI-DS128: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; CI-DS128: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; CI-DS128: [[COPY13:%[0-9]+]]:_(<3 x s32>) = COPY [[DEF]](<3 x s32>)
-    ; CI-DS128: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[COPY13]], [[BUILD_VECTOR]](<2 x s32>), 0
-    ; CI-DS128: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[OR8]](s32), 64
-    ; CI-DS128: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT1]](<3 x s32>)
+    ; CI-DS128: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
+    ; CI-DS128: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
     ; CI-DS128: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
     ; CI-DS128: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C8]](s32)
     ; CI-DS128: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load 1 from unknown-address + 12, addrspace 3)
@@ -15474,18 +15608,18 @@ body: |
     ; CI-DS128: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load 1 from unknown-address + 14, addrspace 3)
     ; CI-DS128: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
     ; CI-DS128: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load 1 from unknown-address + 15, addrspace 3)
-    ; CI-DS128: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32)
-    ; CI-DS128: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C3]]
-    ; CI-DS128: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32)
-    ; CI-DS128: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C3]]
+    ; CI-DS128: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32)
+    ; CI-DS128: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]]
+    ; CI-DS128: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32)
+    ; CI-DS128: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C3]]
     ; CI-DS128: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
     ; CI-DS128: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
-    ; CI-DS128: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LOAD14]](s32)
-    ; CI-DS128: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C3]]
+    ; CI-DS128: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LOAD14]](s32)
+    ; CI-DS128: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C3]]
     ; CI-DS128: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
     ; CI-DS128: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
-    ; CI-DS128: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32)
-    ; CI-DS128: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY17]], [[C3]]
+    ; CI-DS128: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32)
+    ; CI-DS128: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C3]]
     ; CI-DS128: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
     ; CI-DS128: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
     ; CI-DS128: [[PTR_ADD15:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C7]](s32)
@@ -15496,21 +15630,20 @@ body: |
     ; CI-DS128: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p3) :: (load 1 from unknown-address + 18, addrspace 3)
     ; CI-DS128: [[PTR_ADD18:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s32)
     ; CI-DS128: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p3) :: (load 1 from unknown-address + 19, addrspace 3)
-    ; CI-DS128: [[COPY18:%[0-9]+]]:_(s32) = COPY [[LOAD16]](s32)
-    ; CI-DS128: [[AND16:%[0-9]+]]:_(s32) = G_AND [[COPY18]], [[C3]]
-    ; CI-DS128: [[COPY19:%[0-9]+]]:_(s32) = COPY [[LOAD17]](s32)
-    ; CI-DS128: [[AND17:%[0-9]+]]:_(s32) = G_AND [[COPY19]], [[C3]]
+    ; CI-DS128: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LOAD16]](s32)
+    ; CI-DS128: [[AND16:%[0-9]+]]:_(s32) = G_AND [[COPY17]], [[C3]]
+    ; CI-DS128: [[COPY18:%[0-9]+]]:_(s32) = COPY [[LOAD17]](s32)
+    ; CI-DS128: [[AND17:%[0-9]+]]:_(s32) = G_AND [[COPY18]], [[C3]]
     ; CI-DS128: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[C4]](s32)
     ; CI-DS128: [[OR12:%[0-9]+]]:_(s32) = G_OR [[AND16]], [[SHL12]]
-    ; CI-DS128: [[COPY20:%[0-9]+]]:_(s32) = COPY [[LOAD18]](s32)
-    ; CI-DS128: [[AND18:%[0-9]+]]:_(s32) = G_AND [[COPY20]], [[C3]]
+    ; CI-DS128: [[COPY19:%[0-9]+]]:_(s32) = COPY [[LOAD18]](s32)
+    ; CI-DS128: [[AND18:%[0-9]+]]:_(s32) = G_AND [[COPY19]], [[C3]]
     ; CI-DS128: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND18]], [[C5]](s32)
     ; CI-DS128: [[OR13:%[0-9]+]]:_(s32) = G_OR [[OR12]], [[SHL13]]
-    ; CI-DS128: [[COPY21:%[0-9]+]]:_(s32) = COPY [[LOAD19]](s32)
-    ; CI-DS128: [[AND19:%[0-9]+]]:_(s32) = G_AND [[COPY21]], [[C3]]
+    ; CI-DS128: [[COPY20:%[0-9]+]]:_(s32) = COPY [[LOAD19]](s32)
+    ; CI-DS128: [[AND19:%[0-9]+]]:_(s32) = G_AND [[COPY20]], [[C3]]
     ; CI-DS128: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[C6]](s32)
     ; CI-DS128: [[OR14:%[0-9]+]]:_(s32) = G_OR [[OR13]], [[SHL14]]
-    ; CI-DS128: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32)
     ; CI-DS128: [[PTR_ADD19:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s32)
     ; CI-DS128: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p3) :: (load 1 from unknown-address + 20, addrspace 3)
     ; CI-DS128: [[PTR_ADD20:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD19]], [[C]](s32)
@@ -15519,27 +15652,26 @@ body: |
     ; CI-DS128: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p3) :: (load 1 from unknown-address + 22, addrspace 3)
     ; CI-DS128: [[PTR_ADD22:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD19]], [[C2]](s32)
     ; CI-DS128: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p3) :: (load 1 from unknown-address + 23, addrspace 3)
-    ; CI-DS128: [[COPY22:%[0-9]+]]:_(s32) = COPY [[LOAD20]](s32)
-    ; CI-DS128: [[AND20:%[0-9]+]]:_(s32) = G_AND [[COPY22]], [[C3]]
-    ; CI-DS128: [[COPY23:%[0-9]+]]:_(s32) = COPY [[LOAD21]](s32)
-    ; CI-DS128: [[AND21:%[0-9]+]]:_(s32) = G_AND [[COPY23]], [[C3]]
+    ; CI-DS128: [[COPY21:%[0-9]+]]:_(s32) = COPY [[LOAD20]](s32)
+    ; CI-DS128: [[AND20:%[0-9]+]]:_(s32) = G_AND [[COPY21]], [[C3]]
+    ; CI-DS128: [[COPY22:%[0-9]+]]:_(s32) = COPY [[LOAD21]](s32)
+    ; CI-DS128: [[AND21:%[0-9]+]]:_(s32) = G_AND [[COPY22]], [[C3]]
     ; CI-DS128: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[C4]](s32)
     ; CI-DS128: [[OR15:%[0-9]+]]:_(s32) = G_OR [[AND20]], [[SHL15]]
-    ; CI-DS128: [[COPY24:%[0-9]+]]:_(s32) = COPY [[LOAD22]](s32)
-    ; CI-DS128: [[AND22:%[0-9]+]]:_(s32) = G_AND [[COPY24]], [[C3]]
+    ; CI-DS128: [[COPY23:%[0-9]+]]:_(s32) = COPY [[LOAD22]](s32)
+    ; CI-DS128: [[AND22:%[0-9]+]]:_(s32) = G_AND [[COPY23]], [[C3]]
     ; CI-DS128: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[AND22]], [[C5]](s32)
     ; CI-DS128: [[OR16:%[0-9]+]]:_(s32) = G_OR [[OR15]], [[SHL16]]
-    ; CI-DS128: [[COPY25:%[0-9]+]]:_(s32) = COPY [[LOAD23]](s32)
-    ; CI-DS128: [[AND23:%[0-9]+]]:_(s32) = G_AND [[COPY25]], [[C3]]
+    ; CI-DS128: [[COPY24:%[0-9]+]]:_(s32) = COPY [[LOAD23]](s32)
+    ; CI-DS128: [[AND23:%[0-9]+]]:_(s32) = G_AND [[COPY24]], [[C3]]
     ; CI-DS128: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[C6]](s32)
     ; CI-DS128: [[OR17:%[0-9]+]]:_(s32) = G_OR [[OR16]], [[SHL17]]
-    ; CI-DS128: [[INSERT2:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[BUILD_VECTOR1]](<2 x s32>), 0
-    ; CI-DS128: [[INSERT3:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT2]], [[OR17]](s32), 64
-    ; CI-DS128: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT3]](<3 x s32>)
-    ; CI-DS128: [[COPY26:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
-    ; CI-DS128: [[COPY27:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
-    ; CI-DS128: $vgpr0_vgpr1_vgpr2 = COPY [[COPY26]](s96)
-    ; CI-DS128: $vgpr3_vgpr4_vgpr5 = COPY [[COPY27]](s96)
+    ; CI-DS128: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32), [[OR17]](s32)
+    ; CI-DS128: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
+    ; CI-DS128: [[COPY25:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; CI-DS128: [[COPY26:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; CI-DS128: $vgpr0_vgpr1_vgpr2 = COPY [[COPY25]](s96)
+    ; CI-DS128: $vgpr3_vgpr4_vgpr5 = COPY [[COPY26]](s96)
     ; VI-LABEL: name: test_extload_local_v2s96_from_24_align1
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3)
@@ -15593,7 +15725,6 @@ body: |
     ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]]
     ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
     ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
     ; VI: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
     ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1 from unknown-address + 8, addrspace 3)
     ; VI: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
@@ -15616,11 +15747,8 @@ body: |
     ; VI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C3]]
     ; VI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
     ; VI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; VI: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; VI: [[COPY13:%[0-9]+]]:_(<3 x s32>) = COPY [[DEF]](<3 x s32>)
-    ; VI: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[COPY13]], [[BUILD_VECTOR]](<2 x s32>), 0
-    ; VI: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[OR8]](s32), 64
-    ; VI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT1]](<3 x s32>)
+    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
+    ; VI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
     ; VI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
     ; VI: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C8]](s32)
     ; VI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load 1 from unknown-address + 12, addrspace 3)
@@ -15630,18 +15758,18 @@ body: |
     ; VI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load 1 from unknown-address + 14, addrspace 3)
     ; VI: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
     ; VI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load 1 from unknown-address + 15, addrspace 3)
-    ; VI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32)
-    ; VI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C3]]
-    ; VI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32)
-    ; VI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C3]]
+    ; VI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32)
+    ; VI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]]
+    ; VI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32)
+    ; VI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C3]]
     ; VI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
     ; VI: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
-    ; VI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LOAD14]](s32)
-    ; VI: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C3]]
+    ; VI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LOAD14]](s32)
+    ; VI: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C3]]
     ; VI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
     ; VI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
-    ; VI: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32)
-    ; VI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY17]], [[C3]]
+    ; VI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32)
+    ; VI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C3]]
     ; VI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
     ; VI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
     ; VI: [[PTR_ADD15:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C7]](s32)
@@ -15652,21 +15780,20 @@ body: |
     ; VI: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p3) :: (load 1 from unknown-address + 18, addrspace 3)
     ; VI: [[PTR_ADD18:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s32)
     ; VI: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p3) :: (load 1 from unknown-address + 19, addrspace 3)
-    ; VI: [[COPY18:%[0-9]+]]:_(s32) = COPY [[LOAD16]](s32)
-    ; VI: [[AND16:%[0-9]+]]:_(s32) = G_AND [[COPY18]], [[C3]]
-    ; VI: [[COPY19:%[0-9]+]]:_(s32) = COPY [[LOAD17]](s32)
-    ; VI: [[AND17:%[0-9]+]]:_(s32) = G_AND [[COPY19]], [[C3]]
+    ; VI: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LOAD16]](s32)
+    ; VI: [[AND16:%[0-9]+]]:_(s32) = G_AND [[COPY17]], [[C3]]
+    ; VI: [[COPY18:%[0-9]+]]:_(s32) = COPY [[LOAD17]](s32)
+    ; VI: [[AND17:%[0-9]+]]:_(s32) = G_AND [[COPY18]], [[C3]]
     ; VI: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[C4]](s32)
     ; VI: [[OR12:%[0-9]+]]:_(s32) = G_OR [[AND16]], [[SHL12]]
-    ; VI: [[COPY20:%[0-9]+]]:_(s32) = COPY [[LOAD18]](s32)
-    ; VI: [[AND18:%[0-9]+]]:_(s32) = G_AND [[COPY20]], [[C3]]
+    ; VI: [[COPY19:%[0-9]+]]:_(s32) = COPY [[LOAD18]](s32)
+    ; VI: [[AND18:%[0-9]+]]:_(s32) = G_AND [[COPY19]], [[C3]]
     ; VI: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND18]], [[C5]](s32)
     ; VI: [[OR13:%[0-9]+]]:_(s32) = G_OR [[OR12]], [[SHL13]]
-    ; VI: [[COPY21:%[0-9]+]]:_(s32) = COPY [[LOAD19]](s32)
-    ; VI: [[AND19:%[0-9]+]]:_(s32) = G_AND [[COPY21]], [[C3]]
+    ; VI: [[COPY20:%[0-9]+]]:_(s32) = COPY [[LOAD19]](s32)
+    ; VI: [[AND19:%[0-9]+]]:_(s32) = G_AND [[COPY20]], [[C3]]
     ; VI: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[C6]](s32)
     ; VI: [[OR14:%[0-9]+]]:_(s32) = G_OR [[OR13]], [[SHL14]]
-    ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32)
     ; VI: [[PTR_ADD19:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s32)
     ; VI: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p3) :: (load 1 from unknown-address + 20, addrspace 3)
     ; VI: [[PTR_ADD20:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD19]], [[C]](s32)
@@ -15675,27 +15802,26 @@ body: |
     ; VI: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p3) :: (load 1 from unknown-address + 22, addrspace 3)
     ; VI: [[PTR_ADD22:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD19]], [[C2]](s32)
     ; VI: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p3) :: (load 1 from unknown-address + 23, addrspace 3)
-    ; VI: [[COPY22:%[0-9]+]]:_(s32) = COPY [[LOAD20]](s32)
-    ; VI: [[AND20:%[0-9]+]]:_(s32) = G_AND [[COPY22]], [[C3]]
-    ; VI: [[COPY23:%[0-9]+]]:_(s32) = COPY [[LOAD21]](s32)
-    ; VI: [[AND21:%[0-9]+]]:_(s32) = G_AND [[COPY23]], [[C3]]
+    ; VI: [[COPY21:%[0-9]+]]:_(s32) = COPY [[LOAD20]](s32)
+    ; VI: [[AND20:%[0-9]+]]:_(s32) = G_AND [[COPY21]], [[C3]]
+    ; VI: [[COPY22:%[0-9]+]]:_(s32) = COPY [[LOAD21]](s32)
+    ; VI: [[AND21:%[0-9]+]]:_(s32) = G_AND [[COPY22]], [[C3]]
     ; VI: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[C4]](s32)
     ; VI: [[OR15:%[0-9]+]]:_(s32) = G_OR [[AND20]], [[SHL15]]
-    ; VI: [[COPY24:%[0-9]+]]:_(s32) = COPY [[LOAD22]](s32)
-    ; VI: [[AND22:%[0-9]+]]:_(s32) = G_AND [[COPY24]], [[C3]]
+    ; VI: [[COPY23:%[0-9]+]]:_(s32) = COPY [[LOAD22]](s32)
+    ; VI: [[AND22:%[0-9]+]]:_(s32) = G_AND [[COPY23]], [[C3]]
     ; VI: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[AND22]], [[C5]](s32)
     ; VI: [[OR16:%[0-9]+]]:_(s32) = G_OR [[OR15]], [[SHL16]]
-    ; VI: [[COPY25:%[0-9]+]]:_(s32) = COPY [[LOAD23]](s32)
-    ; VI: [[AND23:%[0-9]+]]:_(s32) = G_AND [[COPY25]], [[C3]]
+    ; VI: [[COPY24:%[0-9]+]]:_(s32) = COPY [[LOAD23]](s32)
+    ; VI: [[AND23:%[0-9]+]]:_(s32) = G_AND [[COPY24]], [[C3]]
     ; VI: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[C6]](s32)
     ; VI: [[OR17:%[0-9]+]]:_(s32) = G_OR [[OR16]], [[SHL17]]
-    ; VI: [[INSERT2:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[BUILD_VECTOR1]](<2 x s32>), 0
-    ; VI: [[INSERT3:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT2]], [[OR17]](s32), 64
-    ; VI: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT3]](<3 x s32>)
-    ; VI: [[COPY26:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
-    ; VI: [[COPY27:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
-    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY26]](s96)
-    ; VI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY27]](s96)
+    ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32), [[OR17]](s32)
+    ; VI: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
+    ; VI: [[COPY25:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; VI: [[COPY26:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY25]](s96)
+    ; VI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY26]](s96)
     ; GFX9-LABEL: name: test_extload_local_v2s96_from_24_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3)
@@ -15749,7 +15875,6 @@ body: |
     ; GFX9: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]]
     ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
     ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
     ; GFX9: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
     ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1 from unknown-address + 8, addrspace 3)
     ; GFX9: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
@@ -15772,11 +15897,8 @@ body: |
     ; GFX9: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C3]]
     ; GFX9: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
     ; GFX9: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; GFX9: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; GFX9: [[COPY13:%[0-9]+]]:_(<3 x s32>) = COPY [[DEF]](<3 x s32>)
-    ; GFX9: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[COPY13]], [[BUILD_VECTOR]](<2 x s32>), 0
-    ; GFX9: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[OR8]](s32), 64
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT1]](<3 x s32>)
+    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
+    ; GFX9: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
     ; GFX9: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
     ; GFX9: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C8]](s32)
     ; GFX9: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load 1 from unknown-address + 12, addrspace 3)
@@ -15786,18 +15908,18 @@ body: |
     ; GFX9: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load 1 from unknown-address + 14, addrspace 3)
     ; GFX9: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
     ; GFX9: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load 1 from unknown-address + 15, addrspace 3)
-    ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32)
-    ; GFX9: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C3]]
-    ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32)
-    ; GFX9: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C3]]
+    ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32)
+    ; GFX9: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]]
+    ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32)
+    ; GFX9: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C3]]
     ; GFX9: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
     ; GFX9: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
-    ; GFX9: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LOAD14]](s32)
-    ; GFX9: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C3]]
+    ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LOAD14]](s32)
+    ; GFX9: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C3]]
     ; GFX9: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
     ; GFX9: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
-    ; GFX9: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32)
-    ; GFX9: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY17]], [[C3]]
+    ; GFX9: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32)
+    ; GFX9: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C3]]
     ; GFX9: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
     ; GFX9: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
     ; GFX9: [[PTR_ADD15:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C7]](s32)
@@ -15808,21 +15930,20 @@ body: |
     ; GFX9: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p3) :: (load 1 from unknown-address + 18, addrspace 3)
     ; GFX9: [[PTR_ADD18:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s32)
     ; GFX9: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p3) :: (load 1 from unknown-address + 19, addrspace 3)
-    ; GFX9: [[COPY18:%[0-9]+]]:_(s32) = COPY [[LOAD16]](s32)
-    ; GFX9: [[AND16:%[0-9]+]]:_(s32) = G_AND [[COPY18]], [[C3]]
-    ; GFX9: [[COPY19:%[0-9]+]]:_(s32) = COPY [[LOAD17]](s32)
-    ; GFX9: [[AND17:%[0-9]+]]:_(s32) = G_AND [[COPY19]], [[C3]]
+    ; GFX9: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LOAD16]](s32)
+    ; GFX9: [[AND16:%[0-9]+]]:_(s32) = G_AND [[COPY17]], [[C3]]
+    ; GFX9: [[COPY18:%[0-9]+]]:_(s32) = COPY [[LOAD17]](s32)
+    ; GFX9: [[AND17:%[0-9]+]]:_(s32) = G_AND [[COPY18]], [[C3]]
     ; GFX9: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[C4]](s32)
     ; GFX9: [[OR12:%[0-9]+]]:_(s32) = G_OR [[AND16]], [[SHL12]]
-    ; GFX9: [[COPY20:%[0-9]+]]:_(s32) = COPY [[LOAD18]](s32)
-    ; GFX9: [[AND18:%[0-9]+]]:_(s32) = G_AND [[COPY20]], [[C3]]
+    ; GFX9: [[COPY19:%[0-9]+]]:_(s32) = COPY [[LOAD18]](s32)
+    ; GFX9: [[AND18:%[0-9]+]]:_(s32) = G_AND [[COPY19]], [[C3]]
     ; GFX9: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND18]], [[C5]](s32)
     ; GFX9: [[OR13:%[0-9]+]]:_(s32) = G_OR [[OR12]], [[SHL13]]
-    ; GFX9: [[COPY21:%[0-9]+]]:_(s32) = COPY [[LOAD19]](s32)
-    ; GFX9: [[AND19:%[0-9]+]]:_(s32) = G_AND [[COPY21]], [[C3]]
+    ; GFX9: [[COPY20:%[0-9]+]]:_(s32) = COPY [[LOAD19]](s32)
+    ; GFX9: [[AND19:%[0-9]+]]:_(s32) = G_AND [[COPY20]], [[C3]]
     ; GFX9: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[C6]](s32)
     ; GFX9: [[OR14:%[0-9]+]]:_(s32) = G_OR [[OR13]], [[SHL14]]
-    ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32)
     ; GFX9: [[PTR_ADD19:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s32)
     ; GFX9: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p3) :: (load 1 from unknown-address + 20, addrspace 3)
     ; GFX9: [[PTR_ADD20:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD19]], [[C]](s32)
@@ -15831,27 +15952,26 @@ body: |
     ; GFX9: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p3) :: (load 1 from unknown-address + 22, addrspace 3)
     ; GFX9: [[PTR_ADD22:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD19]], [[C2]](s32)
     ; GFX9: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p3) :: (load 1 from unknown-address + 23, addrspace 3)
-    ; GFX9: [[COPY22:%[0-9]+]]:_(s32) = COPY [[LOAD20]](s32)
-    ; GFX9: [[AND20:%[0-9]+]]:_(s32) = G_AND [[COPY22]], [[C3]]
-    ; GFX9: [[COPY23:%[0-9]+]]:_(s32) = COPY [[LOAD21]](s32)
-    ; GFX9: [[AND21:%[0-9]+]]:_(s32) = G_AND [[COPY23]], [[C3]]
+    ; GFX9: [[COPY21:%[0-9]+]]:_(s32) = COPY [[LOAD20]](s32)
+    ; GFX9: [[AND20:%[0-9]+]]:_(s32) = G_AND [[COPY21]], [[C3]]
+    ; GFX9: [[COPY22:%[0-9]+]]:_(s32) = COPY [[LOAD21]](s32)
+    ; GFX9: [[AND21:%[0-9]+]]:_(s32) = G_AND [[COPY22]], [[C3]]
     ; GFX9: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[C4]](s32)
     ; GFX9: [[OR15:%[0-9]+]]:_(s32) = G_OR [[AND20]], [[SHL15]]
-    ; GFX9: [[COPY24:%[0-9]+]]:_(s32) = COPY [[LOAD22]](s32)
-    ; GFX9: [[AND22:%[0-9]+]]:_(s32) = G_AND [[COPY24]], [[C3]]
+    ; GFX9: [[COPY23:%[0-9]+]]:_(s32) = COPY [[LOAD22]](s32)
+    ; GFX9: [[AND22:%[0-9]+]]:_(s32) = G_AND [[COPY23]], [[C3]]
     ; GFX9: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[AND22]], [[C5]](s32)
     ; GFX9: [[OR16:%[0-9]+]]:_(s32) = G_OR [[OR15]], [[SHL16]]
-    ; GFX9: [[COPY25:%[0-9]+]]:_(s32) = COPY [[LOAD23]](s32)
-    ; GFX9: [[AND23:%[0-9]+]]:_(s32) = G_AND [[COPY25]], [[C3]]
+    ; GFX9: [[COPY24:%[0-9]+]]:_(s32) = COPY [[LOAD23]](s32)
+    ; GFX9: [[AND23:%[0-9]+]]:_(s32) = G_AND [[COPY24]], [[C3]]
     ; GFX9: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[C6]](s32)
     ; GFX9: [[OR17:%[0-9]+]]:_(s32) = G_OR [[OR16]], [[SHL17]]
-    ; GFX9: [[INSERT2:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[BUILD_VECTOR1]](<2 x s32>), 0
-    ; GFX9: [[INSERT3:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT2]], [[OR17]](s32), 64
-    ; GFX9: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT3]](<3 x s32>)
-    ; GFX9: [[COPY26:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
-    ; GFX9: [[COPY27:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
-    ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[COPY26]](s96)
-    ; GFX9: $vgpr3_vgpr4_vgpr5 = COPY [[COPY27]](s96)
+    ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32), [[OR17]](s32)
+    ; GFX9: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
+    ; GFX9: [[COPY25:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; GFX9: [[COPY26:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[COPY25]](s96)
+    ; GFX9: $vgpr3_vgpr4_vgpr5 = COPY [[COPY26]](s96)
     ; GFX9-UNALIGNED-LABEL: name: test_extload_local_v2s96_from_24_align1
     ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load 12, align 1, addrspace 3)
@@ -15917,7 +16037,6 @@ body: |
     ; GFX10: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]]
     ; GFX10: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
     ; GFX10: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
     ; GFX10: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
     ; GFX10: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1 from unknown-address + 8, addrspace 3)
     ; GFX10: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
@@ -15940,11 +16059,8 @@ body: |
     ; GFX10: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C3]]
     ; GFX10: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
     ; GFX10: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; GFX10: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; GFX10: [[COPY13:%[0-9]+]]:_(<3 x s32>) = COPY [[DEF]](<3 x s32>)
-    ; GFX10: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[COPY13]], [[BUILD_VECTOR]](<2 x s32>), 0
-    ; GFX10: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[OR8]](s32), 64
-    ; GFX10: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT1]](<3 x s32>)
+    ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
+    ; GFX10: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
     ; GFX10: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
     ; GFX10: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C8]](s32)
     ; GFX10: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load 1 from unknown-address + 12, addrspace 3)
@@ -15954,18 +16070,18 @@ body: |
     ; GFX10: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load 1 from unknown-address + 14, addrspace 3)
     ; GFX10: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
     ; GFX10: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load 1 from unknown-address + 15, addrspace 3)
-    ; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32)
-    ; GFX10: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C3]]
-    ; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32)
-    ; GFX10: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C3]]
+    ; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32)
+    ; GFX10: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]]
+    ; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32)
+    ; GFX10: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C3]]
     ; GFX10: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
     ; GFX10: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
-    ; GFX10: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LOAD14]](s32)
-    ; GFX10: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C3]]
+    ; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LOAD14]](s32)
+    ; GFX10: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C3]]
     ; GFX10: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
     ; GFX10: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
-    ; GFX10: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32)
-    ; GFX10: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY17]], [[C3]]
+    ; GFX10: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32)
+    ; GFX10: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C3]]
     ; GFX10: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
     ; GFX10: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
     ; GFX10: [[PTR_ADD15:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C7]](s32)
@@ -15976,21 +16092,20 @@ body: |
     ; GFX10: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p3) :: (load 1 from unknown-address + 18, addrspace 3)
     ; GFX10: [[PTR_ADD18:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s32)
     ; GFX10: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p3) :: (load 1 from unknown-address + 19, addrspace 3)
-    ; GFX10: [[COPY18:%[0-9]+]]:_(s32) = COPY [[LOAD16]](s32)
-    ; GFX10: [[AND16:%[0-9]+]]:_(s32) = G_AND [[COPY18]], [[C3]]
-    ; GFX10: [[COPY19:%[0-9]+]]:_(s32) = COPY [[LOAD17]](s32)
-    ; GFX10: [[AND17:%[0-9]+]]:_(s32) = G_AND [[COPY19]], [[C3]]
+    ; GFX10: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LOAD16]](s32)
+    ; GFX10: [[AND16:%[0-9]+]]:_(s32) = G_AND [[COPY17]], [[C3]]
+    ; GFX10: [[COPY18:%[0-9]+]]:_(s32) = COPY [[LOAD17]](s32)
+    ; GFX10: [[AND17:%[0-9]+]]:_(s32) = G_AND [[COPY18]], [[C3]]
     ; GFX10: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[C4]](s32)
     ; GFX10: [[OR12:%[0-9]+]]:_(s32) = G_OR [[AND16]], [[SHL12]]
-    ; GFX10: [[COPY20:%[0-9]+]]:_(s32) = COPY [[LOAD18]](s32)
-    ; GFX10: [[AND18:%[0-9]+]]:_(s32) = G_AND [[COPY20]], [[C3]]
+    ; GFX10: [[COPY19:%[0-9]+]]:_(s32) = COPY [[LOAD18]](s32)
+    ; GFX10: [[AND18:%[0-9]+]]:_(s32) = G_AND [[COPY19]], [[C3]]
     ; GFX10: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND18]], [[C5]](s32)
     ; GFX10: [[OR13:%[0-9]+]]:_(s32) = G_OR [[OR12]], [[SHL13]]
-    ; GFX10: [[COPY21:%[0-9]+]]:_(s32) = COPY [[LOAD19]](s32)
-    ; GFX10: [[AND19:%[0-9]+]]:_(s32) = G_AND [[COPY21]], [[C3]]
+    ; GFX10: [[COPY20:%[0-9]+]]:_(s32) = COPY [[LOAD19]](s32)
+    ; GFX10: [[AND19:%[0-9]+]]:_(s32) = G_AND [[COPY20]], [[C3]]
     ; GFX10: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[C6]](s32)
     ; GFX10: [[OR14:%[0-9]+]]:_(s32) = G_OR [[OR13]], [[SHL14]]
-    ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32)
     ; GFX10: [[PTR_ADD19:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s32)
     ; GFX10: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p3) :: (load 1 from unknown-address + 20, addrspace 3)
     ; GFX10: [[PTR_ADD20:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD19]], [[C]](s32)
@@ -15999,27 +16114,26 @@ body: |
     ; GFX10: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p3) :: (load 1 from unknown-address + 22, addrspace 3)
     ; GFX10: [[PTR_ADD22:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD19]], [[C2]](s32)
     ; GFX10: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p3) :: (load 1 from unknown-address + 23, addrspace 3)
-    ; GFX10: [[COPY22:%[0-9]+]]:_(s32) = COPY [[LOAD20]](s32)
-    ; GFX10: [[AND20:%[0-9]+]]:_(s32) = G_AND [[COPY22]], [[C3]]
-    ; GFX10: [[COPY23:%[0-9]+]]:_(s32) = COPY [[LOAD21]](s32)
-    ; GFX10: [[AND21:%[0-9]+]]:_(s32) = G_AND [[COPY23]], [[C3]]
+    ; GFX10: [[COPY21:%[0-9]+]]:_(s32) = COPY [[LOAD20]](s32)
+    ; GFX10: [[AND20:%[0-9]+]]:_(s32) = G_AND [[COPY21]], [[C3]]
+    ; GFX10: [[COPY22:%[0-9]+]]:_(s32) = COPY [[LOAD21]](s32)
+    ; GFX10: [[AND21:%[0-9]+]]:_(s32) = G_AND [[COPY22]], [[C3]]
     ; GFX10: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[C4]](s32)
     ; GFX10: [[OR15:%[0-9]+]]:_(s32) = G_OR [[AND20]], [[SHL15]]
-    ; GFX10: [[COPY24:%[0-9]+]]:_(s32) = COPY [[LOAD22]](s32)
-    ; GFX10: [[AND22:%[0-9]+]]:_(s32) = G_AND [[COPY24]], [[C3]]
+    ; GFX10: [[COPY23:%[0-9]+]]:_(s32) = COPY [[LOAD22]](s32)
+    ; GFX10: [[AND22:%[0-9]+]]:_(s32) = G_AND [[COPY23]], [[C3]]
     ; GFX10: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[AND22]], [[C5]](s32)
     ; GFX10: [[OR16:%[0-9]+]]:_(s32) = G_OR [[OR15]], [[SHL16]]
-    ; GFX10: [[COPY25:%[0-9]+]]:_(s32) = COPY [[LOAD23]](s32)
-    ; GFX10: [[AND23:%[0-9]+]]:_(s32) = G_AND [[COPY25]], [[C3]]
+    ; GFX10: [[COPY24:%[0-9]+]]:_(s32) = COPY [[LOAD23]](s32)
+    ; GFX10: [[AND23:%[0-9]+]]:_(s32) = G_AND [[COPY24]], [[C3]]
     ; GFX10: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[C6]](s32)
     ; GFX10: [[OR17:%[0-9]+]]:_(s32) = G_OR [[OR16]], [[SHL17]]
-    ; GFX10: [[INSERT2:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[BUILD_VECTOR1]](<2 x s32>), 0
-    ; GFX10: [[INSERT3:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT2]], [[OR17]](s32), 64
-    ; GFX10: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT3]](<3 x s32>)
-    ; GFX10: [[COPY26:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
-    ; GFX10: [[COPY27:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
-    ; GFX10: $vgpr0_vgpr1_vgpr2 = COPY [[COPY26]](s96)
-    ; GFX10: $vgpr3_vgpr4_vgpr5 = COPY [[COPY27]](s96)
+    ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32), [[OR17]](s32)
+    ; GFX10: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
+    ; GFX10: [[COPY25:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; GFX10: [[COPY26:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; GFX10: $vgpr0_vgpr1_vgpr2 = COPY [[COPY25]](s96)
+    ; GFX10: $vgpr3_vgpr4_vgpr5 = COPY [[COPY26]](s96)
     ; GFX10-UNALIGNED-LABEL: name: test_extload_local_v2s96_from_24_align1
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3)
@@ -16073,7 +16187,6 @@ body: |
     ; GFX10-UNALIGNED: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]]
     ; GFX10-UNALIGNED: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
     ; GFX10-UNALIGNED: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
-    ; GFX10-UNALIGNED: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
     ; GFX10-UNALIGNED: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
     ; GFX10-UNALIGNED: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1 from unknown-address + 8, addrspace 3)
     ; GFX10-UNALIGNED: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
@@ -16096,11 +16209,8 @@ body: |
     ; GFX10-UNALIGNED: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C3]]
     ; GFX10-UNALIGNED: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
     ; GFX10-UNALIGNED: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
-    ; GFX10-UNALIGNED: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; GFX10-UNALIGNED: [[COPY13:%[0-9]+]]:_(<3 x s32>) = COPY [[DEF]](<3 x s32>)
-    ; GFX10-UNALIGNED: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[COPY13]], [[BUILD_VECTOR]](<2 x s32>), 0
-    ; GFX10-UNALIGNED: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[OR8]](s32), 64
-    ; GFX10-UNALIGNED: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT1]](<3 x s32>)
+    ; GFX10-UNALIGNED: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
+    ; GFX10-UNALIGNED: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
     ; GFX10-UNALIGNED: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
     ; GFX10-UNALIGNED: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C8]](s32)
     ; GFX10-UNALIGNED: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load 1 from unknown-address + 12, addrspace 3)
@@ -16110,18 +16220,18 @@ body: |
     ; GFX10-UNALIGNED: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load 1 from unknown-address + 14, addrspace 3)
     ; GFX10-UNALIGNED: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
     ; GFX10-UNALIGNED: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load 1 from unknown-address + 15, addrspace 3)
-    ; GFX10-UNALIGNED: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32)
-    ; GFX10-UNALIGNED: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C3]]
-    ; GFX10-UNALIGNED: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32)
-    ; GFX10-UNALIGNED: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C3]]
+    ; GFX10-UNALIGNED: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32)
+    ; GFX10-UNALIGNED: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]]
+    ; GFX10-UNALIGNED: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32)
+    ; GFX10-UNALIGNED: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C3]]
     ; GFX10-UNALIGNED: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
     ; GFX10-UNALIGNED: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
-    ; GFX10-UNALIGNED: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LOAD14]](s32)
-    ; GFX10-UNALIGNED: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C3]]
+    ; GFX10-UNALIGNED: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LOAD14]](s32)
+    ; GFX10-UNALIGNED: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C3]]
     ; GFX10-UNALIGNED: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C5]](s32)
     ; GFX10-UNALIGNED: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
-    ; GFX10-UNALIGNED: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32)
-    ; GFX10-UNALIGNED: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY17]], [[C3]]
+    ; GFX10-UNALIGNED: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32)
+    ; GFX10-UNALIGNED: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C3]]
     ; GFX10-UNALIGNED: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32)
     ; GFX10-UNALIGNED: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
     ; GFX10-UNALIGNED: [[PTR_ADD15:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C7]](s32)
@@ -16132,21 +16242,20 @@ body: |
     ; GFX10-UNALIGNED: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p3) :: (load 1 from unknown-address + 18, addrspace 3)
     ; GFX10-UNALIGNED: [[PTR_ADD18:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s32)
     ; GFX10-UNALIGNED: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p3) :: (load 1 from unknown-address + 19, addrspace 3)
-    ; GFX10-UNALIGNED: [[COPY18:%[0-9]+]]:_(s32) = COPY [[LOAD16]](s32)
-    ; GFX10-UNALIGNED: [[AND16:%[0-9]+]]:_(s32) = G_AND [[COPY18]], [[C3]]
-    ; GFX10-UNALIGNED: [[COPY19:%[0-9]+]]:_(s32) = COPY [[LOAD17]](s32)
-    ; GFX10-UNALIGNED: [[AND17:%[0-9]+]]:_(s32) = G_AND [[COPY19]], [[C3]]
+    ; GFX10-UNALIGNED: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LOAD16]](s32)
+    ; GFX10-UNALIGNED: [[AND16:%[0-9]+]]:_(s32) = G_AND [[COPY17]], [[C3]]
+    ; GFX10-UNALIGNED: [[COPY18:%[0-9]+]]:_(s32) = COPY [[LOAD17]](s32)
+    ; GFX10-UNALIGNED: [[AND17:%[0-9]+]]:_(s32) = G_AND [[COPY18]], [[C3]]
     ; GFX10-UNALIGNED: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[C4]](s32)
     ; GFX10-UNALIGNED: [[OR12:%[0-9]+]]:_(s32) = G_OR [[AND16]], [[SHL12]]
-    ; GFX10-UNALIGNED: [[COPY20:%[0-9]+]]:_(s32) = COPY [[LOAD18]](s32)
-    ; GFX10-UNALIGNED: [[AND18:%[0-9]+]]:_(s32) = G_AND [[COPY20]], [[C3]]
+    ; GFX10-UNALIGNED: [[COPY19:%[0-9]+]]:_(s32) = COPY [[LOAD18]](s32)
+    ; GFX10-UNALIGNED: [[AND18:%[0-9]+]]:_(s32) = G_AND [[COPY19]], [[C3]]
     ; GFX10-UNALIGNED: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND18]], [[C5]](s32)
     ; GFX10-UNALIGNED: [[OR13:%[0-9]+]]:_(s32) = G_OR [[OR12]], [[SHL13]]
-    ; GFX10-UNALIGNED: [[COPY21:%[0-9]+]]:_(s32) = COPY [[LOAD19]](s32)
-    ; GFX10-UNALIGNED: [[AND19:%[0-9]+]]:_(s32) = G_AND [[COPY21]], [[C3]]
+    ; GFX10-UNALIGNED: [[COPY20:%[0-9]+]]:_(s32) = COPY [[LOAD19]](s32)
+    ; GFX10-UNALIGNED: [[AND19:%[0-9]+]]:_(s32) = G_AND [[COPY20]], [[C3]]
     ; GFX10-UNALIGNED: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[C6]](s32)
     ; GFX10-UNALIGNED: [[OR14:%[0-9]+]]:_(s32) = G_OR [[OR13]], [[SHL14]]
-    ; GFX10-UNALIGNED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32)
     ; GFX10-UNALIGNED: [[PTR_ADD19:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s32)
     ; GFX10-UNALIGNED: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p3) :: (load 1 from unknown-address + 20, addrspace 3)
     ; GFX10-UNALIGNED: [[PTR_ADD20:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD19]], [[C]](s32)
@@ -16155,27 +16264,26 @@ body: |
     ; GFX10-UNALIGNED: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p3) :: (load 1 from unknown-address + 22, addrspace 3)
     ; GFX10-UNALIGNED: [[PTR_ADD22:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD19]], [[C2]](s32)
     ; GFX10-UNALIGNED: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p3) :: (load 1 from unknown-address + 23, addrspace 3)
-    ; GFX10-UNALIGNED: [[COPY22:%[0-9]+]]:_(s32) = COPY [[LOAD20]](s32)
-    ; GFX10-UNALIGNED: [[AND20:%[0-9]+]]:_(s32) = G_AND [[COPY22]], [[C3]]
-    ; GFX10-UNALIGNED: [[COPY23:%[0-9]+]]:_(s32) = COPY [[LOAD21]](s32)
-    ; GFX10-UNALIGNED: [[AND21:%[0-9]+]]:_(s32) = G_AND [[COPY23]], [[C3]]
+    ; GFX10-UNALIGNED: [[COPY21:%[0-9]+]]:_(s32) = COPY [[LOAD20]](s32)
+    ; GFX10-UNALIGNED: [[AND20:%[0-9]+]]:_(s32) = G_AND [[COPY21]], [[C3]]
+    ; GFX10-UNALIGNED: [[COPY22:%[0-9]+]]:_(s32) = COPY [[LOAD21]](s32)
+    ; GFX10-UNALIGNED: [[AND21:%[0-9]+]]:_(s32) = G_AND [[COPY22]], [[C3]]
     ; GFX10-UNALIGNED: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[C4]](s32)
     ; GFX10-UNALIGNED: [[OR15:%[0-9]+]]:_(s32) = G_OR [[AND20]], [[SHL15]]
-    ; GFX10-UNALIGNED: [[COPY24:%[0-9]+]]:_(s32) = COPY [[LOAD22]](s32)
-    ; GFX10-UNALIGNED: [[AND22:%[0-9]+]]:_(s32) = G_AND [[COPY24]], [[C3]]
+    ; GFX10-UNALIGNED: [[COPY23:%[0-9]+]]:_(s32) = COPY [[LOAD22]](s32)
+    ; GFX10-UNALIGNED: [[AND22:%[0-9]+]]:_(s32) = G_AND [[COPY23]], [[C3]]
     ; GFX10-UNALIGNED: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[AND22]], [[C5]](s32)
     ; GFX10-UNALIGNED: [[OR16:%[0-9]+]]:_(s32) = G_OR [[OR15]], [[SHL16]]
-    ; GFX10-UNALIGNED: [[COPY25:%[0-9]+]]:_(s32) = COPY [[LOAD23]](s32)
-    ; GFX10-UNALIGNED: [[AND23:%[0-9]+]]:_(s32) = G_AND [[COPY25]], [[C3]]
+    ; GFX10-UNALIGNED: [[COPY24:%[0-9]+]]:_(s32) = COPY [[LOAD23]](s32)
+    ; GFX10-UNALIGNED: [[AND23:%[0-9]+]]:_(s32) = G_AND [[COPY24]], [[C3]]
     ; GFX10-UNALIGNED: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[C6]](s32)
     ; GFX10-UNALIGNED: [[OR17:%[0-9]+]]:_(s32) = G_OR [[OR16]], [[SHL17]]
-    ; GFX10-UNALIGNED: [[INSERT2:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[BUILD_VECTOR1]](<2 x s32>), 0
-    ; GFX10-UNALIGNED: [[INSERT3:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT2]], [[OR17]](s32), 64
-    ; GFX10-UNALIGNED: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT3]](<3 x s32>)
-    ; GFX10-UNALIGNED: [[COPY26:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
-    ; GFX10-UNALIGNED: [[COPY27:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
-    ; GFX10-UNALIGNED: $vgpr0_vgpr1_vgpr2 = COPY [[COPY26]](s96)
-    ; GFX10-UNALIGNED: $vgpr3_vgpr4_vgpr5 = COPY [[COPY27]](s96)
+    ; GFX10-UNALIGNED: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32), [[OR17]](s32)
+    ; GFX10-UNALIGNED: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
+    ; GFX10-UNALIGNED: [[COPY25:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; GFX10-UNALIGNED: [[COPY26:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; GFX10-UNALIGNED: $vgpr0_vgpr1_vgpr2 = COPY [[COPY25]](s96)
+    ; GFX10-UNALIGNED: $vgpr3_vgpr4_vgpr5 = COPY [[COPY26]](s96)
     %0:_(p3) = COPY $vgpr0
     %1:_(<2 x s96>) = G_LOAD %0 :: (load 24, align 1, addrspace 3)
     %2:_(s96) = G_EXTRACT %1, 0
@@ -16215,7 +16323,6 @@ body: |
     ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]]
     ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
     ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32)
     ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
     ; SI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
     ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 2 from unknown-address + 8, addrspace 3)
@@ -16227,50 +16334,45 @@ body: |
     ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]]
     ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
     ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; SI: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; SI: [[COPY7:%[0-9]+]]:_(<3 x s32>) = COPY [[DEF]](<3 x s32>)
-    ; SI: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[COPY7]], [[BUILD_VECTOR]](<2 x s32>), 0
-    ; SI: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[OR2]](s32), 64
-    ; SI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT1]](<3 x s32>)
+    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
+    ; SI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
     ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
     ; SI: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32)
     ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 2 from unknown-address + 12, addrspace 3)
     ; SI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32)
     ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 2 from unknown-address + 14, addrspace 3)
-    ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32)
-    ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C1]]
-    ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32)
-    ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C1]]
+    ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32)
+    ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]]
+    ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32)
+    ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C1]]
     ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
     ; SI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
     ; SI: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C3]](s32)
     ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 2 from unknown-address + 16, addrspace 3)
     ; SI: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
     ; SI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load 2 from unknown-address + 18, addrspace 3)
-    ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32)
-    ; SI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C1]]
-    ; SI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32)
-    ; SI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]]
+    ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32)
+    ; SI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C1]]
+    ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32)
+    ; SI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C1]]
     ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C2]](s32)
     ; SI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]]
-    ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32)
     ; SI: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C4]](s32)
     ; SI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load 2 from unknown-address + 20, addrspace 3)
     ; SI: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32)
     ; SI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load 2 from unknown-address + 22, addrspace 3)
-    ; SI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD10]](s32)
-    ; SI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C1]]
-    ; SI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32)
-    ; SI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C1]]
+    ; SI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD10]](s32)
+    ; SI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]]
+    ; SI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32)
+    ; SI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C1]]
     ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32)
     ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL5]]
-    ; SI: [[INSERT2:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[BUILD_VECTOR1]](<2 x s32>), 0
-    ; SI: [[INSERT3:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT2]], [[OR5]](s32), 64
-    ; SI: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT3]](<3 x s32>)
-    ; SI: [[COPY14:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
-    ; SI: [[COPY15:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
-    ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY14]](s96)
-    ; SI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY15]](s96)
+    ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32), [[OR5]](s32)
+    ; SI: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
+    ; SI: [[COPY13:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; SI: [[COPY14:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY13]](s96)
+    ; SI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY14]](s96)
     ; CI-LABEL: name: test_extload_local_v2s96_from_24_align2
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3)
@@ -16296,7 +16398,6 @@ body: |
     ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]]
     ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
     ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32)
     ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
     ; CI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
     ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 2 from unknown-address + 8, addrspace 3)
@@ -16308,50 +16409,45 @@ body: |
     ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]]
     ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
     ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; CI: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; CI: [[COPY7:%[0-9]+]]:_(<3 x s32>) = COPY [[DEF]](<3 x s32>)
-    ; CI: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[COPY7]], [[BUILD_VECTOR]](<2 x s32>), 0
-    ; CI: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[OR2]](s32), 64
-    ; CI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT1]](<3 x s32>)
+    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
+    ; CI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
     ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
     ; CI: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32)
     ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 2 from unknown-address + 12, addrspace 3)
     ; CI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32)
     ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 2 from unknown-address + 14, addrspace 3)
-    ; CI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32)
-    ; CI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C1]]
-    ; CI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32)
-    ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C1]]
+    ; CI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32)
+    ; CI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]]
+    ; CI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32)
+    ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C1]]
     ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
     ; CI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
     ; CI: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C3]](s32)
     ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 2 from unknown-address + 16, addrspace 3)
     ; CI: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
     ; CI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load 2 from unknown-address + 18, addrspace 3)
-    ; CI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32)
-    ; CI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C1]]
-    ; CI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32)
-    ; CI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]]
+    ; CI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32)
+    ; CI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C1]]
+    ; CI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32)
+    ; CI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C1]]
     ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C2]](s32)
     ; CI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]]
-    ; CI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32)
     ; CI: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C4]](s32)
     ; CI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load 2 from unknown-address + 20, addrspace 3)
     ; CI: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32)
     ; CI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load 2 from unknown-address + 22, addrspace 3)
-    ; CI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD10]](s32)
-    ; CI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C1]]
-    ; CI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32)
-    ; CI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C1]]
+    ; CI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD10]](s32)
+    ; CI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]]
+    ; CI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32)
+    ; CI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C1]]
     ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32)
     ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL5]]
-    ; CI: [[INSERT2:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[BUILD_VECTOR1]](<2 x s32>), 0
-    ; CI: [[INSERT3:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT2]], [[OR5]](s32), 64
-    ; CI: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT3]](<3 x s32>)
-    ; CI: [[COPY14:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
-    ; CI: [[COPY15:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
-    ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY14]](s96)
-    ; CI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY15]](s96)
+    ; CI: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32), [[OR5]](s32)
+    ; CI: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
+    ; CI: [[COPY13:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; CI: [[COPY14:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY13]](s96)
+    ; CI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY14]](s96)
     ; CI-DS128-LABEL: name: test_extload_local_v2s96_from_24_align2
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3)
@@ -16377,7 +16473,6 @@ body: |
     ; CI-DS128: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]]
     ; CI-DS128: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
     ; CI-DS128: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; CI-DS128: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32)
     ; CI-DS128: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
     ; CI-DS128: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
     ; CI-DS128: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 2 from unknown-address + 8, addrspace 3)
@@ -16389,50 +16484,45 @@ body: |
     ; CI-DS128: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]]
     ; CI-DS128: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
     ; CI-DS128: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; CI-DS128: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; CI-DS128: [[COPY7:%[0-9]+]]:_(<3 x s32>) = COPY [[DEF]](<3 x s32>)
-    ; CI-DS128: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[COPY7]], [[BUILD_VECTOR]](<2 x s32>), 0
-    ; CI-DS128: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[OR2]](s32), 64
-    ; CI-DS128: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT1]](<3 x s32>)
+    ; CI-DS128: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
+    ; CI-DS128: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
     ; CI-DS128: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
     ; CI-DS128: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32)
     ; CI-DS128: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 2 from unknown-address + 12, addrspace 3)
     ; CI-DS128: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32)
     ; CI-DS128: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 2 from unknown-address + 14, addrspace 3)
-    ; CI-DS128: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32)
-    ; CI-DS128: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C1]]
-    ; CI-DS128: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32)
-    ; CI-DS128: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C1]]
+    ; CI-DS128: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32)
+    ; CI-DS128: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]]
+    ; CI-DS128: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32)
+    ; CI-DS128: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C1]]
     ; CI-DS128: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
     ; CI-DS128: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
     ; CI-DS128: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C3]](s32)
     ; CI-DS128: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 2 from unknown-address + 16, addrspace 3)
     ; CI-DS128: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
     ; CI-DS128: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load 2 from unknown-address + 18, addrspace 3)
-    ; CI-DS128: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32)
-    ; CI-DS128: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C1]]
-    ; CI-DS128: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32)
-    ; CI-DS128: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]]
+    ; CI-DS128: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32)
+    ; CI-DS128: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C1]]
+    ; CI-DS128: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32)
+    ; CI-DS128: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C1]]
     ; CI-DS128: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C2]](s32)
     ; CI-DS128: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]]
-    ; CI-DS128: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32)
     ; CI-DS128: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C4]](s32)
     ; CI-DS128: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load 2 from unknown-address + 20, addrspace 3)
     ; CI-DS128: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32)
     ; CI-DS128: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load 2 from unknown-address + 22, addrspace 3)
-    ; CI-DS128: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD10]](s32)
-    ; CI-DS128: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C1]]
-    ; CI-DS128: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32)
-    ; CI-DS128: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C1]]
+    ; CI-DS128: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD10]](s32)
+    ; CI-DS128: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]]
+    ; CI-DS128: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32)
+    ; CI-DS128: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C1]]
     ; CI-DS128: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32)
     ; CI-DS128: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL5]]
-    ; CI-DS128: [[INSERT2:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[BUILD_VECTOR1]](<2 x s32>), 0
-    ; CI-DS128: [[INSERT3:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT2]], [[OR5]](s32), 64
-    ; CI-DS128: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT3]](<3 x s32>)
-    ; CI-DS128: [[COPY14:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
-    ; CI-DS128: [[COPY15:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
-    ; CI-DS128: $vgpr0_vgpr1_vgpr2 = COPY [[COPY14]](s96)
-    ; CI-DS128: $vgpr3_vgpr4_vgpr5 = COPY [[COPY15]](s96)
+    ; CI-DS128: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32), [[OR5]](s32)
+    ; CI-DS128: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
+    ; CI-DS128: [[COPY13:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; CI-DS128: [[COPY14:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; CI-DS128: $vgpr0_vgpr1_vgpr2 = COPY [[COPY13]](s96)
+    ; CI-DS128: $vgpr3_vgpr4_vgpr5 = COPY [[COPY14]](s96)
     ; VI-LABEL: name: test_extload_local_v2s96_from_24_align2
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3)
@@ -16458,7 +16548,6 @@ body: |
     ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]]
     ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
     ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32)
     ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
     ; VI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
     ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 2 from unknown-address + 8, addrspace 3)
@@ -16470,50 +16559,45 @@ body: |
     ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]]
     ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
     ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; VI: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; VI: [[COPY7:%[0-9]+]]:_(<3 x s32>) = COPY [[DEF]](<3 x s32>)
-    ; VI: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[COPY7]], [[BUILD_VECTOR]](<2 x s32>), 0
-    ; VI: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[OR2]](s32), 64
-    ; VI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT1]](<3 x s32>)
+    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
+    ; VI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
     ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
     ; VI: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32)
     ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 2 from unknown-address + 12, addrspace 3)
     ; VI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32)
     ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 2 from unknown-address + 14, addrspace 3)
-    ; VI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32)
-    ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C1]]
-    ; VI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32)
-    ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C1]]
+    ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32)
+    ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]]
+    ; VI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32)
+    ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C1]]
     ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
     ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
     ; VI: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C3]](s32)
     ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 2 from unknown-address + 16, addrspace 3)
     ; VI: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
     ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load 2 from unknown-address + 18, addrspace 3)
-    ; VI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32)
-    ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C1]]
-    ; VI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32)
-    ; VI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]]
+    ; VI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32)
+    ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C1]]
+    ; VI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32)
+    ; VI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C1]]
     ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C2]](s32)
     ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]]
-    ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32)
     ; VI: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C4]](s32)
     ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load 2 from unknown-address + 20, addrspace 3)
     ; VI: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32)
     ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load 2 from unknown-address + 22, addrspace 3)
-    ; VI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD10]](s32)
-    ; VI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C1]]
-    ; VI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32)
-    ; VI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C1]]
+    ; VI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD10]](s32)
+    ; VI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]]
+    ; VI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32)
+    ; VI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C1]]
     ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32)
     ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL5]]
-    ; VI: [[INSERT2:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[BUILD_VECTOR1]](<2 x s32>), 0
-    ; VI: [[INSERT3:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT2]], [[OR5]](s32), 64
-    ; VI: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT3]](<3 x s32>)
-    ; VI: [[COPY14:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
-    ; VI: [[COPY15:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
-    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY14]](s96)
-    ; VI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY15]](s96)
+    ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32), [[OR5]](s32)
+    ; VI: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
+    ; VI: [[COPY13:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; VI: [[COPY14:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY13]](s96)
+    ; VI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY14]](s96)
     ; GFX9-LABEL: name: test_extload_local_v2s96_from_24_align2
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3)
@@ -16539,7 +16623,6 @@ body: |
     ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]]
     ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
     ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32)
     ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
     ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
     ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 2 from unknown-address + 8, addrspace 3)
@@ -16551,50 +16634,45 @@ body: |
     ; GFX9: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]]
     ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
     ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; GFX9: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; GFX9: [[COPY7:%[0-9]+]]:_(<3 x s32>) = COPY [[DEF]](<3 x s32>)
-    ; GFX9: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[COPY7]], [[BUILD_VECTOR]](<2 x s32>), 0
-    ; GFX9: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[OR2]](s32), 64
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT1]](<3 x s32>)
+    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
+    ; GFX9: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
     ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
     ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32)
     ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 2 from unknown-address + 12, addrspace 3)
     ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32)
     ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 2 from unknown-address + 14, addrspace 3)
-    ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32)
-    ; GFX9: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C1]]
-    ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32)
-    ; GFX9: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C1]]
+    ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32)
+    ; GFX9: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]]
+    ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32)
+    ; GFX9: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C1]]
     ; GFX9: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
     ; GFX9: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
     ; GFX9: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C3]](s32)
     ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 2 from unknown-address + 16, addrspace 3)
     ; GFX9: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
     ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load 2 from unknown-address + 18, addrspace 3)
-    ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32)
-    ; GFX9: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C1]]
-    ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32)
-    ; GFX9: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]]
+    ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32)
+    ; GFX9: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C1]]
+    ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32)
+    ; GFX9: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C1]]
     ; GFX9: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C2]](s32)
     ; GFX9: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]]
-    ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32)
     ; GFX9: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C4]](s32)
     ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load 2 from unknown-address + 20, addrspace 3)
     ; GFX9: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32)
     ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load 2 from unknown-address + 22, addrspace 3)
-    ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD10]](s32)
-    ; GFX9: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C1]]
-    ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32)
-    ; GFX9: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C1]]
+    ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD10]](s32)
+    ; GFX9: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]]
+    ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32)
+    ; GFX9: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C1]]
     ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32)
     ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL5]]
-    ; GFX9: [[INSERT2:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[BUILD_VECTOR1]](<2 x s32>), 0
-    ; GFX9: [[INSERT3:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT2]], [[OR5]](s32), 64
-    ; GFX9: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT3]](<3 x s32>)
-    ; GFX9: [[COPY14:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
-    ; GFX9: [[COPY15:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
-    ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[COPY14]](s96)
-    ; GFX9: $vgpr3_vgpr4_vgpr5 = COPY [[COPY15]](s96)
+    ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32), [[OR5]](s32)
+    ; GFX9: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
+    ; GFX9: [[COPY13:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; GFX9: [[COPY14:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[COPY13]](s96)
+    ; GFX9: $vgpr3_vgpr4_vgpr5 = COPY [[COPY14]](s96)
     ; GFX9-UNALIGNED-LABEL: name: test_extload_local_v2s96_from_24_align2
     ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load 12, align 2, addrspace 3)
@@ -16632,7 +16710,6 @@ body: |
     ; GFX10: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]]
     ; GFX10: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
     ; GFX10: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32)
     ; GFX10: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
     ; GFX10: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
     ; GFX10: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 2 from unknown-address + 8, addrspace 3)
@@ -16644,50 +16721,45 @@ body: |
     ; GFX10: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]]
     ; GFX10: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
     ; GFX10: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; GFX10: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; GFX10: [[COPY7:%[0-9]+]]:_(<3 x s32>) = COPY [[DEF]](<3 x s32>)
-    ; GFX10: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[COPY7]], [[BUILD_VECTOR]](<2 x s32>), 0
-    ; GFX10: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[OR2]](s32), 64
-    ; GFX10: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT1]](<3 x s32>)
+    ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
+    ; GFX10: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
     ; GFX10: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
     ; GFX10: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32)
     ; GFX10: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 2 from unknown-address + 12, addrspace 3)
     ; GFX10: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32)
     ; GFX10: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 2 from unknown-address + 14, addrspace 3)
-    ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32)
-    ; GFX10: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C1]]
-    ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32)
-    ; GFX10: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C1]]
+    ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32)
+    ; GFX10: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]]
+    ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32)
+    ; GFX10: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C1]]
     ; GFX10: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
     ; GFX10: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
     ; GFX10: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C3]](s32)
     ; GFX10: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 2 from unknown-address + 16, addrspace 3)
     ; GFX10: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
     ; GFX10: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load 2 from unknown-address + 18, addrspace 3)
-    ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32)
-    ; GFX10: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C1]]
-    ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32)
-    ; GFX10: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]]
+    ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32)
+    ; GFX10: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C1]]
+    ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32)
+    ; GFX10: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C1]]
     ; GFX10: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C2]](s32)
     ; GFX10: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]]
-    ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32)
     ; GFX10: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C4]](s32)
     ; GFX10: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load 2 from unknown-address + 20, addrspace 3)
     ; GFX10: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32)
     ; GFX10: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load 2 from unknown-address + 22, addrspace 3)
-    ; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD10]](s32)
-    ; GFX10: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C1]]
-    ; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32)
-    ; GFX10: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C1]]
+    ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD10]](s32)
+    ; GFX10: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]]
+    ; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32)
+    ; GFX10: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C1]]
     ; GFX10: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32)
     ; GFX10: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL5]]
-    ; GFX10: [[INSERT2:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[BUILD_VECTOR1]](<2 x s32>), 0
-    ; GFX10: [[INSERT3:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT2]], [[OR5]](s32), 64
-    ; GFX10: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT3]](<3 x s32>)
-    ; GFX10: [[COPY14:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
-    ; GFX10: [[COPY15:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
-    ; GFX10: $vgpr0_vgpr1_vgpr2 = COPY [[COPY14]](s96)
-    ; GFX10: $vgpr3_vgpr4_vgpr5 = COPY [[COPY15]](s96)
+    ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32), [[OR5]](s32)
+    ; GFX10: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
+    ; GFX10: [[COPY13:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; GFX10: [[COPY14:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; GFX10: $vgpr0_vgpr1_vgpr2 = COPY [[COPY13]](s96)
+    ; GFX10: $vgpr3_vgpr4_vgpr5 = COPY [[COPY14]](s96)
     ; GFX10-UNALIGNED-LABEL: name: test_extload_local_v2s96_from_24_align2
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3)
@@ -16713,7 +16785,6 @@ body: |
     ; GFX10-UNALIGNED: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]]
     ; GFX10-UNALIGNED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
     ; GFX10-UNALIGNED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; GFX10-UNALIGNED: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32)
     ; GFX10-UNALIGNED: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
     ; GFX10-UNALIGNED: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
     ; GFX10-UNALIGNED: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 2 from unknown-address + 8, addrspace 3)
@@ -16725,50 +16796,45 @@ body: |
     ; GFX10-UNALIGNED: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]]
     ; GFX10-UNALIGNED: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
     ; GFX10-UNALIGNED: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
-    ; GFX10-UNALIGNED: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; GFX10-UNALIGNED: [[COPY7:%[0-9]+]]:_(<3 x s32>) = COPY [[DEF]](<3 x s32>)
-    ; GFX10-UNALIGNED: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[COPY7]], [[BUILD_VECTOR]](<2 x s32>), 0
-    ; GFX10-UNALIGNED: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[OR2]](s32), 64
-    ; GFX10-UNALIGNED: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT1]](<3 x s32>)
+    ; GFX10-UNALIGNED: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
+    ; GFX10-UNALIGNED: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
     ; GFX10-UNALIGNED: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
     ; GFX10-UNALIGNED: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32)
     ; GFX10-UNALIGNED: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 2 from unknown-address + 12, addrspace 3)
     ; GFX10-UNALIGNED: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32)
     ; GFX10-UNALIGNED: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 2 from unknown-address + 14, addrspace 3)
-    ; GFX10-UNALIGNED: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32)
-    ; GFX10-UNALIGNED: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C1]]
-    ; GFX10-UNALIGNED: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32)
-    ; GFX10-UNALIGNED: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C1]]
+    ; GFX10-UNALIGNED: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32)
+    ; GFX10-UNALIGNED: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]]
+    ; GFX10-UNALIGNED: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32)
+    ; GFX10-UNALIGNED: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C1]]
     ; GFX10-UNALIGNED: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
     ; GFX10-UNALIGNED: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
     ; GFX10-UNALIGNED: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C3]](s32)
     ; GFX10-UNALIGNED: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 2 from unknown-address + 16, addrspace 3)
     ; GFX10-UNALIGNED: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
     ; GFX10-UNALIGNED: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load 2 from unknown-address + 18, addrspace 3)
-    ; GFX10-UNALIGNED: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32)
-    ; GFX10-UNALIGNED: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C1]]
-    ; GFX10-UNALIGNED: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32)
-    ; GFX10-UNALIGNED: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]]
+    ; GFX10-UNALIGNED: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32)
+    ; GFX10-UNALIGNED: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C1]]
+    ; GFX10-UNALIGNED: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32)
+    ; GFX10-UNALIGNED: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C1]]
     ; GFX10-UNALIGNED: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C2]](s32)
     ; GFX10-UNALIGNED: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]]
-    ; GFX10-UNALIGNED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32)
     ; GFX10-UNALIGNED: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C4]](s32)
     ; GFX10-UNALIGNED: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load 2 from unknown-address + 20, addrspace 3)
     ; GFX10-UNALIGNED: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32)
     ; GFX10-UNALIGNED: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load 2 from unknown-address + 22, addrspace 3)
-    ; GFX10-UNALIGNED: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD10]](s32)
-    ; GFX10-UNALIGNED: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C1]]
-    ; GFX10-UNALIGNED: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32)
-    ; GFX10-UNALIGNED: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C1]]
+    ; GFX10-UNALIGNED: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD10]](s32)
+    ; GFX10-UNALIGNED: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]]
+    ; GFX10-UNALIGNED: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32)
+    ; GFX10-UNALIGNED: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C1]]
     ; GFX10-UNALIGNED: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32)
     ; GFX10-UNALIGNED: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL5]]
-    ; GFX10-UNALIGNED: [[INSERT2:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[BUILD_VECTOR1]](<2 x s32>), 0
-    ; GFX10-UNALIGNED: [[INSERT3:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT2]], [[OR5]](s32), 64
-    ; GFX10-UNALIGNED: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT3]](<3 x s32>)
-    ; GFX10-UNALIGNED: [[COPY14:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
-    ; GFX10-UNALIGNED: [[COPY15:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
-    ; GFX10-UNALIGNED: $vgpr0_vgpr1_vgpr2 = COPY [[COPY14]](s96)
-    ; GFX10-UNALIGNED: $vgpr3_vgpr4_vgpr5 = COPY [[COPY15]](s96)
+    ; GFX10-UNALIGNED: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32), [[OR5]](s32)
+    ; GFX10-UNALIGNED: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
+    ; GFX10-UNALIGNED: [[COPY13:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; GFX10-UNALIGNED: [[COPY14:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; GFX10-UNALIGNED: $vgpr0_vgpr1_vgpr2 = COPY [[COPY13]](s96)
+    ; GFX10-UNALIGNED: $vgpr3_vgpr4_vgpr5 = COPY [[COPY14]](s96)
     %0:_(p3) = COPY $vgpr0
     %1:_(<2 x s96>) = G_LOAD %0 :: (load 24, align 2, addrspace 3)
     %2:_(s96) = G_EXTRACT %1, 0
@@ -16789,115 +16855,105 @@ body: |
     ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
     ; SI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
     ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 4 from unknown-address + 8, addrspace 3)
-    ; SI: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; SI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY [[DEF]](<3 x s32>)
-    ; SI: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[COPY1]], [[LOAD]](<2 x s32>), 0
-    ; SI: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[LOAD1]](s32), 64
-    ; SI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT1]](<3 x s32>)
+    ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
+    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32)
+    ; SI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
     ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
     ; SI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
     ; SI: [[LOAD2:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p3) :: (load 8 from unknown-address + 12, align 4, addrspace 3)
     ; SI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
     ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 4 from unknown-address + 20, addrspace 3)
-    ; SI: [[INSERT2:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[LOAD2]](<2 x s32>), 0
-    ; SI: [[INSERT3:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT2]], [[LOAD3]](s32), 64
-    ; SI: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT3]](<3 x s32>)
-    ; SI: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
-    ; SI: [[COPY3:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
-    ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY2]](s96)
-    ; SI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY3]](s96)
+    ; SI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD2]](<2 x s32>)
+    ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[UV3]](s32), [[LOAD3]](s32)
+    ; SI: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
+    ; SI: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; SI: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
+    ; SI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
     ; CI-LABEL: name: test_extload_local_v2s96_from_24_align4
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; CI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3)
     ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
     ; CI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
     ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 4 from unknown-address + 8, addrspace 3)
-    ; CI: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; CI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY [[DEF]](<3 x s32>)
-    ; CI: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[COPY1]], [[LOAD]](<2 x s32>), 0
-    ; CI: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[LOAD1]](s32), 64
-    ; CI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT1]](<3 x s32>)
+    ; CI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
+    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32)
+    ; CI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
     ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
     ; CI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
     ; CI: [[LOAD2:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p3) :: (load 8 from unknown-address + 12, align 4, addrspace 3)
     ; CI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
     ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 4 from unknown-address + 20, addrspace 3)
-    ; CI: [[INSERT2:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[LOAD2]](<2 x s32>), 0
-    ; CI: [[INSERT3:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT2]], [[LOAD3]](s32), 64
-    ; CI: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT3]](<3 x s32>)
-    ; CI: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
-    ; CI: [[COPY3:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
-    ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY2]](s96)
-    ; CI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY3]](s96)
+    ; CI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD2]](<2 x s32>)
+    ; CI: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[UV3]](s32), [[LOAD3]](s32)
+    ; CI: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
+    ; CI: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; CI: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
+    ; CI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
     ; CI-DS128-LABEL: name: test_extload_local_v2s96_from_24_align4
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; CI-DS128: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3)
     ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
     ; CI-DS128: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
     ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 4 from unknown-address + 8, addrspace 3)
-    ; CI-DS128: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; CI-DS128: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY [[DEF]](<3 x s32>)
-    ; CI-DS128: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[COPY1]], [[LOAD]](<2 x s32>), 0
-    ; CI-DS128: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[LOAD1]](s32), 64
-    ; CI-DS128: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT1]](<3 x s32>)
+    ; CI-DS128: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
+    ; CI-DS128: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32)
+    ; CI-DS128: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
     ; CI-DS128: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
     ; CI-DS128: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
     ; CI-DS128: [[LOAD2:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p3) :: (load 8 from unknown-address + 12, align 4, addrspace 3)
     ; CI-DS128: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
     ; CI-DS128: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 4 from unknown-address + 20, addrspace 3)
-    ; CI-DS128: [[INSERT2:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[LOAD2]](<2 x s32>), 0
-    ; CI-DS128: [[INSERT3:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT2]], [[LOAD3]](s32), 64
-    ; CI-DS128: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT3]](<3 x s32>)
-    ; CI-DS128: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
-    ; CI-DS128: [[COPY3:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
-    ; CI-DS128: $vgpr0_vgpr1_vgpr2 = COPY [[COPY2]](s96)
-    ; CI-DS128: $vgpr3_vgpr4_vgpr5 = COPY [[COPY3]](s96)
+    ; CI-DS128: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD2]](<2 x s32>)
+    ; CI-DS128: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[UV3]](s32), [[LOAD3]](s32)
+    ; CI-DS128: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
+    ; CI-DS128: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; CI-DS128: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; CI-DS128: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
+    ; CI-DS128: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
     ; VI-LABEL: name: test_extload_local_v2s96_from_24_align4
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; VI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3)
     ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
     ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
     ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 4 from unknown-address + 8, addrspace 3)
-    ; VI: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; VI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY [[DEF]](<3 x s32>)
-    ; VI: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[COPY1]], [[LOAD]](<2 x s32>), 0
-    ; VI: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[LOAD1]](s32), 64
-    ; VI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT1]](<3 x s32>)
+    ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
+    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32)
+    ; VI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
     ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
     ; VI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
     ; VI: [[LOAD2:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p3) :: (load 8 from unknown-address + 12, align 4, addrspace 3)
     ; VI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
     ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 4 from unknown-address + 20, addrspace 3)
-    ; VI: [[INSERT2:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[LOAD2]](<2 x s32>), 0
-    ; VI: [[INSERT3:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT2]], [[LOAD3]](s32), 64
-    ; VI: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT3]](<3 x s32>)
-    ; VI: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
-    ; VI: [[COPY3:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
-    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY2]](s96)
-    ; VI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY3]](s96)
+    ; VI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD2]](<2 x s32>)
+    ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[UV3]](s32), [[LOAD3]](s32)
+    ; VI: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
+    ; VI: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; VI: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
+    ; VI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
     ; GFX9-LABEL: name: test_extload_local_v2s96_from_24_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3)
     ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
     ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
     ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 4 from unknown-address + 8, addrspace 3)
-    ; GFX9: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; GFX9: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY [[DEF]](<3 x s32>)
-    ; GFX9: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[COPY1]], [[LOAD]](<2 x s32>), 0
-    ; GFX9: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[LOAD1]](s32), 64
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT1]](<3 x s32>)
+    ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
+    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32)
+    ; GFX9: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
     ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
     ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
     ; GFX9: [[LOAD2:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p3) :: (load 8 from unknown-address + 12, align 4, addrspace 3)
     ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
     ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 4 from unknown-address + 20, addrspace 3)
-    ; GFX9: [[INSERT2:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[LOAD2]](<2 x s32>), 0
-    ; GFX9: [[INSERT3:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT2]], [[LOAD3]](s32), 64
-    ; GFX9: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT3]](<3 x s32>)
-    ; GFX9: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
-    ; GFX9: [[COPY3:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
-    ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[COPY2]](s96)
-    ; GFX9: $vgpr3_vgpr4_vgpr5 = COPY [[COPY3]](s96)
+    ; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD2]](<2 x s32>)
+    ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[UV3]](s32), [[LOAD3]](s32)
+    ; GFX9: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
+    ; GFX9: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; GFX9: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
+    ; GFX9: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
     ; GFX9-UNALIGNED-LABEL: name: test_extload_local_v2s96_from_24_align4
     ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load 12, align 4, addrspace 3)
@@ -16916,46 +16972,42 @@ body: |
     ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
     ; GFX10: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
     ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 4 from unknown-address + 8, addrspace 3)
-    ; GFX10: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; GFX10: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY [[DEF]](<3 x s32>)
-    ; GFX10: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[COPY1]], [[LOAD]](<2 x s32>), 0
-    ; GFX10: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[LOAD1]](s32), 64
-    ; GFX10: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT1]](<3 x s32>)
+    ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
+    ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32)
+    ; GFX10: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
     ; GFX10: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
     ; GFX10: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
     ; GFX10: [[LOAD2:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p3) :: (load 8 from unknown-address + 12, align 4, addrspace 3)
     ; GFX10: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
     ; GFX10: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 4 from unknown-address + 20, addrspace 3)
-    ; GFX10: [[INSERT2:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[LOAD2]](<2 x s32>), 0
-    ; GFX10: [[INSERT3:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT2]], [[LOAD3]](s32), 64
-    ; GFX10: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT3]](<3 x s32>)
-    ; GFX10: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
-    ; GFX10: [[COPY3:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
-    ; GFX10: $vgpr0_vgpr1_vgpr2 = COPY [[COPY2]](s96)
-    ; GFX10: $vgpr3_vgpr4_vgpr5 = COPY [[COPY3]](s96)
+    ; GFX10: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD2]](<2 x s32>)
+    ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[UV3]](s32), [[LOAD3]](s32)
+    ; GFX10: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
+    ; GFX10: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; GFX10: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; GFX10: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
+    ; GFX10: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
     ; GFX10-UNALIGNED-LABEL: name: test_extload_local_v2s96_from_24_align4
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3)
     ; GFX10-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
     ; GFX10-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
     ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 4 from unknown-address + 8, addrspace 3)
-    ; GFX10-UNALIGNED: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; GFX10-UNALIGNED: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY [[DEF]](<3 x s32>)
-    ; GFX10-UNALIGNED: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[COPY1]], [[LOAD]](<2 x s32>), 0
-    ; GFX10-UNALIGNED: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[LOAD1]](s32), 64
-    ; GFX10-UNALIGNED: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT1]](<3 x s32>)
+    ; GFX10-UNALIGNED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
+    ; GFX10-UNALIGNED: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32)
+    ; GFX10-UNALIGNED: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
     ; GFX10-UNALIGNED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
     ; GFX10-UNALIGNED: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
     ; GFX10-UNALIGNED: [[LOAD2:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p3) :: (load 8 from unknown-address + 12, align 4, addrspace 3)
     ; GFX10-UNALIGNED: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
     ; GFX10-UNALIGNED: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 4 from unknown-address + 20, addrspace 3)
-    ; GFX10-UNALIGNED: [[INSERT2:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[LOAD2]](<2 x s32>), 0
-    ; GFX10-UNALIGNED: [[INSERT3:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT2]], [[LOAD3]](s32), 64
-    ; GFX10-UNALIGNED: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT3]](<3 x s32>)
-    ; GFX10-UNALIGNED: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
-    ; GFX10-UNALIGNED: [[COPY3:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
-    ; GFX10-UNALIGNED: $vgpr0_vgpr1_vgpr2 = COPY [[COPY2]](s96)
-    ; GFX10-UNALIGNED: $vgpr3_vgpr4_vgpr5 = COPY [[COPY3]](s96)
+    ; GFX10-UNALIGNED: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD2]](<2 x s32>)
+    ; GFX10-UNALIGNED: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[UV3]](s32), [[LOAD3]](s32)
+    ; GFX10-UNALIGNED: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
+    ; GFX10-UNALIGNED: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; GFX10-UNALIGNED: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; GFX10-UNALIGNED: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
+    ; GFX10-UNALIGNED: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
     %0:_(p3) = COPY $vgpr0
     %1:_(<2 x s96>) = G_LOAD %0 :: (load 24, align 4, addrspace 3)
     %2:_(s96) = G_EXTRACT %1, 0
@@ -16976,46 +17028,42 @@ body: |
     ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
     ; SI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
     ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 4 from unknown-address + 8, align 8, addrspace 3)
-    ; SI: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; SI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY [[DEF]](<3 x s32>)
-    ; SI: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[COPY1]], [[LOAD]](<2 x s32>), 0
-    ; SI: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[LOAD1]](s32), 64
-    ; SI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT1]](<3 x s32>)
+    ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
+    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32)
+    ; SI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
     ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
     ; SI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
     ; SI: [[LOAD2:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p3) :: (load 8 from unknown-address + 12, align 4, addrspace 3)
     ; SI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
     ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 4 from unknown-address + 20, addrspace 3)
-    ; SI: [[INSERT2:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[LOAD2]](<2 x s32>), 0
-    ; SI: [[INSERT3:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT2]], [[LOAD3]](s32), 64
-    ; SI: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT3]](<3 x s32>)
-    ; SI: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
-    ; SI: [[COPY3:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
-    ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY2]](s96)
-    ; SI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY3]](s96)
+    ; SI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD2]](<2 x s32>)
+    ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[UV3]](s32), [[LOAD3]](s32)
+    ; SI: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
+    ; SI: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; SI: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
+    ; SI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
     ; CI-LABEL: name: test_extload_local_v2s96_from_24_align16
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; CI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, align 16, addrspace 3)
     ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
     ; CI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
     ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 4 from unknown-address + 8, align 8, addrspace 3)
-    ; CI: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; CI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY [[DEF]](<3 x s32>)
-    ; CI: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[COPY1]], [[LOAD]](<2 x s32>), 0
-    ; CI: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[LOAD1]](s32), 64
-    ; CI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT1]](<3 x s32>)
+    ; CI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
+    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32)
+    ; CI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
     ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
     ; CI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
     ; CI: [[LOAD2:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p3) :: (load 8 from unknown-address + 12, align 4, addrspace 3)
     ; CI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
     ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 4 from unknown-address + 20, addrspace 3)
-    ; CI: [[INSERT2:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[LOAD2]](<2 x s32>), 0
-    ; CI: [[INSERT3:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT2]], [[LOAD3]](s32), 64
-    ; CI: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT3]](<3 x s32>)
-    ; CI: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
-    ; CI: [[COPY3:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
-    ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY2]](s96)
-    ; CI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY3]](s96)
+    ; CI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD2]](<2 x s32>)
+    ; CI: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[UV3]](s32), [[LOAD3]](s32)
+    ; CI: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
+    ; CI: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
+    ; CI: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
+    ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
+    ; CI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
     ; CI-DS128-LABEL: name: test_extload_local_v2s96_from_24_align16
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; CI-DS128: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load 12, align 16, addrspace 3)
@@ -17026,10 +17074,9 @@ body: |
     ; CI-DS128: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
     ; CI-DS128: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD]], [[C1]](s32)
     ; CI-DS128: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 4 from unknown-address + 20, addrspace 3)
-    ; CI-DS128: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; CI-DS128: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[LOAD1]](<2 x s32>), 0
-    ; CI-DS128: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[LOAD2]](s32), 64
-    ; CI-DS128: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT1]](<3 x s32>)
+    ; CI-DS128: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<2 x s32>)
+    ; CI-DS128: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD2]](s32)
+    ; CI-DS128: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
     ; CI-DS128: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
     ; CI-DS128: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
     ; CI-DS128: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
@@ -17044,10 +17091,9 @@ body: |
     ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
     ; VI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD]], [[C1]](s32)
     ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 4 from unknown-address + 20, addrspace 3)
-    ; VI: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; VI: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[LOAD1]](<2 x s32>), 0
-    ; VI: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[LOAD2]](s32), 64
-    ; VI: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT1]](<3 x s32>)
+    ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<2 x s32>)
+    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD2]](s32)
+    ; VI: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
     ; VI: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
     ; VI: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
     ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
@@ -17062,10 +17108,9 @@ body: |
     ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
     ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD]], [[C1]](s32)
     ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 4 from unknown-address + 20, addrspace 3)
-    ; GFX9: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; GFX9: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[LOAD1]](<2 x s32>), 0
-    ; GFX9: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[LOAD2]](s32), 64
-    ; GFX9: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT1]](<3 x s32>)
+    ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<2 x s32>)
+    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD2]](s32)
+    ; GFX9: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
     ; GFX9: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
     ; GFX9: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
     ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
@@ -17092,10 +17137,9 @@ body: |
     ; GFX10: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
     ; GFX10: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD]], [[C1]](s32)
     ; GFX10: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 4 from unknown-address + 20, addrspace 3)
-    ; GFX10: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; GFX10: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[LOAD1]](<2 x s32>), 0
-    ; GFX10: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[LOAD2]](s32), 64
-    ; GFX10: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT1]](<3 x s32>)
+    ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<2 x s32>)
+    ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD2]](s32)
+    ; GFX10: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
     ; GFX10: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
     ; GFX10: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
     ; GFX10: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
@@ -17110,10 +17154,9 @@ body: |
     ; GFX10-UNALIGNED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
     ; GFX10-UNALIGNED: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD]], [[C1]](s32)
     ; GFX10-UNALIGNED: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 4 from unknown-address + 20, addrspace 3)
-    ; GFX10-UNALIGNED: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; GFX10-UNALIGNED: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[LOAD1]](<2 x s32>), 0
-    ; GFX10-UNALIGNED: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[LOAD2]](s32), 64
-    ; GFX10-UNALIGNED: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT1]](<3 x s32>)
+    ; GFX10-UNALIGNED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<2 x s32>)
+    ; GFX10-UNALIGNED: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD2]](s32)
+    ; GFX10-UNALIGNED: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
     ; GFX10-UNALIGNED: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
     ; GFX10-UNALIGNED: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
     ; GFX10-UNALIGNED: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir
index 677bd0aaf098..727edc3ce5fe 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir
@@ -598,48 +598,144 @@ body: |
 
     ; SI-LABEL: name: test_load_private_s24_align2
     ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5)
+    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5)
     ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
     ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD]](p5) :: (load 1 from unknown-address + 2, align 2, addrspace 5)
-    ; SI: [[DEF:%[0-9]+]]:_(s24) = G_IMPLICIT_DEF
-    ; SI: [[INSERT:%[0-9]+]]:_(s24) = G_INSERT [[DEF]], [[LOAD]](s16), 0
-    ; SI: [[INSERT1:%[0-9]+]]:_(s24) = G_INSERT [[INSERT]], [[LOAD1]](s8), 16
-    ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INSERT1]](s24)
-    ; SI: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 1 from unknown-address + 2, align 2, addrspace 5)
+    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
+    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C3]](s32)
+    ; SI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; SI: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; SI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]]
+    ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
+    ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C5]]
+    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+    ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; SI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; SI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C4]]
+    ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
+    ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[DEF]](s32)
+    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C5]]
+    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY3]](s32)
+    ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; SI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C2]](s32)
+    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[OR2]](s32)
+    ; SI: $vgpr0 = COPY [[COPY5]](s32)
     ; CI-LABEL: name: test_load_private_s24_align2
     ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5)
+    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5)
     ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
     ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD]](p5) :: (load 1 from unknown-address + 2, align 2, addrspace 5)
-    ; CI: [[DEF:%[0-9]+]]:_(s24) = G_IMPLICIT_DEF
-    ; CI: [[INSERT:%[0-9]+]]:_(s24) = G_INSERT [[DEF]], [[LOAD]](s16), 0
-    ; CI: [[INSERT1:%[0-9]+]]:_(s24) = G_INSERT [[INSERT]], [[LOAD1]](s8), 16
-    ; CI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INSERT1]](s24)
-    ; CI: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 1 from unknown-address + 2, align 2, addrspace 5)
+    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
+    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C3]](s32)
+    ; CI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CI: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; CI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]]
+    ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
+    ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C5]]
+    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+    ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; CI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; CI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C4]]
+    ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
+    ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[DEF]](s32)
+    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C5]]
+    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY3]](s32)
+    ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; CI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C2]](s32)
+    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[OR2]](s32)
+    ; CI: $vgpr0 = COPY [[COPY5]](s32)
     ; VI-LABEL: name: test_load_private_s24_align2
     ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5)
+    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5)
     ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
     ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD]](p5) :: (load 1 from unknown-address + 2, align 2, addrspace 5)
-    ; VI: [[DEF:%[0-9]+]]:_(s24) = G_IMPLICIT_DEF
-    ; VI: [[INSERT:%[0-9]+]]:_(s24) = G_INSERT [[DEF]], [[LOAD]](s16), 0
-    ; VI: [[INSERT1:%[0-9]+]]:_(s24) = G_INSERT [[INSERT]], [[LOAD1]](s8), 16
-    ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INSERT1]](s24)
-    ; VI: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 1 from unknown-address + 2, align 2, addrspace 5)
+    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
+    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C3]](s32)
+    ; VI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; VI: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]]
+    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+    ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C4]]
+    ; VI: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C5]](s16)
+    ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; VI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C4]]
+    ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
+    ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C4]]
+    ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C5]](s16)
+    ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C2]](s32)
+    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[OR2]](s32)
+    ; VI: $vgpr0 = COPY [[COPY1]](s32)
     ; GFX9-LABEL: name: test_load_private_s24_align2
     ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5)
+    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5)
     ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
     ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD]](p5) :: (load 1 from unknown-address + 2, align 2, addrspace 5)
-    ; GFX9: [[DEF:%[0-9]+]]:_(s24) = G_IMPLICIT_DEF
-    ; GFX9: [[INSERT:%[0-9]+]]:_(s24) = G_INSERT [[DEF]], [[LOAD]](s16), 0
-    ; GFX9: [[INSERT1:%[0-9]+]]:_(s24) = G_INSERT [[INSERT]], [[LOAD1]](s8), 16
-    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INSERT1]](s24)
-    ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 1 from unknown-address + 2, align 2, addrspace 5)
+    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
+    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C3]](s32)
+    ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; GFX9: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]]
+    ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+    ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C4]]
+    ; GFX9: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C5]](s16)
+    ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; GFX9: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C4]]
+    ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
+    ; GFX9: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C4]]
+    ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C5]](s16)
+    ; GFX9: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C2]](s32)
+    ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[OR2]](s32)
+    ; GFX9: $vgpr0 = COPY [[COPY1]](s32)
     %0:_(p5) = COPY $vgpr0
     %1:_(s24) = G_LOAD %0 :: (load 3, align 2, addrspace 5)
     %2:_(s32) = G_ANYEXT %1
@@ -654,48 +750,136 @@ body: |
 
     ; SI-LABEL: name: test_load_private_s24_align1
     ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[COPY]](p5) :: (load 2, align 1, addrspace 5)
-    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 5)
+    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
     ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD]](p5) :: (load 1 from unknown-address + 2, addrspace 5)
-    ; SI: [[DEF:%[0-9]+]]:_(s24) = G_IMPLICIT_DEF
-    ; SI: [[INSERT:%[0-9]+]]:_(s24) = G_INSERT [[DEF]], [[LOAD]](s16), 0
-    ; SI: [[INSERT1:%[0-9]+]]:_(s24) = G_INSERT [[INSERT]], [[LOAD1]](s8), 16
-    ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INSERT1]](s24)
-    ; SI: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 1 from unknown-address + 1, addrspace 5)
+    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 1 from unknown-address + 2, addrspace 5)
+    ; SI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; SI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; SI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
+    ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C4]]
+    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+    ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; SI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; SI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[DEF]](s32)
+    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C4]]
+    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32)
+    ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; SI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
+    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[OR2]](s32)
+    ; SI: $vgpr0 = COPY [[COPY4]](s32)
     ; CI-LABEL: name: test_load_private_s24_align1
     ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[COPY]](p5) :: (load 2, align 1, addrspace 5)
-    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 5)
+    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
     ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD]](p5) :: (load 1 from unknown-address + 2, addrspace 5)
-    ; CI: [[DEF:%[0-9]+]]:_(s24) = G_IMPLICIT_DEF
-    ; CI: [[INSERT:%[0-9]+]]:_(s24) = G_INSERT [[DEF]], [[LOAD]](s16), 0
-    ; CI: [[INSERT1:%[0-9]+]]:_(s24) = G_INSERT [[INSERT]], [[LOAD1]](s8), 16
-    ; CI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INSERT1]](s24)
-    ; CI: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 1 from unknown-address + 1, addrspace 5)
+    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 1 from unknown-address + 2, addrspace 5)
+    ; CI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; CI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
+    ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C4]]
+    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+    ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; CI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; CI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[DEF]](s32)
+    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C4]]
+    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32)
+    ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; CI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
+    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[OR2]](s32)
+    ; CI: $vgpr0 = COPY [[COPY4]](s32)
     ; VI-LABEL: name: test_load_private_s24_align1
     ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[COPY]](p5) :: (load 2, align 1, addrspace 5)
-    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 5)
+    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
     ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD]](p5) :: (load 1 from unknown-address + 2, addrspace 5)
-    ; VI: [[DEF:%[0-9]+]]:_(s24) = G_IMPLICIT_DEF
-    ; VI: [[INSERT:%[0-9]+]]:_(s24) = G_INSERT [[DEF]], [[LOAD]](s16), 0
-    ; VI: [[INSERT1:%[0-9]+]]:_(s24) = G_INSERT [[INSERT]], [[LOAD1]](s8), 16
-    ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INSERT1]](s24)
-    ; VI: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 1 from unknown-address + 1, addrspace 5)
+    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 1 from unknown-address + 2, addrspace 5)
+    ; VI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; VI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
+    ; VI: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
+    ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; VI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
+    ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]]
+    ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
+    ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
+    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[OR2]](s32)
+    ; VI: $vgpr0 = COPY [[COPY1]](s32)
     ; GFX9-LABEL: name: test_load_private_s24_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[COPY]](p5) :: (load 2, align 1, addrspace 5)
-    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 5)
+    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
     ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX9: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD]](p5) :: (load 1 from unknown-address + 2, addrspace 5)
-    ; GFX9: [[DEF:%[0-9]+]]:_(s24) = G_IMPLICIT_DEF
-    ; GFX9: [[INSERT:%[0-9]+]]:_(s24) = G_INSERT [[DEF]], [[LOAD]](s16), 0
-    ; GFX9: [[INSERT1:%[0-9]+]]:_(s24) = G_INSERT [[INSERT]], [[LOAD1]](s8), 16
-    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INSERT1]](s24)
-    ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 1 from unknown-address + 1, addrspace 5)
+    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 1 from unknown-address + 2, addrspace 5)
+    ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; GFX9: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
+    ; GFX9: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
+    ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; GFX9: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
+    ; GFX9: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]]
+    ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
+    ; GFX9: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
+    ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[OR2]](s32)
+    ; GFX9: $vgpr0 = COPY [[COPY1]](s32)
     %0:_(p5) = COPY $vgpr0
     %1:_(s24) = G_LOAD %0 :: (load 3, align 1, addrspace 5)
     %2:_(s32) = G_ANYEXT %1
@@ -714,56 +898,92 @@ body: |
     ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
     ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
     ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 2 from unknown-address + 4, align 4, addrspace 5)
-    ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; SI: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
-    ; SI: [[COPY1:%[0-9]+]]:_(s64) = COPY [[DEF]](s64)
-    ; SI: [[INSERT:%[0-9]+]]:_(s64) = G_INSERT [[COPY1]], [[LOAD]](s32), 0
-    ; SI: [[COPY2:%[0-9]+]]:_(s64) = COPY [[INSERT]](s64)
-    ; SI: [[INSERT1:%[0-9]+]]:_(s64) = G_INSERT [[COPY2]], [[TRUNC]](s16), 32
-    ; SI: [[COPY3:%[0-9]+]]:_(s64) = COPY [[INSERT1]](s64)
-    ; SI: $vgpr0_vgpr1 = COPY [[COPY3]](s64)
+    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]]
+    ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]]
+    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
+    ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+    ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]]
+    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[C1]](s32)
+    ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; SI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
+    ; SI: [[COPY4:%[0-9]+]]:_(s64) = COPY [[MV]](s64)
+    ; SI: $vgpr0_vgpr1 = COPY [[COPY4]](s64)
     ; CI-LABEL: name: test_load_private_s48_align8
     ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
     ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, align 8, addrspace 5)
     ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
     ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
     ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 2 from unknown-address + 4, align 4, addrspace 5)
-    ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; CI: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
-    ; CI: [[COPY1:%[0-9]+]]:_(s64) = COPY [[DEF]](s64)
-    ; CI: [[INSERT:%[0-9]+]]:_(s64) = G_INSERT [[COPY1]], [[LOAD]](s32), 0
-    ; CI: [[COPY2:%[0-9]+]]:_(s64) = COPY [[INSERT]](s64)
-    ; CI: [[INSERT1:%[0-9]+]]:_(s64) = G_INSERT [[COPY2]], [[TRUNC]](s16), 32
-    ; CI: [[COPY3:%[0-9]+]]:_(s64) = COPY [[INSERT1]](s64)
-    ; CI: $vgpr0_vgpr1 = COPY [[COPY3]](s64)
+    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]]
+    ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]]
+    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
+    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]]
+    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[C1]](s32)
+    ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
+    ; CI: [[COPY4:%[0-9]+]]:_(s64) = COPY [[MV]](s64)
+    ; CI: $vgpr0_vgpr1 = COPY [[COPY4]](s64)
     ; VI-LABEL: name: test_load_private_s48_align8
     ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
     ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, align 8, addrspace 5)
     ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
     ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
     ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 2 from unknown-address + 4, align 4, addrspace 5)
-    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; VI: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
-    ; VI: [[COPY1:%[0-9]+]]:_(s64) = COPY [[DEF]](s64)
-    ; VI: [[INSERT:%[0-9]+]]:_(s64) = G_INSERT [[COPY1]], [[LOAD]](s32), 0
-    ; VI: [[COPY2:%[0-9]+]]:_(s64) = COPY [[INSERT]](s64)
-    ; VI: [[INSERT1:%[0-9]+]]:_(s64) = G_INSERT [[COPY2]], [[TRUNC]](s16), 32
-    ; VI: [[COPY3:%[0-9]+]]:_(s64) = COPY [[INSERT1]](s64)
-    ; VI: $vgpr0_vgpr1 = COPY [[COPY3]](s64)
+    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]]
+    ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]]
+    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
+    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]]
+    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[C1]](s32)
+    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
+    ; VI: [[COPY4:%[0-9]+]]:_(s64) = COPY [[MV]](s64)
+    ; VI: $vgpr0_vgpr1 = COPY [[COPY4]](s64)
     ; GFX9-LABEL: name: test_load_private_s48_align8
     ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
     ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, align 8, addrspace 5)
     ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
     ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
     ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 2 from unknown-address + 4, align 4, addrspace 5)
-    ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; GFX9: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
-    ; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY [[DEF]](s64)
-    ; GFX9: [[INSERT:%[0-9]+]]:_(s64) = G_INSERT [[COPY1]], [[LOAD]](s32), 0
-    ; GFX9: [[COPY2:%[0-9]+]]:_(s64) = COPY [[INSERT]](s64)
-    ; GFX9: [[INSERT1:%[0-9]+]]:_(s64) = G_INSERT [[COPY2]], [[TRUNC]](s16), 32
-    ; GFX9: [[COPY3:%[0-9]+]]:_(s64) = COPY [[INSERT1]](s64)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[COPY3]](s64)
+    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]]
+    ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]]
+    ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
+    ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+    ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]]
+    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[C1]](s32)
+    ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
+    ; GFX9: [[COPY4:%[0-9]+]]:_(s64) = COPY [[MV]](s64)
+    ; GFX9: $vgpr0_vgpr1 = COPY [[COPY4]](s64)
     %0:_(p5) = COPY $vgpr0
     %1:_(s48) = G_LOAD %0 :: (load 6, align 8, addrspace 5)
     %2:_(s64) = G_ANYEXT %1
@@ -5202,84 +5422,108 @@ body: |
     ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
     ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
     ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 2 from unknown-address + 4, align 4, addrspace 5)
-    ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[LOAD]](<2 x s16>)
+    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32)
+    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]]
+    ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]]
+    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
+    ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; SI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+    ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]]
+    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[C1]](s32)
+    ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; SI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; SI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[DEF]](<2 x s16>)
+    ; SI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
     ; SI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; SI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
-    ; SI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[LOAD]](<2 x s16>), 0
-    ; SI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; SI: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
-    ; SI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0
-    ; SI: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[TRUNC]](s16), 32
-    ; SI: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT3]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; SI: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>)
-    ; SI: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV8]](<3 x s16>), 0
-    ; SI: $vgpr0_vgpr1 = COPY [[INSERT4]](<4 x s16>)
+    ; SI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
     ; CI-LABEL: name: test_load_private_v3s16_align8
     ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
     ; CI: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load 4, align 8, addrspace 5)
     ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
     ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
     ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 2 from unknown-address + 4, align 4, addrspace 5)
-    ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; CI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[LOAD]](<2 x s16>)
+    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32)
+    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]]
+    ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]]
+    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
+    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]]
+    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[C1]](s32)
+    ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; CI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[DEF]](<2 x s16>)
+    ; CI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
     ; CI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; CI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; CI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
-    ; CI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[LOAD]](<2 x s16>), 0
-    ; CI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; CI: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
-    ; CI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0
-    ; CI: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[TRUNC]](s16), 32
-    ; CI: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT3]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; CI: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>)
-    ; CI: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV8]](<3 x s16>), 0
-    ; CI: $vgpr0_vgpr1 = COPY [[INSERT4]](<4 x s16>)
+    ; CI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
     ; VI-LABEL: name: test_load_private_v3s16_align8
     ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
     ; VI: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load 4, align 8, addrspace 5)
     ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
     ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
     ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 2 from unknown-address + 4, align 4, addrspace 5)
-    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[LOAD]](<2 x s16>)
+    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32)
+    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]]
+    ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]]
+    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
+    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]]
+    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[C1]](s32)
+    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; VI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[DEF]](<2 x s16>)
+    ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
     ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
-    ; VI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[LOAD]](<2 x s16>), 0
-    ; VI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; VI: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
-    ; VI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0
-    ; VI: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[TRUNC]](s16), 32
-    ; VI: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT3]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; VI: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>)
-    ; VI: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV8]](<3 x s16>), 0
-    ; VI: $vgpr0_vgpr1 = COPY [[INSERT4]](<4 x s16>)
+    ; VI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
     ; GFX9-LABEL: name: test_load_private_v3s16_align8
     ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
     ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load 4, align 8, addrspace 5)
     ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
     ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
     ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 2 from unknown-address + 4, align 4, addrspace 5)
-    ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
-    ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
-    ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
-    ; GFX9: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[LOAD]](<2 x s16>), 0
-    ; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; GFX9: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
-    ; GFX9: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0
-    ; GFX9: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[TRUNC]](s16), 32
-    ; GFX9: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT3]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; GFX9: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>)
-    ; GFX9: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV8]](<3 x s16>), 0
-    ; GFX9: $vgpr0_vgpr1 = COPY [[INSERT4]](<4 x s16>)
+    ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[LOAD]](<2 x s16>)
+    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32)
+    ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
+    ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+    ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[DEF]](s32)
+    ; GFX9: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[DEF1]](<2 x s16>)
+    ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
+    ; GFX9: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[UV]](<3 x s16>), 0
+    ; GFX9: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
     %0:_(p5) = COPY $vgpr0
     %1:_(<3 x s16>) = G_LOAD %0 :: (load 6, align 8, addrspace 5)
     %2:_(<4 x s16>) = G_IMPLICIT_DEF
@@ -5299,126 +5543,111 @@ body: |
     ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
     ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
     ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 2 from unknown-address + 2, addrspace 5)
-    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 2 from unknown-address + 4, addrspace 5)
+    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
     ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]]
+    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]]
     ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]]
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]]
+    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32)
     ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
     ; SI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 2 from unknown-address + 4, addrspace 5)
-    ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+    ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]]
+    ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32)
+    ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; SI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; SI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[DEF]](<2 x s16>)
+    ; SI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
     ; SI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; SI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
-    ; SI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BITCAST]](<2 x s16>), 0
-    ; SI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; SI: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
-    ; SI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0
-    ; SI: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[TRUNC]](s16), 32
-    ; SI: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT3]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; SI: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>)
-    ; SI: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV8]](<3 x s16>), 0
-    ; SI: $vgpr0_vgpr1 = COPY [[INSERT4]](<4 x s16>)
+    ; SI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
     ; CI-LABEL: name: test_load_private_v3s16_align2
     ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
     ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5)
     ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
     ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
     ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 2 from unknown-address + 2, addrspace 5)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 2 from unknown-address + 4, addrspace 5)
+    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
     ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]]
+    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]]
     ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]]
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]]
+    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32)
     ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
     ; CI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; CI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 2 from unknown-address + 4, addrspace 5)
-    ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; CI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]]
+    ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32)
+    ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; CI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[DEF]](<2 x s16>)
+    ; CI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
     ; CI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; CI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; CI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
-    ; CI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BITCAST]](<2 x s16>), 0
-    ; CI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; CI: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
-    ; CI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0
-    ; CI: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[TRUNC]](s16), 32
-    ; CI: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT3]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; CI: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>)
-    ; CI: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV8]](<3 x s16>), 0
-    ; CI: $vgpr0_vgpr1 = COPY [[INSERT4]](<4 x s16>)
+    ; CI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
     ; VI-LABEL: name: test_load_private_v3s16_align2
     ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
     ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5)
     ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
     ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
     ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 2 from unknown-address + 2, addrspace 5)
-    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 2 from unknown-address + 4, addrspace 5)
+    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
     ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]]
+    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]]
     ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]]
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]]
+    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32)
     ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
     ; VI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32)
-    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 2 from unknown-address + 4, addrspace 5)
-    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]]
+    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32)
+    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; VI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; VI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[DEF]](<2 x s16>)
+    ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
     ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
-    ; VI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BITCAST]](<2 x s16>), 0
-    ; VI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; VI: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
-    ; VI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0
-    ; VI: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[TRUNC]](s16), 32
-    ; VI: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT3]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; VI: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>)
-    ; VI: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV8]](<3 x s16>), 0
-    ; VI: $vgpr0_vgpr1 = COPY [[INSERT4]](<4 x s16>)
+    ; VI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
     ; GFX9-LABEL: name: test_load_private_v3s16_align2
     ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
     ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5)
     ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
     ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
     ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 2 from unknown-address + 2, addrspace 5)
-    ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
-    ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
     ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
     ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
     ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 2 from unknown-address + 4, addrspace 5)
-    ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
-    ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
-    ; GFX9: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BUILD_VECTOR_TRUNC]](<2 x s16>), 0
-    ; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; GFX9: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
-    ; GFX9: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0
-    ; GFX9: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[TRUNC]](s16), 32
-    ; GFX9: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT3]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; GFX9: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>)
-    ; GFX9: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV8]](<3 x s16>), 0
-    ; GFX9: $vgpr0_vgpr1 = COPY [[INSERT4]](<4 x s16>)
+    ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+    ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
+    ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+    ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[DEF]](s32)
+    ; GFX9: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[DEF1]](<2 x s16>)
+    ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
+    ; GFX9: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[UV]](<3 x s16>), 0
+    ; GFX9: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
     %0:_(p5) = COPY $vgpr0
     %1:_(<3 x s16>) = G_LOAD %0 :: (load 6, align 2, addrspace 5)
     %2:_(<4 x s16>) = G_IMPLICIT_DEF
@@ -5461,14 +5690,8 @@ body: |
     ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32)
     ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
     ; SI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
-    ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
-    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; SI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; SI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C6]](s32)
+    ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; SI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32)
     ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load 1 from unknown-address + 4, addrspace 5)
     ; SI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
     ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load 1 from unknown-address + 5, addrspace 5)
@@ -5477,23 +5700,26 @@ body: |
     ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
     ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32)
     ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]]
-    ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32)
-    ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
-    ; SI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
-    ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32)
+    ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
+    ; SI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
+    ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32)
+    ; SI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]]
+    ; SI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+    ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; SI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C7]], [[C6]](s32)
+    ; SI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]]
+    ; SI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+    ; SI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[DEF]](<2 x s16>)
+    ; SI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
     ; SI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; SI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
-    ; SI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BITCAST]](<2 x s16>), 0
-    ; SI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; SI: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
-    ; SI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0
-    ; SI: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[OR3]](s16), 32
-    ; SI: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT3]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; SI: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>)
-    ; SI: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV8]](<3 x s16>), 0
-    ; SI: $vgpr0_vgpr1 = COPY [[INSERT4]](<4 x s16>)
+    ; SI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
     ; CI-LABEL: name: test_load_private_v3s16_align1
     ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
     ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 5)
@@ -5523,14 +5749,8 @@ body: |
     ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32)
     ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
     ; CI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
-    ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
-    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; CI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; CI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C6]](s32)
+    ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32)
     ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load 1 from unknown-address + 4, addrspace 5)
     ; CI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
     ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load 1 from unknown-address + 5, addrspace 5)
@@ -5539,23 +5759,26 @@ body: |
     ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
     ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32)
     ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]]
-    ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32)
-    ; CI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
-    ; CI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
-    ; CI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32)
+    ; CI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
+    ; CI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
+    ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32)
+    ; CI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]]
+    ; CI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+    ; CI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; CI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C7]], [[C6]](s32)
+    ; CI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]]
+    ; CI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+    ; CI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[DEF]](<2 x s16>)
+    ; CI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
     ; CI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; CI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; CI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
-    ; CI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BITCAST]](<2 x s16>), 0
-    ; CI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; CI: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
-    ; CI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0
-    ; CI: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[OR3]](s16), 32
-    ; CI: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT3]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; CI: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>)
-    ; CI: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV8]](<3 x s16>), 0
-    ; CI: $vgpr0_vgpr1 = COPY [[INSERT4]](<4 x s16>)
+    ; CI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
     ; VI-LABEL: name: test_load_private_v3s16_align1
     ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
     ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 5)
@@ -5581,14 +5804,8 @@ body: |
     ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]]
     ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16)
     ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
-    ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
-    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
-    ; VI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32)
+    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; VI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
     ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load 1 from unknown-address + 4, addrspace 5)
     ; VI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
     ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load 1 from unknown-address + 5, addrspace 5)
@@ -5596,22 +5813,25 @@ body: |
     ; VI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]]
     ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
     ; VI: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]]
-    ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16)
-    ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL3]]
-    ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; VI: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16)
+    ; VI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
+    ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
+    ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]]
+    ; VI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+    ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[C5]](s32)
+    ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]]
+    ; VI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+    ; VI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[DEF]](<2 x s16>)
+    ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
     ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
-    ; VI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BITCAST]](<2 x s16>), 0
-    ; VI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; VI: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
-    ; VI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0
-    ; VI: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[OR3]](s16), 32
-    ; VI: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT3]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; VI: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>)
-    ; VI: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV8]](<3 x s16>), 0
-    ; VI: $vgpr0_vgpr1 = COPY [[INSERT4]](<4 x s16>)
+    ; VI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
     ; GFX9-LABEL: name: test_load_private_v3s16_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
     ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 5)
@@ -5637,9 +5857,6 @@ body: |
     ; GFX9: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]]
     ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16)
     ; GFX9: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
-    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
-    ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16)
-    ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32)
     ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
     ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
     ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load 1 from unknown-address + 4, addrspace 5)
@@ -5651,20 +5868,18 @@ body: |
     ; GFX9: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]]
     ; GFX9: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16)
     ; GFX9: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
-    ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
-    ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
-    ; GFX9: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BUILD_VECTOR_TRUNC]](<2 x s16>), 0
-    ; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; GFX9: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
-    ; GFX9: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0
-    ; GFX9: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[OR2]](s16), 32
-    ; GFX9: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT3]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
-    ; GFX9: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>)
-    ; GFX9: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV8]](<3 x s16>), 0
-    ; GFX9: $vgpr0_vgpr1 = COPY [[INSERT4]](<4 x s16>)
+    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16)
+    ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32)
+    ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[OR2]](s16)
+    ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[DEF]](s32)
+    ; GFX9: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[DEF1]](<2 x s16>)
+    ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
+    ; GFX9: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[UV]](<3 x s16>), 0
+    ; GFX9: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
     %0:_(p5) = COPY $vgpr0
     %1:_(<3 x s16>) = G_LOAD %0 :: (load 6, align 1, addrspace 5)
     %2:_(<4 x s16>) = G_IMPLICIT_DEF

diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-lshr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-lshr.mir
index 9b4965405922..7ed97440ef69 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-lshr.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-lshr.mir
@@ -728,19 +728,20 @@ body: |
     ; GFX9: [[EXTRACT5:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT3]](<4 x s16>), 32
     ; GFX9: [[LSHR:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[EXTRACT2]], [[EXTRACT4]](<2 x s16>)
     ; GFX9: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[EXTRACT3]], [[EXTRACT5]](s16)
-    ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF1]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>)
-    ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
+    ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[LSHR]](<2 x s16>)
+    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+    ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
+    ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY2]](s32), [[COPY3]](s32)
+    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16)
+    ; GFX9: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[DEF1]](s32)
+    ; GFX9: [[DEF2:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[DEF2]](<2 x s16>)
+    ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
     ; GFX9: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0
-    ; GFX9: [[INSERT5:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT4]], [[LSHR]](<2 x s16>), 0
-    ; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT5]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>)
-    ; GFX9: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
-    ; GFX9: [[INSERT6:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV4]](<3 x s16>), 0
-    ; GFX9: [[INSERT7:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT6]], [[LSHR1]](s16), 32
-    ; GFX9: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT7]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>)
-    ; GFX9: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>)
-    ; GFX9: [[INSERT8:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV8]](<3 x s16>), 0
-    ; GFX9: $vgpr0_vgpr1 = COPY [[INSERT8]](<4 x s16>)
+    ; GFX9: $vgpr0_vgpr1 = COPY [[INSERT4]](<4 x s16>)
     %0:_(<4 x s16>) = COPY $vgpr0_vgpr1
     %1:_(<4 x s16>) = COPY $vgpr2_vgpr3
     %2:_(<3 x s16>) = G_EXTRACT %0, 0
@@ -887,38 +888,28 @@ body: |
     ; GFX9: [[EXTRACT3:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT3]](<4 x s16>), 32
     ; GFX9: [[LSHR:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[EXTRACT]], [[EXTRACT2]](<2 x s16>)
     ; GFX9: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[EXTRACT1]], [[EXTRACT3]](s16)
-    ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF1]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>)
-    ; GFX9: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
-    ; GFX9: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV4]](<3 x s16>), 0
-    ; GFX9: [[INSERT5:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT4]], [[LSHR]](<2 x s16>), 0
-    ; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT5]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>)
-    ; GFX9: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
-    ; GFX9: [[INSERT6:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV8]](<3 x s16>), 0
-    ; GFX9: [[INSERT7:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT6]], [[LSHR1]](s16), 32
-    ; GFX9: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT7]](<4 x s16>)
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>)
+    ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[LSHR]](<2 x s16>)
     ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
     ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
-    ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>)
+    ; GFX9: [[DEF1:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+    ; GFX9: [[DEF2:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; GFX9: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF3]](<4 x s16>)
+    ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
     ; GFX9: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
-    ; GFX9: [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
-    ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV14]](<2 x s16>)
+    ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>)
     ; GFX9: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
-    ; GFX9: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV15]](<2 x s16>)
-    ; GFX9: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
     ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
     ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
     ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY2]](s32), [[COPY3]](s32)
+    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16)
     ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
-    ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY4]](s32), [[COPY5]](s32)
-    ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32)
-    ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY6]](s32), [[COPY7]](s32)
-    ; GFX9: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
-    ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS2]](<6 x s16>)
+    ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[COPY4]](s32)
+    ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32)
+    ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+    ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32)
+    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+    ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
     %1:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5
     %2:_(<3 x s16>), %3:_(<3 x s16>) = G_UNMERGE_VALUES %0

diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-or.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-or.mir
index bf66ede9b86a..6543b71a3a80 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-or.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-or.mir
@@ -147,10 +147,9 @@ body: |
     ; CHECK: [[EXTRACT3:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](s96), 64
     ; CHECK: [[OR:%[0-9]+]]:_(s64) = G_OR [[EXTRACT]], [[EXTRACT2]]
     ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[EXTRACT1]], [[EXTRACT3]]
-    ; CHECK: [[DEF:%[0-9]+]]:_(s96) = G_IMPLICIT_DEF
-    ; CHECK: [[INSERT:%[0-9]+]]:_(s96) = G_INSERT [[DEF]], [[OR]](s64), 0
-    ; CHECK: [[INSERT1:%[0-9]+]]:_(s96) = G_INSERT [[INSERT]], [[OR1]](s32), 64
-    ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[INSERT1]](s96)
+    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[OR]](s64)
+    ; CHECK: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32), [[OR1]](s32)
+    ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](s96)
     %0:_(s96) = COPY $vgpr0_vgpr1_vgpr2
     %1:_(s96) = COPY $vgpr3_vgpr4_vgpr5
     %2:_(s96) = G_OR %0, %1

diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-select.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-select.mir
index 7fbd8aef328f..fd0f7532cca3 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-select.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-select.mir
@@ -172,10 +172,9 @@ body: |
     ; CHECK: [[EXTRACT3:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](s96), 64
     ; CHECK: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[EXTRACT]], [[EXTRACT2]]
     ; CHECK: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[EXTRACT1]], [[EXTRACT3]]
-    ; CHECK: [[DEF:%[0-9]+]]:_(s96) = G_IMPLICIT_DEF
-    ; CHECK: [[INSERT:%[0-9]+]]:_(s96) = G_INSERT [[DEF]], [[SELECT]](s64), 0
-    ; CHECK: [[INSERT1:%[0-9]+]]:_(s96) = G_INSERT [[INSERT]], [[SELECT1]](s32), 64
-    ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[INSERT1]](s96)
+    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SELECT]](s64)
+    ; CHECK: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32), [[SELECT1]](s32)
+    ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](s96)
     %0:_(s96) = COPY $vgpr0_vgpr1_vgpr2
     %1:_(s96) = COPY $vgpr3_vgpr4_vgpr5
     %2:_(s32) = COPY $vgpr6
@@ -944,19 +943,17 @@ body: |
     ; CHECK: [[EXTRACT3:%[0-9]+]]:_(s32) = G_EXTRACT [[UV2]](s96), 64
     ; CHECK: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[EXTRACT]], [[EXTRACT2]]
     ; CHECK: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[EXTRACT1]], [[EXTRACT3]]
-    ; CHECK: [[DEF2:%[0-9]+]]:_(s96) = G_IMPLICIT_DEF
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s96) = COPY [[DEF2]](s96)
-    ; CHECK: [[INSERT:%[0-9]+]]:_(s96) = G_INSERT [[COPY1]], [[SELECT]](s64), 0
-    ; CHECK: [[INSERT1:%[0-9]+]]:_(s96) = G_INSERT [[INSERT]], [[SELECT1]](s32), 64
+    ; CHECK: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SELECT]](s64)
+    ; CHECK: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[UV4]](s32), [[UV5]](s32), [[SELECT1]](s32)
     ; CHECK: [[EXTRACT4:%[0-9]+]]:_(s64) = G_EXTRACT [[UV1]](s96), 0
     ; CHECK: [[EXTRACT5:%[0-9]+]]:_(s32) = G_EXTRACT [[UV1]](s96), 64
     ; CHECK: [[EXTRACT6:%[0-9]+]]:_(s64) = G_EXTRACT [[UV3]](s96), 0
     ; CHECK: [[EXTRACT7:%[0-9]+]]:_(s32) = G_EXTRACT [[UV3]](s96), 64
     ; CHECK: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[EXTRACT4]], [[EXTRACT6]]
     ; CHECK: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[EXTRACT5]], [[EXTRACT7]]
-    ; CHECK: [[INSERT2:%[0-9]+]]:_(s96) = G_INSERT [[DEF2]], [[SELECT2]](s64), 0
-    ; CHECK: [[INSERT3:%[0-9]+]]:_(s96) = G_INSERT [[INSERT2]], [[SELECT3]](s32), 64
-    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s96>) = G_BUILD_VECTOR [[INSERT1]](s96), [[INSERT3]](s96)
+    ; CHECK: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SELECT2]](s64)
+    ; CHECK: [[MV1:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[UV6]](s32), [[UV7]](s32), [[SELECT3]](s32)
+    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s96>) = G_BUILD_VECTOR [[MV]](s96), [[MV1]](s96)
     ; CHECK: S_NOP 0, implicit [[BUILD_VECTOR]](<2 x s96>)
     %0:_(<2 x s96>) = G_IMPLICIT_DEF
     %1:_(<2 x s96>) = G_IMPLICIT_DEF

diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shl.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shl.mir
index b9af8cef5937..9120e992521b 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shl.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shl.mir
@@ -730,38 +730,28 @@ body: |
     ; GFX9: [[EXTRACT3:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT3]](<4 x s16>), 32
     ; GFX9: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL [[EXTRACT]], [[EXTRACT2]](<2 x s16>)
     ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[EXTRACT1]], [[EXTRACT3]](s16)
-    ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF1]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>)
-    ; GFX9: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
-    ; GFX9: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV4]](<3 x s16>), 0
-    ; GFX9: [[INSERT5:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT4]], [[SHL]](<2 x s16>), 0
-    ; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT5]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>)
-    ; GFX9: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
-    ; GFX9: [[INSERT6:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV8]](<3 x s16>), 0
-    ; GFX9: [[INSERT7:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT6]], [[SHL1]](s16), 32
-    ; GFX9: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT7]](<4 x s16>)
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>)
+    ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[SHL]](<2 x s16>)
     ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
     ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
-    ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>)
+    ; GFX9: [[DEF1:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+    ; GFX9: [[DEF2:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; GFX9: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF3]](<4 x s16>)
+    ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
     ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
-    ; GFX9: [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
-    ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV14]](<2 x s16>)
+    ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>)
     ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
-    ; GFX9: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV15]](<2 x s16>)
-    ; GFX9: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
     ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
     ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
     ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY2]](s32), [[COPY3]](s32)
+    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SHL1]](s16)
     ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
-    ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY4]](s32), [[COPY5]](s32)
-    ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
-    ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY6]](s32), [[COPY7]](s32)
-    ; GFX9: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
-    ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS2]](<6 x s16>)
+    ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[COPY4]](s32)
+    ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+    ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32)
+    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+    ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
     %1:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5
     %2:_(<3 x s16>), %3:_(<3 x s16>) = G_UNMERGE_VALUES %0

diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-xor.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-xor.mir
index ef369835e9ac..88bd5038b158 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-xor.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-xor.mir
@@ -147,10 +147,9 @@ body: |
     ; CHECK: [[EXTRACT3:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](s96), 64
     ; CHECK: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[EXTRACT]], [[EXTRACT2]]
     ; CHECK: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[EXTRACT1]], [[EXTRACT3]]
-    ; CHECK: [[DEF:%[0-9]+]]:_(s96) = G_IMPLICIT_DEF
-    ; CHECK: [[INSERT:%[0-9]+]]:_(s96) = G_INSERT [[DEF]], [[XOR]](s64), 0
-    ; CHECK: [[INSERT1:%[0-9]+]]:_(s96) = G_INSERT [[INSERT]], [[XOR1]](s32), 64
-    ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[INSERT1]](s96)
+    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64)
+    ; CHECK: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32), [[XOR1]](s32)
+    ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](s96)
     %0:_(s96) = COPY $vgpr0_vgpr1_vgpr2
     %1:_(s96) = COPY $vgpr3_vgpr4_vgpr5
     %2:_(s96) = G_XOR %0, %1

diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zext.mir
index d53a200af528..1b6f948196c1 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zext.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zext.mir
@@ -672,43 +672,147 @@ body: |
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
     ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
     ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[C1]](s64)
-    ; CHECK: [[DEF:%[0-9]+]]:_(s128) = G_IMPLICIT_DEF
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s128) = COPY [[DEF]](s128)
-    ; CHECK: [[INSERT:%[0-9]+]]:_(s128) = G_INSERT [[COPY1]], [[C]](s64), 0
-    ; CHECK: [[COPY2:%[0-9]+]]:_(s128) = COPY [[INSERT]](s128)
-    ; CHECK: [[INSERT1:%[0-9]+]]:_(s128) = G_INSERT [[COPY2]], [[TRUNC]](s48), 64
+    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64)
     ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C2]](s32)
+    ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
+    ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; CHECK: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C1]](s64)
+    ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
+    ; CHECK: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
+    ; CHECK: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
     ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-    ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]]
-    ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
-    ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
+    ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
+    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]]
     ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
     ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C2]](s32)
-    ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C4]], [[SHL1]]
+    ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32)
+    ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]]
+    ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
+    ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
     ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
+    ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV2]](s32)
+    ; CHECK: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]]
+    ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
+    ; CHECK: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]]
+    ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
+    ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UV3]](s32)
+    ; CHECK: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]]
+    ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C4]](s32)
+    ; CHECK: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[COPY8]], [[C2]](s32)
+    ; CHECK: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
+    ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR3]](s32)
+    ; CHECK: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C2]](s32)
+    ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+    ; CHECK: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]]
+    ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32)
+    ; CHECK: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C3]]
+    ; CHECK: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C2]](s32)
+    ; CHECK: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL4]]
+    ; CHECK: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C2]](s32)
+    ; CHECK: [[OR5:%[0-9]+]]:_(s32) = G_OR [[C4]], [[SHL5]]
+    ; CHECK: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
     ; CHECK: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
-    ; CHECK: [[COPY5:%[0-9]+]]:_(s64) = COPY [[MV]](s64)
+    ; CHECK: [[COPY11:%[0-9]+]]:_(s64) = COPY [[MV2]](s64)
     ; CHECK: [[EXTRACT:%[0-9]+]]:_(s48) = G_EXTRACT [[DEF1]](s64), 0
-    ; CHECK: [[COPY6:%[0-9]+]]:_(s128) = COPY [[INSERT1]](s128)
-    ; CHECK: [[EXTRACT1:%[0-9]+]]:_(s64) = G_EXTRACT [[COPY6]](s128), 0
-    ; CHECK: [[EXTRACT2:%[0-9]+]]:_(s48) = G_EXTRACT [[COPY6]](s128), 64
-    ; CHECK: [[AND2:%[0-9]+]]:_(s64) = G_AND [[COPY5]], [[EXTRACT1]]
+    ; CHECK: [[COPY12:%[0-9]+]]:_(s64) = COPY [[MV]](s64)
+    ; CHECK: [[EXTRACT1:%[0-9]+]]:_(s48) = G_EXTRACT [[MV1]](s64), 0
+    ; CHECK: [[AND9:%[0-9]+]]:_(s64) = G_AND [[COPY11]], [[COPY12]]
     ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[EXTRACT]](s48)
-    ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[EXTRACT2]](s48)
-    ; CHECK: [[AND3:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[ANYEXT1]]
-    ; CHECK: [[TRUNC1:%[0-9]+]]:_(s48) = G_TRUNC [[AND3]](s64)
-    ; CHECK: [[DEF2:%[0-9]+]]:_(s128) = G_IMPLICIT_DEF
-    ; CHECK: [[COPY7:%[0-9]+]]:_(s128) = COPY [[DEF2]](s128)
-    ; CHECK: [[INSERT2:%[0-9]+]]:_(s128) = G_INSERT [[COPY7]], [[AND2]](s64), 0
-    ; CHECK: [[COPY8:%[0-9]+]]:_(s128) = COPY [[INSERT2]](s128)
-    ; CHECK: [[INSERT3:%[0-9]+]]:_(s128) = G_INSERT [[COPY8]], [[TRUNC1]](s48), 64
-    ; CHECK: [[TRUNC2:%[0-9]+]]:_(s112) = G_TRUNC [[INSERT3]](s128)
-    ; CHECK: S_ENDPGM 0, implicit [[TRUNC2]](s112)
+    ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[EXTRACT1]](s48)
+    ; CHECK: [[AND10:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[ANYEXT1]]
+    ; CHECK: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND9]](s64)
+    ; CHECK: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C2]](s32)
+    ; CHECK: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C2]](s32)
+    ; CHECK: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND10]](s64)
+    ; CHECK: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C2]](s32)
+    ; CHECK: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C2]](s32)
+    ; CHECK: [[DEF2:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+    ; CHECK: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF2]](s64)
+    ; CHECK: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV8]], [[C2]](s32)
+    ; CHECK: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV9]], [[C2]](s32)
+    ; CHECK: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF2]](s64)
+    ; CHECK: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV10]], [[C2]](s32)
+    ; CHECK: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[UV11]], [[C2]](s32)
+    ; CHECK: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF2]](s64)
+    ; CHECK: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[UV12]], [[C2]](s32)
+    ; CHECK: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[UV13]], [[C2]](s32)
+    ; CHECK: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF2]](s64)
+    ; CHECK: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[UV14]], [[C2]](s32)
+    ; CHECK: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[UV15]], [[C2]](s32)
+    ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY [[UV4]](s32)
+    ; CHECK: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]]
+    ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32)
+    ; CHECK: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C3]]
+    ; CHECK: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND12]], [[C2]](s32)
+    ; CHECK: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND11]], [[SHL6]]
+    ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY [[UV5]](s32)
+    ; CHECK: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C3]]
+    ; CHECK: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32)
+    ; CHECK: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C3]]
+    ; CHECK: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C2]](s32)
+    ; CHECK: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND13]], [[SHL7]]
+    ; CHECK: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR6]](s32), [[OR7]](s32)
+    ; CHECK: [[COPY17:%[0-9]+]]:_(s32) = COPY [[UV6]](s32)
+    ; CHECK: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY17]], [[C3]]
+    ; CHECK: [[COPY18:%[0-9]+]]:_(s32) = COPY [[LSHR7]](s32)
+    ; CHECK: [[AND16:%[0-9]+]]:_(s32) = G_AND [[COPY18]], [[C3]]
+    ; CHECK: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND16]], [[C2]](s32)
+    ; CHECK: [[OR8:%[0-9]+]]:_(s32) = G_OR [[AND15]], [[SHL8]]
+    ; CHECK: [[COPY19:%[0-9]+]]:_(s32) = COPY [[UV7]](s32)
+    ; CHECK: [[AND17:%[0-9]+]]:_(s32) = G_AND [[COPY19]], [[C3]]
+    ; CHECK: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND17]], [[SHL3]]
+    ; CHECK: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR8]](s32), [[OR9]](s32)
+    ; CHECK: [[COPY20:%[0-9]+]]:_(s32) = COPY [[UV8]](s32)
+    ; CHECK: [[AND18:%[0-9]+]]:_(s32) = G_AND [[COPY20]], [[C3]]
+    ; CHECK: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND18]], [[C2]](s32)
+    ; CHECK: [[OR10:%[0-9]+]]:_(s32) = G_OR [[COPY8]], [[SHL9]]
+    ; CHECK: [[COPY21:%[0-9]+]]:_(s32) = COPY [[LSHR9]](s32)
+    ; CHECK: [[AND19:%[0-9]+]]:_(s32) = G_AND [[COPY21]], [[C3]]
+    ; CHECK: [[COPY22:%[0-9]+]]:_(s32) = COPY [[UV9]](s32)
+    ; CHECK: [[AND20:%[0-9]+]]:_(s32) = G_AND [[COPY22]], [[C3]]
+    ; CHECK: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND20]], [[C2]](s32)
+    ; CHECK: [[OR11:%[0-9]+]]:_(s32) = G_OR [[AND19]], [[SHL10]]
+    ; CHECK: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32)
+    ; CHECK: [[COPY23:%[0-9]+]]:_(s32) = COPY [[UV10]](s32)
+    ; CHECK: [[AND21:%[0-9]+]]:_(s32) = G_AND [[COPY23]], [[C3]]
+    ; CHECK: [[COPY24:%[0-9]+]]:_(s32) = COPY [[LSHR11]](s32)
+    ; CHECK: [[AND22:%[0-9]+]]:_(s32) = G_AND [[COPY24]], [[C3]]
+    ; CHECK: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND22]], [[C2]](s32)
+    ; CHECK: [[OR12:%[0-9]+]]:_(s32) = G_OR [[AND21]], [[SHL11]]
+    ; CHECK: [[COPY25:%[0-9]+]]:_(s32) = COPY [[UV11]](s32)
+    ; CHECK: [[AND23:%[0-9]+]]:_(s32) = G_AND [[COPY25]], [[C3]]
+    ; CHECK: [[COPY26:%[0-9]+]]:_(s32) = COPY [[UV12]](s32)
+    ; CHECK: [[AND24:%[0-9]+]]:_(s32) = G_AND [[COPY26]], [[C3]]
+    ; CHECK: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND24]], [[C2]](s32)
+    ; CHECK: [[OR13:%[0-9]+]]:_(s32) = G_OR [[AND23]], [[SHL12]]
+    ; CHECK: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR12]](s32), [[OR13]](s32)
+    ; CHECK: [[COPY27:%[0-9]+]]:_(s32) = COPY [[LSHR13]](s32)
+    ; CHECK: [[AND25:%[0-9]+]]:_(s32) = G_AND [[COPY27]], [[C3]]
+    ; CHECK: [[COPY28:%[0-9]+]]:_(s32) = COPY [[UV13]](s32)
+    ; CHECK: [[AND26:%[0-9]+]]:_(s32) = G_AND [[COPY28]], [[C3]]
+    ; CHECK: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND26]], [[C2]](s32)
+    ; CHECK: [[OR14:%[0-9]+]]:_(s32) = G_OR [[AND25]], [[SHL13]]
+    ; CHECK: [[COPY29:%[0-9]+]]:_(s32) = COPY [[UV14]](s32)
+    ; CHECK: [[AND27:%[0-9]+]]:_(s32) = G_AND [[COPY29]], [[C3]]
+    ; CHECK: [[COPY30:%[0-9]+]]:_(s32) = COPY [[LSHR15]](s32)
+    ; CHECK: [[AND28:%[0-9]+]]:_(s32) = G_AND [[COPY30]], [[C3]]
+    ; CHECK: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND28]], [[C2]](s32)
+    ; CHECK: [[OR15:%[0-9]+]]:_(s32) = G_OR [[AND27]], [[SHL14]]
+    ; CHECK: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR14]](s32), [[OR15]](s32)
+    ; CHECK: [[COPY31:%[0-9]+]]:_(s32) = COPY [[UV15]](s32)
+    ; CHECK: [[AND29:%[0-9]+]]:_(s32) = G_AND [[COPY31]], [[C3]]
+    ; CHECK: [[OR16:%[0-9]+]]:_(s32) = G_OR [[AND29]], [[SHL3]]
+    ; CHECK: [[OR17:%[0-9]+]]:_(s32) = G_OR [[COPY8]], [[SHL3]]
+    ; CHECK: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR16]](s32), [[OR17]](s32)
+    ; CHECK: [[MV9:%[0-9]+]]:_(s384) = G_MERGE_VALUES [[MV3]](s64), [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64), [[MV8]](s64)
+    ; CHECK: [[TRUNC:%[0-9]+]]:_(s112) = G_TRUNC [[MV9]](s384)
+    ; CHECK: S_ENDPGM 0, implicit [[TRUNC]](s112)
     %0:_(s32) = COPY $vgpr0
     %1:_(s2) = G_TRUNC %0
     %2:_(s112) = G_ZEXT %1

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/load-constant.96.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/load-constant.96.ll
index 5a7596a7efaa..db06742e233e 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/load-constant.96.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/load-constant.96.ll
@@ -432,16 +432,16 @@ define amdgpu_ps <3 x i32> @s_load_constant_v3i32_align1(<3 x i32> addrspace(4)*
 ; GFX9-NOUNALIGNED-NEXT:    v_lshlrev_b32_e32 v3, 24, v4
 ; GFX9-NOUNALIGNED-NEXT:    v_and_or_b32 v4, v5, s0, v6
 ; GFX9-NOUNALIGNED-NEXT:    v_lshlrev_b32_e32 v5, 16, v7
-; GFX9-NOUNALIGNED-NEXT:    v_and_or_b32 v7, v9, v0, v10
 ; GFX9-NOUNALIGNED-NEXT:    v_lshlrev_b32_e32 v6, 24, v8
-; GFX9-NOUNALIGNED-NEXT:    v_lshlrev_b32_e32 v8, 16, v11
-; GFX9-NOUNALIGNED-NEXT:    v_lshlrev_b32_e32 v9, 24, v12
-; GFX9-NOUNALIGNED-NEXT:    v_or3_b32 v0, v1, v2, v3
-; GFX9-NOUNALIGNED-NEXT:    v_or3_b32 v1, v4, v5, v6
-; GFX9-NOUNALIGNED-NEXT:    v_or3_b32 v2, v7, v8, v9
-; GFX9-NOUNALIGNED-NEXT:    v_readfirstlane_b32 s0, v0
-; GFX9-NOUNALIGNED-NEXT:    v_readfirstlane_b32 s1, v1
-; GFX9-NOUNALIGNED-NEXT:    v_readfirstlane_b32 s2, v2
+; GFX9-NOUNALIGNED-NEXT:    v_and_or_b32 v0, v9, v0, v10
+; GFX9-NOUNALIGNED-NEXT:    v_lshlrev_b32_e32 v7, 16, v11
+; GFX9-NOUNALIGNED-NEXT:    v_lshlrev_b32_e32 v8, 24, v12
+; GFX9-NOUNALIGNED-NEXT:    v_or3_b32 v1, v1, v2, v3
+; GFX9-NOUNALIGNED-NEXT:    v_or3_b32 v2, v4, v5, v6
+; GFX9-NOUNALIGNED-NEXT:    v_or3_b32 v0, v0, v7, v8
+; GFX9-NOUNALIGNED-NEXT:    v_readfirstlane_b32 s0, v1
+; GFX9-NOUNALIGNED-NEXT:    v_readfirstlane_b32 s1, v2
+; GFX9-NOUNALIGNED-NEXT:    v_readfirstlane_b32 s2, v0
 ; GFX9-NOUNALIGNED-NEXT:    ; return to shader part epilog
 ;
 ; GFX7-UNALIGNED-LABEL: s_load_constant_v3i32_align1:

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/load-local.96.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/load-local.96.ll
index eeef6bcade9a..8fbeccd9f2a0 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/load-local.96.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/load-local.96.ll
@@ -37,114 +37,114 @@ define <3 x i32> @load_lds_v3i32_align1(<3 x i32> addrspace(3)* %ptr) {
 ; GFX9-LABEL: load_lds_v3i32_align1:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_mov_b32_e32 v2, v0
-; GFX9-NEXT:    ds_read_u8 v0, v0
-; GFX9-NEXT:    ds_read_u8 v1, v2 offset:1
-; GFX9-NEXT:    ds_read_u8 v4, v2 offset:2
-; GFX9-NEXT:    ds_read_u8 v5, v2 offset:3
-; GFX9-NEXT:    ds_read_u8 v6, v2 offset:4
-; GFX9-NEXT:    ds_read_u8 v7, v2 offset:5
-; GFX9-NEXT:    ds_read_u8 v8, v2 offset:6
-; GFX9-NEXT:    ds_read_u8 v9, v2 offset:7
+; GFX9-NEXT:    ds_read_u8 v1, v0
+; GFX9-NEXT:    ds_read_u8 v3, v0 offset:1
+; GFX9-NEXT:    ds_read_u8 v4, v0 offset:2
+; GFX9-NEXT:    ds_read_u8 v5, v0 offset:3
+; GFX9-NEXT:    ds_read_u8 v6, v0 offset:4
+; GFX9-NEXT:    ds_read_u8 v7, v0 offset:5
+; GFX9-NEXT:    ds_read_u8 v8, v0 offset:6
+; GFX9-NEXT:    ds_read_u8 v9, v0 offset:7
 ; GFX9-NEXT:    s_mov_b32 s5, 8
 ; GFX9-NEXT:    s_movk_i32 s4, 0xff
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(6)
-; GFX9-NEXT:    v_lshlrev_b32_sdwa v1, s5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
-; GFX9-NEXT:    v_and_or_b32 v0, v0, s4, v1
+; GFX9-NEXT:    v_lshlrev_b32_sdwa v3, s5, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
+; GFX9-NEXT:    v_and_or_b32 v1, v1, s4, v3
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(5)
-; GFX9-NEXT:    v_and_b32_e32 v1, s4, v4
+; GFX9-NEXT:    v_and_b32_e32 v3, s4, v4
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(4)
 ; GFX9-NEXT:    v_and_b32_e32 v4, s4, v5
-; GFX9-NEXT:    v_mov_b32_e32 v3, 0xff
-; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
+; GFX9-NEXT:    v_mov_b32_e32 v2, 0xff
+; GFX9-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v4, 24, v4
-; GFX9-NEXT:    v_or3_b32 v0, v0, v1, v4
+; GFX9-NEXT:    v_or3_b32 v3, v1, v3, v4
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(2)
 ; GFX9-NEXT:    v_lshlrev_b32_sdwa v1, s5, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(1)
-; GFX9-NEXT:    v_and_b32_e32 v4, v8, v3
+; GFX9-NEXT:    v_and_b32_e32 v4, v8, v2
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    v_and_b32_e32 v5, v9, v3
+; GFX9-NEXT:    v_and_b32_e32 v5, v9, v2
 ; GFX9-NEXT:    v_and_or_b32 v1, v6, s4, v1
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v4, 16, v4
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v5, 24, v5
 ; GFX9-NEXT:    v_or3_b32 v1, v1, v4, v5
-; GFX9-NEXT:    ds_read_u8 v4, v2 offset:8
-; GFX9-NEXT:    ds_read_u8 v5, v2 offset:9
-; GFX9-NEXT:    ds_read_u8 v6, v2 offset:10
-; GFX9-NEXT:    ds_read_u8 v2, v2 offset:11
+; GFX9-NEXT:    ds_read_u8 v4, v0 offset:8
+; GFX9-NEXT:    ds_read_u8 v5, v0 offset:9
+; GFX9-NEXT:    ds_read_u8 v6, v0 offset:10
+; GFX9-NEXT:    ds_read_u8 v0, v0 offset:11
 ; GFX9-NEXT:    v_mov_b32_e32 v7, 8
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(2)
 ; GFX9-NEXT:    v_lshlrev_b32_sdwa v5, v7, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
-; GFX9-NEXT:    v_and_or_b32 v4, v4, v3, v5
+; GFX9-NEXT:    v_and_or_b32 v4, v4, v2, v5
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(1)
-; GFX9-NEXT:    v_and_b32_e32 v5, v6, v3
+; GFX9-NEXT:    v_and_b32_e32 v5, v6, v2
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    v_and_b32_e32 v2, v2, v3
+; GFX9-NEXT:    v_and_b32_e32 v0, v0, v2
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v5, 16, v5
-; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 24, v2
-; GFX9-NEXT:    v_or3_b32 v2, v4, v5, v2
+; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 24, v0
+; GFX9-NEXT:    v_or3_b32 v2, v4, v5, v0
+; GFX9-NEXT:    v_mov_b32_e32 v0, v3
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX7-LABEL: load_lds_v3i32_align1:
 ; GFX7:       ; %bb.0:
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX7-NEXT:    s_mov_b32 m0, -1
-; GFX7-NEXT:    v_mov_b32_e32 v2, v0
 ; GFX7-NEXT:    s_movk_i32 s4, 0xff
-; GFX7-NEXT:    ds_read_u8 v0, v0
-; GFX7-NEXT:    ds_read_u8 v1, v2 offset:1
-; GFX7-NEXT:    ds_read_u8 v4, v2 offset:2
-; GFX7-NEXT:    ds_read_u8 v5, v2 offset:3
-; GFX7-NEXT:    ds_read_u8 v6, v2 offset:4
-; GFX7-NEXT:    ds_read_u8 v7, v2 offset:5
-; GFX7-NEXT:    ds_read_u8 v8, v2 offset:6
-; GFX7-NEXT:    ds_read_u8 v9, v2 offset:7
+; GFX7-NEXT:    ds_read_u8 v1, v0
+; GFX7-NEXT:    ds_read_u8 v3, v0 offset:1
+; GFX7-NEXT:    ds_read_u8 v4, v0 offset:2
+; GFX7-NEXT:    ds_read_u8 v5, v0 offset:3
+; GFX7-NEXT:    ds_read_u8 v6, v0 offset:4
+; GFX7-NEXT:    ds_read_u8 v7, v0 offset:5
+; GFX7-NEXT:    ds_read_u8 v8, v0 offset:6
+; GFX7-NEXT:    ds_read_u8 v9, v0 offset:7
 ; GFX7-NEXT:    s_waitcnt lgkmcnt(6)
+; GFX7-NEXT:    v_and_b32_e32 v3, s4, v3
 ; GFX7-NEXT:    v_and_b32_e32 v1, s4, v1
-; GFX7-NEXT:    v_and_b32_e32 v0, s4, v0
-; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 8, v1
-; GFX7-NEXT:    v_or_b32_e32 v0, v0, v1
+; GFX7-NEXT:    v_lshlrev_b32_e32 v3, 8, v3
+; GFX7-NEXT:    v_or_b32_e32 v1, v1, v3
 ; GFX7-NEXT:    s_waitcnt lgkmcnt(5)
-; GFX7-NEXT:    v_and_b32_e32 v1, s4, v4
-; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
-; GFX7-NEXT:    v_mov_b32_e32 v3, 0xff
-; GFX7-NEXT:    v_or_b32_e32 v0, v0, v1
+; GFX7-NEXT:    v_and_b32_e32 v3, s4, v4
+; GFX7-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
+; GFX7-NEXT:    v_mov_b32_e32 v2, 0xff
+; GFX7-NEXT:    v_or_b32_e32 v1, v1, v3
 ; GFX7-NEXT:    s_waitcnt lgkmcnt(4)
-; GFX7-NEXT:    v_and_b32_e32 v1, s4, v5
+; GFX7-NEXT:    v_and_b32_e32 v3, s4, v5
+; GFX7-NEXT:    v_lshlrev_b32_e32 v3, 24, v3
 ; GFX7-NEXT:    s_waitcnt lgkmcnt(2)
-; GFX7-NEXT:    v_and_b32_e32 v4, v7, v3
-; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 24, v1
-; GFX7-NEXT:    v_or_b32_e32 v0, v0, v1
+; GFX7-NEXT:    v_and_b32_e32 v4, v7, v2
+; GFX7-NEXT:    v_or_b32_e32 v3, v1, v3
 ; GFX7-NEXT:    v_and_b32_e32 v1, s4, v6
 ; GFX7-NEXT:    v_lshlrev_b32_e32 v4, 8, v4
 ; GFX7-NEXT:    v_or_b32_e32 v1, v1, v4
 ; GFX7-NEXT:    s_waitcnt lgkmcnt(1)
-; GFX7-NEXT:    v_and_b32_e32 v4, v8, v3
+; GFX7-NEXT:    v_and_b32_e32 v4, v8, v2
 ; GFX7-NEXT:    v_lshlrev_b32_e32 v4, 16, v4
 ; GFX7-NEXT:    v_or_b32_e32 v1, v1, v4
 ; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX7-NEXT:    v_and_b32_e32 v4, v9, v3
+; GFX7-NEXT:    v_and_b32_e32 v4, v9, v2
 ; GFX7-NEXT:    v_lshlrev_b32_e32 v4, 24, v4
 ; GFX7-NEXT:    v_or_b32_e32 v1, v1, v4
-; GFX7-NEXT:    ds_read_u8 v4, v2 offset:8
-; GFX7-NEXT:    ds_read_u8 v5, v2 offset:9
-; GFX7-NEXT:    ds_read_u8 v6, v2 offset:10
-; GFX7-NEXT:    ds_read_u8 v2, v2 offset:11
+; GFX7-NEXT:    ds_read_u8 v4, v0 offset:8
+; GFX7-NEXT:    ds_read_u8 v5, v0 offset:9
+; GFX7-NEXT:    ds_read_u8 v6, v0 offset:10
+; GFX7-NEXT:    ds_read_u8 v0, v0 offset:11
 ; GFX7-NEXT:    s_waitcnt lgkmcnt(3)
-; GFX7-NEXT:    v_and_b32_e32 v4, v4, v3
+; GFX7-NEXT:    v_and_b32_e32 v4, v4, v2
 ; GFX7-NEXT:    s_waitcnt lgkmcnt(2)
-; GFX7-NEXT:    v_and_b32_e32 v5, v5, v3
+; GFX7-NEXT:    v_and_b32_e32 v5, v5, v2
 ; GFX7-NEXT:    v_lshlrev_b32_e32 v5, 8, v5
 ; GFX7-NEXT:    v_or_b32_e32 v4, v4, v5
 ; GFX7-NEXT:    s_waitcnt lgkmcnt(1)
-; GFX7-NEXT:    v_and_b32_e32 v5, v6, v3
+; GFX7-NEXT:    v_and_b32_e32 v5, v6, v2
 ; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX7-NEXT:    v_and_b32_e32 v2, v2, v3
+; GFX7-NEXT:    v_and_b32_e32 v0, v0, v2
 ; GFX7-NEXT:    v_lshlrev_b32_e32 v5, 16, v5
 ; GFX7-NEXT:    v_or_b32_e32 v4, v4, v5
-; GFX7-NEXT:    v_lshlrev_b32_e32 v2, 24, v2
-; GFX7-NEXT:    v_or_b32_e32 v2, v4, v2
+; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 24, v0
+; GFX7-NEXT:    v_or_b32_e32 v2, v4, v0
+; GFX7-NEXT:    v_mov_b32_e32 v0, v3
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: load_lds_v3i32_align1:

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/load-unaligned.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/load-unaligned.ll
index 2cffd9b15a2f..8d7208f72810 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/load-unaligned.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/load-unaligned.ll
@@ -174,61 +174,61 @@ define <3 x i32> @load_lds_v3i32_align1(<3 x i32> addrspace(3)* %ptr) {
 ; GFX7:       ; %bb.0:
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX7-NEXT:    s_mov_b32 m0, -1
-; GFX7-NEXT:    v_mov_b32_e32 v2, v0
 ; GFX7-NEXT:    s_movk_i32 s4, 0xff
-; GFX7-NEXT:    ds_read_u8 v0, v0
-; GFX7-NEXT:    ds_read_u8 v1, v2 offset:1
-; GFX7-NEXT:    ds_read_u8 v4, v2 offset:2
-; GFX7-NEXT:    ds_read_u8 v5, v2 offset:3
-; GFX7-NEXT:    ds_read_u8 v6, v2 offset:4
-; GFX7-NEXT:    ds_read_u8 v7, v2 offset:5
-; GFX7-NEXT:    ds_read_u8 v8, v2 offset:6
-; GFX7-NEXT:    ds_read_u8 v9, v2 offset:7
+; GFX7-NEXT:    ds_read_u8 v1, v0
+; GFX7-NEXT:    ds_read_u8 v3, v0 offset:1
+; GFX7-NEXT:    ds_read_u8 v4, v0 offset:2
+; GFX7-NEXT:    ds_read_u8 v5, v0 offset:3
+; GFX7-NEXT:    ds_read_u8 v6, v0 offset:4
+; GFX7-NEXT:    ds_read_u8 v7, v0 offset:5
+; GFX7-NEXT:    ds_read_u8 v8, v0 offset:6
+; GFX7-NEXT:    ds_read_u8 v9, v0 offset:7
 ; GFX7-NEXT:    s_waitcnt lgkmcnt(6)
+; GFX7-NEXT:    v_and_b32_e32 v3, s4, v3
 ; GFX7-NEXT:    v_and_b32_e32 v1, s4, v1
-; GFX7-NEXT:    v_and_b32_e32 v0, s4, v0
-; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 8, v1
-; GFX7-NEXT:    v_or_b32_e32 v0, v0, v1
+; GFX7-NEXT:    v_lshlrev_b32_e32 v3, 8, v3
+; GFX7-NEXT:    v_or_b32_e32 v1, v1, v3
 ; GFX7-NEXT:    s_waitcnt lgkmcnt(5)
-; GFX7-NEXT:    v_and_b32_e32 v1, s4, v4
-; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
-; GFX7-NEXT:    v_mov_b32_e32 v3, 0xff
-; GFX7-NEXT:    v_or_b32_e32 v0, v0, v1
+; GFX7-NEXT:    v_and_b32_e32 v3, s4, v4
+; GFX7-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
+; GFX7-NEXT:    v_mov_b32_e32 v2, 0xff
+; GFX7-NEXT:    v_or_b32_e32 v1, v1, v3
 ; GFX7-NEXT:    s_waitcnt lgkmcnt(4)
-; GFX7-NEXT:    v_and_b32_e32 v1, s4, v5
+; GFX7-NEXT:    v_and_b32_e32 v3, s4, v5
+; GFX7-NEXT:    v_lshlrev_b32_e32 v3, 24, v3
 ; GFX7-NEXT:    s_waitcnt lgkmcnt(2)
-; GFX7-NEXT:    v_and_b32_e32 v4, v7, v3
-; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 24, v1
-; GFX7-NEXT:    v_or_b32_e32 v0, v0, v1
+; GFX7-NEXT:    v_and_b32_e32 v4, v7, v2
+; GFX7-NEXT:    v_or_b32_e32 v3, v1, v3
 ; GFX7-NEXT:    v_and_b32_e32 v1, s4, v6
 ; GFX7-NEXT:    v_lshlrev_b32_e32 v4, 8, v4
 ; GFX7-NEXT:    v_or_b32_e32 v1, v1, v4
 ; GFX7-NEXT:    s_waitcnt lgkmcnt(1)
-; GFX7-NEXT:    v_and_b32_e32 v4, v8, v3
+; GFX7-NEXT:    v_and_b32_e32 v4, v8, v2
 ; GFX7-NEXT:    v_lshlrev_b32_e32 v4, 16, v4
 ; GFX7-NEXT:    v_or_b32_e32 v1, v1, v4
 ; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX7-NEXT:    v_and_b32_e32 v4, v9, v3
+; GFX7-NEXT:    v_and_b32_e32 v4, v9, v2
 ; GFX7-NEXT:    v_lshlrev_b32_e32 v4, 24, v4
 ; GFX7-NEXT:    v_or_b32_e32 v1, v1, v4
-; GFX7-NEXT:    ds_read_u8 v4, v2 offset:8
-; GFX7-NEXT:    ds_read_u8 v5, v2 offset:9
-; GFX7-NEXT:    ds_read_u8 v6, v2 offset:10
-; GFX7-NEXT:    ds_read_u8 v2, v2 offset:11
+; GFX7-NEXT:    ds_read_u8 v4, v0 offset:8
+; GFX7-NEXT:    ds_read_u8 v5, v0 offset:9
+; GFX7-NEXT:    ds_read_u8 v6, v0 offset:10
+; GFX7-NEXT:    ds_read_u8 v0, v0 offset:11
 ; GFX7-NEXT:    s_waitcnt lgkmcnt(3)
-; GFX7-NEXT:    v_and_b32_e32 v4, v4, v3
+; GFX7-NEXT:    v_and_b32_e32 v4, v4, v2
 ; GFX7-NEXT:    s_waitcnt lgkmcnt(2)
-; GFX7-NEXT:    v_and_b32_e32 v5, v5, v3
+; GFX7-NEXT:    v_and_b32_e32 v5, v5, v2
 ; GFX7-NEXT:    v_lshlrev_b32_e32 v5, 8, v5
 ; GFX7-NEXT:    v_or_b32_e32 v4, v4, v5
 ; GFX7-NEXT:    s_waitcnt lgkmcnt(1)
-; GFX7-NEXT:    v_and_b32_e32 v5, v6, v3
+; GFX7-NEXT:    v_and_b32_e32 v5, v6, v2
 ; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX7-NEXT:    v_and_b32_e32 v2, v2, v3
+; GFX7-NEXT:    v_and_b32_e32 v0, v0, v2
 ; GFX7-NEXT:    v_lshlrev_b32_e32 v5, 16, v5
 ; GFX7-NEXT:    v_or_b32_e32 v4, v4, v5
-; GFX7-NEXT:    v_lshlrev_b32_e32 v2, 24, v2
-; GFX7-NEXT:    v_or_b32_e32 v2, v4, v2
+; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 24, v0
+; GFX7-NEXT:    v_or_b32_e32 v2, v4, v0
+; GFX7-NEXT:    v_mov_b32_e32 v0, v3
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: load_lds_v3i32_align1:

diff  --git a/llvm/test/CodeGen/AMDGPU/ds-alignment.ll b/llvm/test/CodeGen/AMDGPU/ds-alignment.ll
index e3b00b3da878..fb81d4847028 100644
--- a/llvm/test/CodeGen/AMDGPU/ds-alignment.ll
+++ b/llvm/test/CodeGen/AMDGPU/ds-alignment.ll
@@ -352,74 +352,63 @@ define amdgpu_kernel void @ds12align1(<3 x i32> addrspace(3)* %in, <3 x i32> add
 ; ALIGNED-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
 ; ALIGNED-GISEL-NEXT:    s_mov_b32 s3, 8
 ; ALIGNED-GISEL-NEXT:    s_movk_i32 s2, 0xff
-; ALIGNED-GISEL-NEXT:    v_mov_b32_e32 v2, 0xff
+; ALIGNED-GISEL-NEXT:    v_mov_b32_e32 v1, 0xff
 ; ALIGNED-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; ALIGNED-GISEL-NEXT:    v_mov_b32_e32 v3, s0
-; ALIGNED-GISEL-NEXT:    ds_read_u8 v0, v3
-; ALIGNED-GISEL-NEXT:    ds_read_u8 v1, v3 offset:1
-; ALIGNED-GISEL-NEXT:    ds_read_u8 v4, v3 offset:2
-; ALIGNED-GISEL-NEXT:    ds_read_u8 v5, v3 offset:3
-; ALIGNED-GISEL-NEXT:    ds_read_u8 v6, v3 offset:4
-; ALIGNED-GISEL-NEXT:    ds_read_u8 v7, v3 offset:5
-; ALIGNED-GISEL-NEXT:    ds_read_u8 v8, v3 offset:6
-; ALIGNED-GISEL-NEXT:    ds_read_u8 v9, v3 offset:7
+; ALIGNED-GISEL-NEXT:    v_mov_b32_e32 v2, s0
+; ALIGNED-GISEL-NEXT:    ds_read_u8 v0, v2
+; ALIGNED-GISEL-NEXT:    ds_read_u8 v3, v2 offset:1
+; ALIGNED-GISEL-NEXT:    ds_read_u8 v4, v2 offset:2
+; ALIGNED-GISEL-NEXT:    ds_read_u8 v5, v2 offset:3
+; ALIGNED-GISEL-NEXT:    ds_read_u8 v6, v2 offset:4
+; ALIGNED-GISEL-NEXT:    ds_read_u8 v7, v2 offset:5
+; ALIGNED-GISEL-NEXT:    ds_read_u8 v8, v2 offset:6
+; ALIGNED-GISEL-NEXT:    ds_read_u8 v9, v2 offset:7
 ; ALIGNED-GISEL-NEXT:    s_waitcnt lgkmcnt(6)
-; ALIGNED-GISEL-NEXT:    v_lshlrev_b32_sdwa v1, s3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
-; ALIGNED-GISEL-NEXT:    v_and_or_b32 v0, v0, s2, v1
+; ALIGNED-GISEL-NEXT:    v_lshlrev_b32_sdwa v3, s3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
+; ALIGNED-GISEL-NEXT:    v_and_or_b32 v0, v0, s2, v3
 ; ALIGNED-GISEL-NEXT:    s_waitcnt lgkmcnt(5)
-; ALIGNED-GISEL-NEXT:    v_and_b32_e32 v1, s2, v4
+; ALIGNED-GISEL-NEXT:    v_and_b32_e32 v3, s2, v4
 ; ALIGNED-GISEL-NEXT:    s_waitcnt lgkmcnt(4)
 ; ALIGNED-GISEL-NEXT:    v_and_b32_e32 v4, s2, v5
-; ALIGNED-GISEL-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
+; ALIGNED-GISEL-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
 ; ALIGNED-GISEL-NEXT:    v_lshlrev_b32_e32 v4, 24, v4
-; ALIGNED-GISEL-NEXT:    v_or3_b32 v0, v0, v1, v4
-; ALIGNED-GISEL-NEXT:    s_waitcnt lgkmcnt(2)
-; ALIGNED-GISEL-NEXT:    v_lshlrev_b32_sdwa v1, s3, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
+; ALIGNED-GISEL-NEXT:    v_or3_b32 v0, v0, v3, v4
 ; ALIGNED-GISEL-NEXT:    s_waitcnt lgkmcnt(1)
-; ALIGNED-GISEL-NEXT:    v_and_b32_e32 v4, v8, v2
+; ALIGNED-GISEL-NEXT:    v_and_b32_e32 v4, v8, v1
+; ALIGNED-GISEL-NEXT:    v_lshlrev_b32_sdwa v3, s3, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
 ; ALIGNED-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; ALIGNED-GISEL-NEXT:    v_and_b32_e32 v5, v9, v2
-; ALIGNED-GISEL-NEXT:    v_and_or_b32 v1, v6, s2, v1
+; ALIGNED-GISEL-NEXT:    v_and_b32_e32 v1, v9, v1
+; ALIGNED-GISEL-NEXT:    v_and_or_b32 v3, v6, s2, v3
 ; ALIGNED-GISEL-NEXT:    v_lshlrev_b32_e32 v4, 16, v4
-; ALIGNED-GISEL-NEXT:    v_lshlrev_b32_e32 v5, 24, v5
-; ALIGNED-GISEL-NEXT:    v_or3_b32 v1, v1, v4, v5
-; ALIGNED-GISEL-NEXT:    ds_read_u8 v4, v3 offset:8
-; ALIGNED-GISEL-NEXT:    ds_read_u8 v5, v3 offset:9
-; ALIGNED-GISEL-NEXT:    ds_read_u8 v6, v3 offset:10
-; ALIGNED-GISEL-NEXT:    ds_read_u8 v3, v3 offset:11
-; ALIGNED-GISEL-NEXT:    v_mov_b32_e32 v7, 8
-; ALIGNED-GISEL-NEXT:    s_waitcnt lgkmcnt(2)
-; ALIGNED-GISEL-NEXT:    v_lshlrev_b32_sdwa v5, v7, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
-; ALIGNED-GISEL-NEXT:    v_and_or_b32 v4, v4, v2, v5
-; ALIGNED-GISEL-NEXT:    s_waitcnt lgkmcnt(1)
-; ALIGNED-GISEL-NEXT:    v_and_b32_e32 v5, v6, v2
-; ALIGNED-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; ALIGNED-GISEL-NEXT:    v_and_b32_e32 v2, v3, v2
-; ALIGNED-GISEL-NEXT:    v_lshlrev_b32_e32 v5, 16, v5
-; ALIGNED-GISEL-NEXT:    v_lshlrev_b32_e32 v2, 24, v2
-; ALIGNED-GISEL-NEXT:    v_lshrrev_b32_e32 v3, 8, v0
-; ALIGNED-GISEL-NEXT:    v_mov_b32_e32 v6, s1
-; ALIGNED-GISEL-NEXT:    v_or3_b32 v2, v4, v5, v2
-; ALIGNED-GISEL-NEXT:    v_lshrrev_b32_e32 v4, 16, v0
-; ALIGNED-GISEL-NEXT:    v_lshrrev_b32_e32 v5, 24, v0
-; ALIGNED-GISEL-NEXT:    ds_write_b8 v6, v0
-; ALIGNED-GISEL-NEXT:    ds_write_b8 v6, v3 offset:1
-; ALIGNED-GISEL-NEXT:    ds_write_b8 v6, v4 offset:2
-; ALIGNED-GISEL-NEXT:    ds_write_b8 v6, v5 offset:3
+; ALIGNED-GISEL-NEXT:    v_lshlrev_b32_e32 v1, 24, v1
+; ALIGNED-GISEL-NEXT:    v_or3_b32 v1, v3, v4, v1
+; ALIGNED-GISEL-NEXT:    ds_read_u8 v3, v2 offset:8
+; ALIGNED-GISEL-NEXT:    ds_read_u8 v4, v2 offset:9
+; ALIGNED-GISEL-NEXT:    ds_read_u8 v5, v2 offset:10
+; ALIGNED-GISEL-NEXT:    ds_read_u8 v2, v2 offset:11
+; ALIGNED-GISEL-NEXT:    v_lshrrev_b32_e32 v6, 8, v0
+; ALIGNED-GISEL-NEXT:    v_lshrrev_b32_e32 v7, 16, v0
+; ALIGNED-GISEL-NEXT:    v_mov_b32_e32 v9, s1
+; ALIGNED-GISEL-NEXT:    v_lshrrev_b32_e32 v8, 24, v0
+; ALIGNED-GISEL-NEXT:    ds_write_b8 v9, v0
+; ALIGNED-GISEL-NEXT:    ds_write_b8 v9, v6 offset:1
+; ALIGNED-GISEL-NEXT:    ds_write_b8 v9, v7 offset:2
+; ALIGNED-GISEL-NEXT:    ds_write_b8 v9, v8 offset:3
 ; ALIGNED-GISEL-NEXT:    v_lshrrev_b32_e32 v0, 8, v1
-; ALIGNED-GISEL-NEXT:    v_lshrrev_b32_e32 v3, 16, v1
-; ALIGNED-GISEL-NEXT:    v_lshrrev_b32_e32 v4, 24, v1
-; ALIGNED-GISEL-NEXT:    ds_write_b8 v6, v1 offset:4
-; ALIGNED-GISEL-NEXT:    ds_write_b8 v6, v0 offset:5
-; ALIGNED-GISEL-NEXT:    ds_write_b8 v6, v3 offset:6
-; ALIGNED-GISEL-NEXT:    ds_write_b8 v6, v4 offset:7
-; ALIGNED-GISEL-NEXT:    v_lshrrev_b32_e32 v0, 8, v2
-; ALIGNED-GISEL-NEXT:    v_lshrrev_b32_e32 v1, 16, v2
-; ALIGNED-GISEL-NEXT:    v_lshrrev_b32_e32 v3, 24, v2
-; ALIGNED-GISEL-NEXT:    ds_write_b8 v6, v2 offset:8
-; ALIGNED-GISEL-NEXT:    ds_write_b8 v6, v0 offset:9
-; ALIGNED-GISEL-NEXT:    ds_write_b8 v6, v1 offset:10
-; ALIGNED-GISEL-NEXT:    ds_write_b8 v6, v3 offset:11
+; ALIGNED-GISEL-NEXT:    v_lshrrev_b32_e32 v6, 16, v1
+; ALIGNED-GISEL-NEXT:    v_lshrrev_b32_e32 v7, 24, v1
+; ALIGNED-GISEL-NEXT:    ds_write_b8 v9, v1 offset:4
+; ALIGNED-GISEL-NEXT:    ds_write_b8 v9, v0 offset:5
+; ALIGNED-GISEL-NEXT:    ds_write_b8 v9, v6 offset:6
+; ALIGNED-GISEL-NEXT:    ds_write_b8 v9, v7 offset:7
+; ALIGNED-GISEL-NEXT:    s_waitcnt lgkmcnt(11)
+; ALIGNED-GISEL-NEXT:    ds_write_b8 v9, v3 offset:8
+; ALIGNED-GISEL-NEXT:    s_waitcnt lgkmcnt(11)
+; ALIGNED-GISEL-NEXT:    ds_write_b8 v9, v4 offset:9
+; ALIGNED-GISEL-NEXT:    s_waitcnt lgkmcnt(11)
+; ALIGNED-GISEL-NEXT:    ds_write_b8 v9, v5 offset:10
+; ALIGNED-GISEL-NEXT:    s_waitcnt lgkmcnt(11)
+; ALIGNED-GISEL-NEXT:    ds_write_b8 v9, v2 offset:11
 ; ALIGNED-GISEL-NEXT:    s_endpgm
 ;
 ; UNALIGNED-LABEL: ds12align1:
@@ -480,20 +469,17 @@ define amdgpu_kernel void @ds12align2(<3 x i32> addrspace(3)* %in, <3 x i32> add
 ; ALIGNED-GISEL-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
 ; ALIGNED-GISEL-NEXT:    v_and_or_b32 v0, v1, s2, v0
 ; ALIGNED-GISEL-NEXT:    v_and_or_b32 v1, v3, s2, v2
-; ALIGNED-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; ALIGNED-GISEL-NEXT:    v_and_b32_e32 v2, s2, v6
-; ALIGNED-GISEL-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
-; ALIGNED-GISEL-NEXT:    v_mov_b32_e32 v4, s1
-; ALIGNED-GISEL-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
-; ALIGNED-GISEL-NEXT:    ds_write_b16 v4, v0
-; ALIGNED-GISEL-NEXT:    ds_write_b16 v4, v3 offset:2
+; ALIGNED-GISEL-NEXT:    v_mov_b32_e32 v3, s1
+; ALIGNED-GISEL-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
+; ALIGNED-GISEL-NEXT:    ds_write_b16 v3, v0
+; ALIGNED-GISEL-NEXT:    ds_write_b16 v3, v2 offset:2
 ; ALIGNED-GISEL-NEXT:    v_lshrrev_b32_e32 v0, 16, v1
-; ALIGNED-GISEL-NEXT:    v_and_or_b32 v2, v5, s2, v2
-; ALIGNED-GISEL-NEXT:    ds_write_b16 v4, v1 offset:4
-; ALIGNED-GISEL-NEXT:    ds_write_b16 v4, v0 offset:6
-; ALIGNED-GISEL-NEXT:    v_lshrrev_b32_e32 v0, 16, v2
-; ALIGNED-GISEL-NEXT:    ds_write_b16 v4, v2 offset:8
-; ALIGNED-GISEL-NEXT:    ds_write_b16 v4, v0 offset:10
+; ALIGNED-GISEL-NEXT:    ds_write_b16 v3, v1 offset:4
+; ALIGNED-GISEL-NEXT:    ds_write_b16 v3, v0 offset:6
+; ALIGNED-GISEL-NEXT:    s_waitcnt lgkmcnt(5)
+; ALIGNED-GISEL-NEXT:    ds_write_b16 v3, v5 offset:8
+; ALIGNED-GISEL-NEXT:    s_waitcnt lgkmcnt(5)
+; ALIGNED-GISEL-NEXT:    ds_write_b16 v3, v6 offset:10
 ; ALIGNED-GISEL-NEXT:    s_endpgm
 ;
 ; UNALIGNED-LABEL: ds12align2:

diff  --git a/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp b/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp
index 3bdb1b4c4201..99faf74965cc 100644
--- a/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp
+++ b/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp
@@ -1525,11 +1525,11 @@ TEST_F(AArch64GISelMITest, FewerElementsPhi) {
   CHECK: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[EXTRACT2]]:_(s32), %bb.0, [[EXTRACT5]]:_(s32), %bb.1
 
   CHECK: [[OTHER_PHI:%[0-9]+]]:_(s64) = G_PHI
-  CHECK: [[REBUILD_VAL_IMPDEF:%[0-9]+]]:_(<5 x s32>) = G_IMPLICIT_DEF
-  CHECK: [[INSERT0:%[0-9]+]]:_(<5 x s32>) = G_INSERT [[REBUILD_VAL_IMPDEF]]:_, [[PHI0]]:_(<2 x s32>), 0
-  CHECK: [[INSERT1:%[0-9]+]]:_(<5 x s32>) = G_INSERT [[INSERT0]]:_, [[PHI1]]:_(<2 x s32>), 64
-  CHECK: [[INSERT2:%[0-9]+]]:_(<5 x s32>) = G_INSERT [[INSERT1]]:_, [[PHI2]]:_(s32), 128
-  CHECK: [[USE_OP:%[0-9]+]]:_(<5 x s32>) = G_AND [[INSERT2]]:_, [[INSERT2]]:_
+
+  CHECK: [[UNMERGE0:%[0-9]+]]:_(s32), [[UNMERGE1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[PHI0]]:_(<2 x s32>)
+  CHECK: [[UNMERGE2:%[0-9]+]]:_(s32), [[UNMERGE3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[PHI1]]:_(<2 x s32>)
+  CHECK: [[BV:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[UNMERGE0]]:_(s32), [[UNMERGE1]]:_(s32), [[UNMERGE2]]:_(s32), [[UNMERGE3]]:_(s32), [[PHI2]]:_(s32)
+  CHECK: [[USE_OP:%[0-9]+]]:_(<5 x s32>) = G_AND [[BV]]:_, [[BV]]:_
   )";
 
   EXPECT_TRUE(CheckMachineFunction(*MF, CheckStr)) << *MF;


        


More information about the llvm-commits mailing list