[llvm] 05ecd7a - AMDGPU/GlobalISel: Fix tests using illegal copies to physregs

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Fri Jul 30 09:37:41 PDT 2021


Author: Matt Arsenault
Date: 2021-07-30T12:37:29-04:00
New Revision: 05ecd7a2acbf62d415c8529108944966f6425b33

URL: https://github.com/llvm/llvm-project/commit/05ecd7a2acbf62d415c8529108944966f6425b33
DIFF: https://github.com/llvm/llvm-project/commit/05ecd7a2acbf62d415c8529108944966f6425b33.diff

LOG: AMDGPU/GlobalISel: Fix tests using illegal copies to physregs

These are unlegalizable and introduce spurious failures. Ideally the
verifier would reject them. Also avoid some weird G_INSERTs.

Added: 
    

Modified: 
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir

Removed: 
    


################################################################################
diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir
index 600373818750..eaac2ef51df0 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir
@@ -3063,15 +3063,31 @@ body: |
     ; CI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
     ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
     ; CI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
-    ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; CI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CI: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; CI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]]
+    ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
     ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
-    ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
-    ; CI: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>)
-    ; CI: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
-    ; CI: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF]](<4 x s32>)
-    ; CI: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[TRUNC1]], [[TRUNC]](<3 x s8>), 0
-    ; CI: $vgpr0 = COPY [[INSERT]](<4 x s8>)
+    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C4]]
+    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+    ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; CI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
+    ; CI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C3]]
+    ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[DEF]](s32)
+    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C4]]
+    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY3]](s32)
+    ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; CI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32)
+    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[OR2]](s32)
+    ; CI: $vgpr0 = COPY [[COPY5]](s32)
     ; VI-LABEL: name: test_load_constant_v3s8_align4
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
     ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s32), addrspace 4)
@@ -3081,15 +3097,27 @@ body: |
     ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
     ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
     ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
-    ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
-    ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
-    ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
-    ; VI: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>)
-    ; VI: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
-    ; VI: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF]](<4 x s32>)
-    ; VI: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[TRUNC1]], [[TRUNC]](<3 x s8>), 0
-    ; VI: $vgpr0 = COPY [[INSERT]](<4 x s8>)
+    ; VI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; VI: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]]
+    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+    ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]]
+    ; VI: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C4]](s16)
+    ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
+    ; VI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C3]]
+    ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
+    ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]]
+    ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C4]](s16)
+    ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32)
+    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[OR2]](s32)
+    ; VI: $vgpr0 = COPY [[COPY1]](s32)
     ; GFX9-LABEL: name: test_load_constant_v3s8_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
     ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s32), addrspace 4)
@@ -3099,24 +3127,31 @@ body: |
     ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
     ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
     ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
-    ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
-    ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
-    ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
     ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[DEF]](s32)
-    ; GFX9: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
-    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[DEF1]](<2 x s16>)
-    ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
-    ; GFX9: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[UV]](<3 x s16>)
-    ; GFX9: [[DEF2:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
-    ; GFX9: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF2]](<4 x s32>)
-    ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[TRUNC1]], [[TRUNC]](<3 x s8>), 0
-    ; GFX9: $vgpr0 = COPY [[INSERT]](<4 x s8>)
+    ; GFX9: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]]
+    ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+    ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]]
+    ; GFX9: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C4]](s16)
+    ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
+    ; GFX9: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C3]]
+    ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
+    ; GFX9: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]]
+    ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C4]](s16)
+    ; GFX9: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32)
+    ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[OR2]](s32)
+    ; GFX9: $vgpr0 = COPY [[COPY1]](s32)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(<3 x s8>) = G_LOAD %0 :: (load (<3 x s8>), align 4, addrspace 4)
-    %2:_(<4 x s8>) = G_IMPLICIT_DEF
-    %3:_(<4 x s8>) = G_INSERT %2, %1, 0
+    %2:_(s24) = G_BITCAST %1
+    %3:_(s32) = G_ANYEXT %2
     $vgpr0 = COPY %3
 ...
 
@@ -3135,15 +3170,32 @@ body: |
     ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
     ; CI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
     ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4)
-    ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; CI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; CI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
+    ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
     ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
-    ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
-    ; CI: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>)
-    ; CI: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
-    ; CI: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF]](<4 x s32>)
-    ; CI: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[TRUNC1]], [[TRUNC]](<3 x s8>), 0
-    ; CI: $vgpr0 = COPY [[INSERT]](<4 x s8>)
+    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C4]]
+    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+    ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; CI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; CI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[DEF]](s32)
+    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C4]]
+    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32)
+    ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; CI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
+    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[OR2]](s32)
+    ; CI: $vgpr0 = COPY [[COPY4]](s32)
     ; VI-LABEL: name: test_load_constant_v3s8_align1
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
     ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
@@ -3153,15 +3205,28 @@ body: |
     ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
     ; VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
     ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4)
-    ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
-    ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
-    ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
-    ; VI: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>)
-    ; VI: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
-    ; VI: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF]](<4 x s32>)
-    ; VI: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[TRUNC1]], [[TRUNC]](<3 x s8>), 0
-    ; VI: $vgpr0 = COPY [[INSERT]](<4 x s8>)
+    ; VI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; VI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
+    ; VI: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
+    ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; VI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
+    ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]]
+    ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
+    ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
+    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[OR2]](s32)
+    ; VI: $vgpr0 = COPY [[COPY1]](s32)
     ; GFX9-LABEL: name: test_load_constant_v3s8_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
     ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
@@ -3171,24 +3236,32 @@ body: |
     ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
     ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
     ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4)
-    ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
-    ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
-    ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
     ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[DEF]](s32)
-    ; GFX9: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
-    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[DEF1]](<2 x s16>)
-    ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
-    ; GFX9: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[UV]](<3 x s16>)
-    ; GFX9: [[DEF2:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
-    ; GFX9: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF2]](<4 x s32>)
-    ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[TRUNC1]], [[TRUNC]](<3 x s8>), 0
-    ; GFX9: $vgpr0 = COPY [[INSERT]](<4 x s8>)
+    ; GFX9: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
+    ; GFX9: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
+    ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; GFX9: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
+    ; GFX9: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]]
+    ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
+    ; GFX9: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
+    ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[OR2]](s32)
+    ; GFX9: $vgpr0 = COPY [[COPY1]](s32)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(<3 x s8>) = G_LOAD %0 :: (load (<3 x s8>), align 1, addrspace 4)
-    %2:_(<4 x s8>) = G_IMPLICIT_DEF
-    %3:_(<4 x s8>) = G_INSERT %2, %1, 0
+    %2:_(s24) = G_BITCAST %1
+    %3:_(s32) = G_ANYEXT %2
     $vgpr0 = COPY %3
 ...
 
@@ -3207,13 +3280,22 @@ body: |
     ; CI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
     ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
     ; CI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
+    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
     ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
     ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]]
+    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
     ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]]
+    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+    ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
     ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32)
-    ; CI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>)
-    ; CI: $vgpr0 = COPY [[TRUNC]](<4 x s8>)
+    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
+    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; CI: $vgpr0 = COPY [[OR2]](s32)
     ; VI-LABEL: name: test_load_constant_v4s8_align4
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
     ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s32), addrspace 4)
@@ -3223,13 +3305,22 @@ body: |
     ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
     ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
     ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
+    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
     ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
     ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]]
+    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
     ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]]
+    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
     ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32)
-    ; VI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>)
-    ; VI: $vgpr0 = COPY [[TRUNC]](<4 x s8>)
+    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
+    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; VI: $vgpr0 = COPY [[OR2]](s32)
     ; GFX9-LABEL: name: test_load_constant_v4s8_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
     ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s32), addrspace 4)
@@ -3239,18 +3330,26 @@ body: |
     ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
     ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
     ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
+    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
     ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
     ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
+    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]]
+    ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
     ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]]
+    ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+    ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
     ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32)
-    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
-    ; GFX9: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>)
-    ; GFX9: $vgpr0 = COPY [[TRUNC]](<4 x s8>)
+    ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
+    ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; GFX9: $vgpr0 = COPY [[OR2]](s32)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(<4 x s8>) = G_LOAD %0 :: (load (<4 x s8>), align 4, addrspace 4)
-    $vgpr0 = COPY %1
+    %2:_(s32) = G_BITCAST %1
+    $vgpr0 = COPY %2
 ...
 
 ---
@@ -3274,13 +3373,22 @@ body: |
     ; CI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C1]](s32)
     ; CI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C2]](s32)
     ; CI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C3]](s32)
+    ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
     ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C4]]
     ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C4]]
+    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
+    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
     ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C4]]
+    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C2]](s32)
+    ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
     ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32)
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32)
-    ; CI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>)
-    ; CI: $vgpr0 = COPY [[TRUNC]](<4 x s8>)
+    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C4]]
+    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32)
+    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; CI: $vgpr0 = COPY [[OR2]](s32)
     ; VI-LABEL: name: test_load_constant_v4s8_align2
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
     ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4)
@@ -3296,13 +3404,22 @@ body: |
     ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C1]](s32)
     ; VI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C2]](s32)
     ; VI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C3]](s32)
+    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
     ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C4]]
     ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C4]]
+    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
+    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
     ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C4]]
+    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C2]](s32)
+    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
     ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32)
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32)
-    ; VI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>)
-    ; VI: $vgpr0 = COPY [[TRUNC]](<4 x s8>)
+    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C4]]
+    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32)
+    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; VI: $vgpr0 = COPY [[OR2]](s32)
     ; GFX9-LABEL: name: test_load_constant_v4s8_align2
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
     ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4)
@@ -3318,18 +3435,26 @@ body: |
     ; GFX9: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C1]](s32)
     ; GFX9: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C2]](s32)
     ; GFX9: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C3]](s32)
+    ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
     ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C4]]
     ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
+    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C4]]
+    ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
+    ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
     ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+    ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C4]]
+    ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C2]](s32)
+    ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
     ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32)
-    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
-    ; GFX9: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>)
-    ; GFX9: $vgpr0 = COPY [[TRUNC]](<4 x s8>)
+    ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C4]]
+    ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32)
+    ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; GFX9: $vgpr0 = COPY [[OR2]](s32)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(<4 x s8>) = G_LOAD %0 :: (load (<4 x s8>), align 2, addrspace 4)
-    $vgpr0 = COPY %1
+    %2:_(s32) = G_BITCAST %1
+    $vgpr0 = COPY %2
 ...
 
 ---
@@ -3350,13 +3475,25 @@ body: |
     ; CI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
     ; CI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
     ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4)
+    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
     ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
     ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]]
+    ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
     ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]]
+    ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
     ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32)
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32)
-    ; CI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>)
-    ; CI: $vgpr0 = COPY [[TRUNC]](<4 x s8>)
+    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
+    ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; CI: $vgpr0 = COPY [[OR2]](s32)
     ; VI-LABEL: name: test_load_constant_v4s8_align1
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
     ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
@@ -3369,13 +3506,25 @@ body: |
     ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
     ; VI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
     ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4)
+    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
     ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
     ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]]
+    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
     ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]]
+    ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
     ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32)
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32)
-    ; VI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>)
-    ; VI: $vgpr0 = COPY [[TRUNC]](<4 x s8>)
+    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
+    ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; VI: $vgpr0 = COPY [[OR2]](s32)
     ; GFX9-LABEL: name: test_load_constant_v4s8_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
     ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
@@ -3388,18 +3537,29 @@ body: |
     ; GFX9: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
     ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
     ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4)
+    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
     ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
     ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
+    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]]
+    ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
     ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+    ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]]
+    ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
     ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32)
-    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
-    ; GFX9: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>)
-    ; GFX9: $vgpr0 = COPY [[TRUNC]](<4 x s8>)
+    ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
+    ; GFX9: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; GFX9: $vgpr0 = COPY [[OR2]](s32)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(<4 x s8>) = G_LOAD %0 :: (load (<4 x s8>), align 1, addrspace 4)
-    $vgpr0 = COPY %1
+    %2:_(s32) = G_BITCAST %1
+    $vgpr0 = COPY %2
 ...
 
 ---
@@ -3418,23 +3578,40 @@ body: |
     ; CI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
     ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
     ; CI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
+    ; CI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; CI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
+    ; CI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
     ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
+    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
     ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]]
+    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
     ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]]
+    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+    ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
     ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32)
-    ; CI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>)
-    ; CI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
-    ; CI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
-    ; CI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
+    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
     ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV1]](s32)
+    ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]]
     ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32)
+    ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]]
+    ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; CI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
     ; CI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32)
+    ; CI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]]
+    ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C1]](s32)
+    ; CI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
     ; CI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32)
-    ; CI: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32)
-    ; CI: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR1]](<4 x s32>)
-    ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s8>), [[TRUNC1]](<4 x s8>)
-    ; CI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<8 x s8>)
+    ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]]
+    ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
+    ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
+    ; CI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     ; VI-LABEL: name: test_load_constant_v8s8_align8
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
     ; VI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load (<2 x s32>), addrspace 4)
@@ -3445,23 +3622,40 @@ body: |
     ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
     ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
     ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
+    ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; VI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
+    ; VI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
     ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
+    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
     ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]]
+    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
     ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]]
+    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
     ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32)
-    ; VI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>)
-    ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
-    ; VI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
-    ; VI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
+    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
     ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV1]](s32)
+    ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]]
     ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32)
+    ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]]
+    ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
     ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32)
+    ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]]
+    ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C1]](s32)
+    ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
     ; VI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32)
-    ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32)
-    ; VI: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR1]](<4 x s32>)
-    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s8>), [[TRUNC1]](<4 x s8>)
-    ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<8 x s8>)
+    ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]]
+    ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
+    ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
+    ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     ; GFX9-LABEL: name: test_load_constant_v8s8_align8
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
     ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load (<2 x s32>), addrspace 4)
@@ -3472,30 +3666,44 @@ body: |
     ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
     ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
     ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
+    ; GFX9: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; GFX9: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
+    ; GFX9: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
     ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
+    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
     ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
+    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]]
+    ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
     ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]]
+    ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+    ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
     ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32)
-    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
-    ; GFX9: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>)
-    ; GFX9: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
-    ; GFX9: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
-    ; GFX9: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
+    ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
     ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV1]](s32)
+    ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]]
     ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32)
+    ; GFX9: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]]
+    ; GFX9: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; GFX9: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
     ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32)
+    ; GFX9: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]]
+    ; GFX9: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C1]](s32)
+    ; GFX9: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
     ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY7]](s32), [[COPY8]](s32)
-    ; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>)
-    ; GFX9: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS1]](<4 x s16>)
-    ; GFX9: [[CONCAT_VECTORS2:%[0-9]+]]:_(<8 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s8>), [[TRUNC1]](<4 x s8>)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS2]](<8 x s8>)
+    ; GFX9: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]]
+    ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
+    ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
+    ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(<8 x s8>) = G_LOAD %0 :: (load (<8 x s8>), align 8, addrspace 4)
-    $vgpr0_vgpr1 = COPY %1
+    %2:_(<2 x s32>) = G_BITCAST %1
+    $vgpr0_vgpr1 = COPY %2
 ...
 
 ---
@@ -4117,31 +4325,114 @@ body: |
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
     ; CI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p4) :: (load (<4 x s16>), addrspace 4)
     ; CI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>)
-    ; CI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
-    ; CI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0
-    ; CI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
+    ; CI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
+    ; CI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+    ; CI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; CI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+    ; CI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
+    ; CI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+    ; CI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+    ; CI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+    ; CI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]]
+    ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]]
+    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]]
+    ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]]
+    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+    ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
+    ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]]
+    ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+    ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]]
+    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; CI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
+    ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; VI-LABEL: name: test_load_constant_v3s16_align8
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
     ; VI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p4) :: (load (<4 x s16>), addrspace 4)
     ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>)
-    ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
-    ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0
-    ; VI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
+    ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
+    ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+    ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+    ; VI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
+    ; VI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+    ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+    ; VI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+    ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]]
+    ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]]
+    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]]
+    ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]]
+    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; VI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
+    ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]]
+    ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+    ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]]
+    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; VI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
+    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; GFX9-LABEL: name: test_load_constant_v3s16_align8
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
     ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p4) :: (load (<4 x s16>), addrspace 4)
     ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>)
-    ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
-    ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0
-    ; GFX9: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
+    ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
+    ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+    ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+    ; GFX9: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
+    ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+    ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+    ; GFX9: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+    ; GFX9: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+    ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
+    ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+    ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32)
+    ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
+    ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+    ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32)
+    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+    ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(<3 x s16>) = G_LOAD %0 :: (load (<3 x s16>), align 8, addrspace 4)
-    %2:_(<4 x s16>) = G_IMPLICIT_DEF
-    %3:_(<4 x s16>) = G_INSERT %2, %1, 0
-    $vgpr0_vgpr1 = COPY %3
+    %2:_(<3 x s16>) = G_IMPLICIT_DEF
+    %3:_(<6 x s16>) = G_CONCAT_VECTORS %1, %2
+    $vgpr0_vgpr1_vgpr2 = COPY %3
 ...
 
 ---
@@ -4154,25 +4445,34 @@ body: |
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
     ; CI: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[COPY]](p4) :: (load (<3 x s16>), align 4, addrspace 4)
     ; CI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[LOAD]](<3 x s16>), 0
-    ; CI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
+    ; CI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
+    ; CI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
+    ; CI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[LOAD]](<3 x s16>), [[UV]](<3 x s16>)
+    ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS1]](<6 x s16>)
     ; VI-LABEL: name: test_load_constant_v3s16_align4
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
     ; VI: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[COPY]](p4) :: (load (<3 x s16>), align 4, addrspace 4)
     ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[LOAD]](<3 x s16>), 0
-    ; VI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
+    ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
+    ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
+    ; VI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[LOAD]](<3 x s16>), [[UV]](<3 x s16>)
+    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS1]](<6 x s16>)
     ; GFX9-LABEL: name: test_load_constant_v3s16_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
     ; GFX9: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[COPY]](p4) :: (load (<3 x s16>), align 4, addrspace 4)
     ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[LOAD]](<3 x s16>), 0
-    ; GFX9: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
+    ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
+    ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
+    ; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[LOAD]](<3 x s16>), [[UV]](<3 x s16>)
+    ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS1]](<6 x s16>)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(<3 x s16>) = G_LOAD %0 :: (load (<3 x s16>), align 4, addrspace 4)
-    %2:_(<4 x s16>) = G_IMPLICIT_DEF
-    %3:_(<4 x s16>) = G_INSERT %2, %1, 0
-    $vgpr0_vgpr1 = COPY %3
+    %2:_(<3 x s16>) = G_IMPLICIT_DEF
+    %3:_(<6 x s16>) = G_CONCAT_VECTORS %1, %2
+    $vgpr0_vgpr1_vgpr2 = COPY %3
 ...
 
 ---
@@ -4190,27 +4490,40 @@ body: |
     ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
     ; CI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
     ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, addrspace 4)
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+    ; CI: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; CI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
+    ; CI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
+    ; CI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; CI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C2]](s32)
+    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
     ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]]
+    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
     ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]]
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32)
+    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]]
+    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
     ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
     ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
-    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]]
-    ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32)
+    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]]
+    ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
+    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
     ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; CI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; CI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
-    ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[DEF]](<2 x s16>)
-    ; CI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
-    ; CI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
-    ; CI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
+    ; CI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]]
+    ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]]
+    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
+    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; CI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+    ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; VI-LABEL: name: test_load_constant_v3s16_align2
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
     ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4)
@@ -4220,27 +4533,40 @@ body: |
     ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
     ; VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
     ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, addrspace 4)
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; VI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+    ; VI: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; VI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; VI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
+    ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
+    ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C2]](s32)
+    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
     ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]]
+    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
     ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]]
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32)
+    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]]
+    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
     ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
     ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]]
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32)
+    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]]
+    ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
+    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
     ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; VI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[DEF]](<2 x s16>)
-    ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
-    ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
-    ; VI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
+    ; VI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]]
+    ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]]
+    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
+    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; VI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; GFX9-LABEL: name: test_load_constant_v3s16_align2
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
     ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4)
@@ -4250,23 +4576,32 @@ body: |
     ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
     ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
     ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, addrspace 4)
+    ; GFX9: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+    ; GFX9: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; GFX9: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
+    ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
+    ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C2]](s32)
     ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
     ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
     ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
     ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
-    ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[DEF]](s32)
-    ; GFX9: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
-    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[DEF1]](<2 x s16>)
-    ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
-    ; GFX9: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[UV]](<3 x s16>), 0
-    ; GFX9: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
+    ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32)
+    ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32)
+    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+    ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(<3 x s16>) = G_LOAD %0 :: (load (<3 x s16>), align 2, addrspace 4)
-    %2:_(<4 x s16>) = G_IMPLICIT_DEF
-    %3:_(<4 x s16>) = G_INSERT %2, %1, 0
-    $vgpr0_vgpr1 = COPY %3
+    %2:_(<3 x s16>) = G_IMPLICIT_DEF
+    %3:_(<6 x s16>) = G_CONCAT_VECTORS %1, %2
+    $vgpr0_vgpr1_vgpr2 = COPY %3
 ...
 
 ---
@@ -4317,23 +4652,37 @@ body: |
     ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32)
     ; CI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
     ; CI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
+    ; CI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+    ; CI: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; CI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
+    ; CI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C6]](s32)
+    ; CI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; CI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C6]](s32)
     ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
     ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
     ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32)
     ; CI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]]
-    ; CI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+    ; CI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
     ; CI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; CI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C7]], [[C6]](s32)
+    ; CI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; CI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C7]]
+    ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C6]](s32)
     ; CI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]]
-    ; CI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
-    ; CI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
-    ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[DEF]](<2 x s16>)
-    ; CI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
-    ; CI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
-    ; CI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
+    ; CI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+    ; CI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C7]]
+    ; CI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; CI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C7]]
+    ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C6]](s32)
+    ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL5]]
+    ; CI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+    ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+    ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; VI-LABEL: name: test_load_constant_v3s16_align1
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
     ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
@@ -4370,23 +4719,37 @@ body: |
     ; VI: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]]
     ; VI: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16)
     ; VI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
+    ; VI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+    ; VI: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; VI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; VI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
+    ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C5]](s32)
+    ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C5]](s32)
     ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
     ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
     ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
     ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]]
-    ; VI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+    ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
     ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[C5]](s32)
+    ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C6]]
+    ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
     ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]]
-    ; VI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
-    ; VI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[DEF]](<2 x s16>)
-    ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
-    ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
-    ; VI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
+    ; VI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+    ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C6]]
+    ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C6]]
+    ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C5]](s32)
+    ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL5]]
+    ; VI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; GFX9-LABEL: name: test_load_constant_v3s16_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
     ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
@@ -4423,23 +4786,32 @@ body: |
     ; GFX9: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]]
     ; GFX9: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16)
     ; GFX9: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
+    ; GFX9: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+    ; GFX9: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; GFX9: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
+    ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C5]](s32)
+    ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C5]](s32)
     ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
     ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16)
     ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32)
     ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[OR2]](s16)
-    ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[DEF]](s32)
-    ; GFX9: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
-    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[DEF1]](<2 x s16>)
-    ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
-    ; GFX9: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[UV]](<3 x s16>), 0
-    ; GFX9: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
+    ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[COPY1]](s32)
+    ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY2]](s32), [[COPY3]](s32)
+    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+    ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(<3 x s16>) = G_LOAD %0 :: (load (<3 x s16>), align 1, addrspace 4)
-    %2:_(<4 x s16>) = G_IMPLICIT_DEF
-    %3:_(<4 x s16>) = G_INSERT %2, %1, 0
-    $vgpr0_vgpr1 = COPY %3
+    %2:_(<3 x s16>) = G_IMPLICIT_DEF
+    %3:_(<6 x s16>) = G_CONCAT_VECTORS %1, %2
+    $vgpr0_vgpr1_vgpr2 = COPY %3
 ...
 
 ---

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir
index c11148fce00e..e6747716dda1 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir
@@ -2797,15 +2797,31 @@ body: |
     ; CI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
     ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
     ; CI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
-    ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; CI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CI: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; CI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]]
+    ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
     ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
-    ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
-    ; CI: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>)
-    ; CI: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
-    ; CI: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF]](<4 x s32>)
-    ; CI: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[TRUNC1]], [[TRUNC]](<3 x s8>), 0
-    ; CI: $vgpr0 = COPY [[INSERT]](<4 x s8>)
+    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C4]]
+    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+    ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; CI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
+    ; CI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C3]]
+    ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[DEF]](s32)
+    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C4]]
+    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY3]](s32)
+    ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; CI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32)
+    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[OR2]](s32)
+    ; CI: $vgpr0 = COPY [[COPY5]](s32)
     ; VI-LABEL: name: test_load_flat_v3s8_align4
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
     ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32))
@@ -2815,15 +2831,27 @@ body: |
     ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
     ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
     ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
-    ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
-    ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
-    ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
-    ; VI: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>)
-    ; VI: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
-    ; VI: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF]](<4 x s32>)
-    ; VI: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[TRUNC1]], [[TRUNC]](<3 x s8>), 0
-    ; VI: $vgpr0 = COPY [[INSERT]](<4 x s8>)
+    ; VI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; VI: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]]
+    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+    ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]]
+    ; VI: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C4]](s16)
+    ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
+    ; VI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C3]]
+    ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
+    ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]]
+    ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C4]](s16)
+    ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32)
+    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[OR2]](s32)
+    ; VI: $vgpr0 = COPY [[COPY1]](s32)
     ; GFX9-LABEL: name: test_load_flat_v3s8_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
     ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32))
@@ -2833,24 +2861,31 @@ body: |
     ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
     ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
     ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
-    ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
-    ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
-    ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
     ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[DEF]](s32)
-    ; GFX9: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
-    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[DEF1]](<2 x s16>)
-    ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
-    ; GFX9: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[UV]](<3 x s16>)
-    ; GFX9: [[DEF2:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
-    ; GFX9: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF2]](<4 x s32>)
-    ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[TRUNC1]], [[TRUNC]](<3 x s8>), 0
-    ; GFX9: $vgpr0 = COPY [[INSERT]](<4 x s8>)
+    ; GFX9: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]]
+    ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+    ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]]
+    ; GFX9: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C4]](s16)
+    ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
+    ; GFX9: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C3]]
+    ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
+    ; GFX9: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]]
+    ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C4]](s16)
+    ; GFX9: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32)
+    ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[OR2]](s32)
+    ; GFX9: $vgpr0 = COPY [[COPY1]](s32)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(<3 x s8>) = G_LOAD %0 :: (load (<3 x s8>), align 4, addrspace 0)
-    %2:_(<4 x s8>) = G_IMPLICIT_DEF
-    %3:_(<4 x s8>) = G_INSERT %2, %1, 0
+    %2:_(s24) = G_BITCAST %1
+    %3:_(s32) = G_ANYEXT %2
     $vgpr0 = COPY %3
 ...
 
@@ -2869,15 +2904,32 @@ body: |
     ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
     ; CI: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
     ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2)
-    ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; CI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; CI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
+    ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
     ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
-    ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
-    ; CI: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>)
-    ; CI: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
-    ; CI: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF]](<4 x s32>)
-    ; CI: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[TRUNC1]], [[TRUNC]](<3 x s8>), 0
-    ; CI: $vgpr0 = COPY [[INSERT]](<4 x s8>)
+    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C4]]
+    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+    ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; CI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; CI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[DEF]](s32)
+    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C4]]
+    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32)
+    ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; CI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
+    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[OR2]](s32)
+    ; CI: $vgpr0 = COPY [[COPY4]](s32)
     ; VI-LABEL: name: test_load_flat_v3s8_align1
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
     ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
@@ -2887,15 +2939,28 @@ body: |
     ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
     ; VI: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
     ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2)
-    ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
-    ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
-    ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
-    ; VI: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>)
-    ; VI: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
-    ; VI: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF]](<4 x s32>)
-    ; VI: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[TRUNC1]], [[TRUNC]](<3 x s8>), 0
-    ; VI: $vgpr0 = COPY [[INSERT]](<4 x s8>)
+    ; VI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; VI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
+    ; VI: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
+    ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; VI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
+    ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]]
+    ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
+    ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
+    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[OR2]](s32)
+    ; VI: $vgpr0 = COPY [[COPY1]](s32)
     ; GFX9-LABEL: name: test_load_flat_v3s8_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
     ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
@@ -2905,24 +2970,32 @@ body: |
     ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
     ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
     ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2)
-    ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
-    ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
-    ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
     ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[DEF]](s32)
-    ; GFX9: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
-    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[DEF1]](<2 x s16>)
-    ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
-    ; GFX9: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[UV]](<3 x s16>)
-    ; GFX9: [[DEF2:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
-    ; GFX9: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF2]](<4 x s32>)
-    ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[TRUNC1]], [[TRUNC]](<3 x s8>), 0
-    ; GFX9: $vgpr0 = COPY [[INSERT]](<4 x s8>)
+    ; GFX9: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
+    ; GFX9: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
+    ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; GFX9: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
+    ; GFX9: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]]
+    ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
+    ; GFX9: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
+    ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[OR2]](s32)
+    ; GFX9: $vgpr0 = COPY [[COPY1]](s32)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(<3 x s8>) = G_LOAD %0 :: (load (<3 x s8>), align 1, addrspace 0)
-    %2:_(<4 x s8>) = G_IMPLICIT_DEF
-    %3:_(<4 x s8>) = G_INSERT %2, %1, 0
+    %2:_(s24) = G_BITCAST %1
+    %3:_(s32) = G_ANYEXT %2
     $vgpr0 = COPY %3
 ...
 
@@ -2941,13 +3014,22 @@ body: |
     ; CI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
     ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
     ; CI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
+    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
     ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
     ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]]
+    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
     ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]]
+    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+    ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
     ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32)
-    ; CI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>)
-    ; CI: $vgpr0 = COPY [[TRUNC]](<4 x s8>)
+    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
+    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; CI: $vgpr0 = COPY [[OR2]](s32)
     ; VI-LABEL: name: test_load_flat_v4s8_align4
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
     ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32))
@@ -2957,13 +3039,22 @@ body: |
     ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
     ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
     ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
+    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
     ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
     ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]]
+    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
     ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]]
+    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
     ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32)
-    ; VI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>)
-    ; VI: $vgpr0 = COPY [[TRUNC]](<4 x s8>)
+    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
+    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; VI: $vgpr0 = COPY [[OR2]](s32)
     ; GFX9-LABEL: name: test_load_flat_v4s8_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
     ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32))
@@ -2973,18 +3064,26 @@ body: |
     ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
     ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
     ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
+    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
     ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
     ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
+    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]]
+    ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
     ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]]
+    ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+    ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
     ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32)
-    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
-    ; GFX9: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>)
-    ; GFX9: $vgpr0 = COPY [[TRUNC]](<4 x s8>)
+    ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
+    ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; GFX9: $vgpr0 = COPY [[OR2]](s32)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(<4 x s8>) = G_LOAD %0 :: (load (<4 x s8>), align 4, addrspace 0)
-    $vgpr0 = COPY %1
+    %2:_(s32) = G_BITCAST %1
+    $vgpr0 = COPY %2
 ...
 
 ---
@@ -3008,13 +3107,22 @@ body: |
     ; CI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C1]](s32)
     ; CI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C2]](s32)
     ; CI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C3]](s32)
+    ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
     ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C4]]
     ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C4]]
+    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
+    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
     ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C4]]
+    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C2]](s32)
+    ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
     ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32)
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32)
-    ; CI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>)
-    ; CI: $vgpr0 = COPY [[TRUNC]](<4 x s8>)
+    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C4]]
+    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32)
+    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; CI: $vgpr0 = COPY [[OR2]](s32)
     ; VI-LABEL: name: test_load_flat_v4s8_align2
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
     ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16))
@@ -3030,13 +3138,22 @@ body: |
     ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C1]](s32)
     ; VI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C2]](s32)
     ; VI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C3]](s32)
+    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
     ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C4]]
     ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C4]]
+    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
+    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
     ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C4]]
+    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C2]](s32)
+    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
     ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32)
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32)
-    ; VI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>)
-    ; VI: $vgpr0 = COPY [[TRUNC]](<4 x s8>)
+    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C4]]
+    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32)
+    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; VI: $vgpr0 = COPY [[OR2]](s32)
     ; GFX9-LABEL: name: test_load_flat_v4s8_align2
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
     ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16))
@@ -3052,18 +3169,26 @@ body: |
     ; GFX9: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C1]](s32)
     ; GFX9: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C2]](s32)
     ; GFX9: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C3]](s32)
+    ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
     ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C4]]
     ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
+    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C4]]
+    ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
+    ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
     ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+    ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C4]]
+    ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C2]](s32)
+    ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
     ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32)
-    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
-    ; GFX9: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>)
-    ; GFX9: $vgpr0 = COPY [[TRUNC]](<4 x s8>)
+    ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C4]]
+    ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32)
+    ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; GFX9: $vgpr0 = COPY [[OR2]](s32)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(<4 x s8>) = G_LOAD %0 :: (load (<4 x s8>), align 2, addrspace 0)
-    $vgpr0 = COPY %1
+    %2:_(s32) = G_BITCAST %1
+    $vgpr0 = COPY %2
 ...
 
 ---
@@ -3084,13 +3209,25 @@ body: |
     ; CI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
     ; CI: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
     ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3)
+    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
     ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
     ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]]
+    ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
     ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]]
+    ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
     ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32)
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32)
-    ; CI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>)
-    ; CI: $vgpr0 = COPY [[TRUNC]](<4 x s8>)
+    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
+    ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; CI: $vgpr0 = COPY [[OR2]](s32)
     ; VI-LABEL: name: test_load_flat_v4s8_align1
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
     ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
@@ -3103,13 +3240,25 @@ body: |
     ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
     ; VI: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
     ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3)
+    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
     ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
     ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]]
+    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
     ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]]
+    ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
     ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32)
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32)
-    ; VI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>)
-    ; VI: $vgpr0 = COPY [[TRUNC]](<4 x s8>)
+    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
+    ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; VI: $vgpr0 = COPY [[OR2]](s32)
     ; GFX9-LABEL: name: test_load_flat_v4s8_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
     ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
@@ -3122,18 +3271,29 @@ body: |
     ; GFX9: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
     ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
     ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3)
+    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
     ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
     ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
+    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]]
+    ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
     ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+    ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]]
+    ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32)
+    ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
     ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32)
-    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
-    ; GFX9: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>)
-    ; GFX9: $vgpr0 = COPY [[TRUNC]](<4 x s8>)
+    ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
+    ; GFX9: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+    ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; GFX9: $vgpr0 = COPY [[OR2]](s32)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(<4 x s8>) = G_LOAD %0 :: (load (<4 x s8>), align 1, addrspace 0)
-    $vgpr0 = COPY %1
+    %2:_(s32) = G_BITCAST %1
+    $vgpr0 = COPY %2
 ...
 
 ---
@@ -3152,23 +3312,40 @@ body: |
     ; CI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
     ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
     ; CI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
+    ; CI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; CI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
+    ; CI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
     ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
+    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
     ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]]
+    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
     ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]]
+    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+    ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
     ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32)
-    ; CI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>)
-    ; CI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
-    ; CI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
-    ; CI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
+    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
     ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV1]](s32)
+    ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]]
     ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32)
+    ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]]
+    ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; CI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
     ; CI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32)
+    ; CI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]]
+    ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C1]](s32)
+    ; CI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
     ; CI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32)
-    ; CI: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32)
-    ; CI: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR1]](<4 x s32>)
-    ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s8>), [[TRUNC1]](<4 x s8>)
-    ; CI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<8 x s8>)
+    ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]]
+    ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
+    ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
+    ; CI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     ; VI-LABEL: name: test_load_flat_v8s8_align8
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
     ; VI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load (<2 x s32>))
@@ -3179,23 +3356,40 @@ body: |
     ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
     ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
     ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
+    ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; VI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
+    ; VI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
     ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
+    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
     ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]]
+    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
     ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]]
+    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
     ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32)
-    ; VI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>)
-    ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
-    ; VI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
-    ; VI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
+    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
     ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV1]](s32)
+    ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]]
     ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32)
+    ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]]
+    ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
     ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32)
+    ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]]
+    ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C1]](s32)
+    ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
     ; VI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32)
-    ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32)
-    ; VI: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR1]](<4 x s32>)
-    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s8>), [[TRUNC1]](<4 x s8>)
-    ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<8 x s8>)
+    ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]]
+    ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
+    ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
+    ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     ; GFX9-LABEL: name: test_load_flat_v8s8_align8
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
     ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load (<2 x s32>))
@@ -3206,30 +3400,44 @@ body: |
     ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
     ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
     ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
+    ; GFX9: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; GFX9: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
+    ; GFX9: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
     ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
+    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
     ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
+    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]]
+    ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
     ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]]
+    ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+    ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
     ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32)
-    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
-    ; GFX9: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>)
-    ; GFX9: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
-    ; GFX9: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
-    ; GFX9: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
+    ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
     ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV1]](s32)
+    ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]]
     ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32)
+    ; GFX9: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]]
+    ; GFX9: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; GFX9: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
     ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32)
+    ; GFX9: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]]
+    ; GFX9: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C1]](s32)
+    ; GFX9: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
     ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY7]](s32), [[COPY8]](s32)
-    ; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>)
-    ; GFX9: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS1]](<4 x s16>)
-    ; GFX9: [[CONCAT_VECTORS2:%[0-9]+]]:_(<8 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s8>), [[TRUNC1]](<4 x s8>)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS2]](<8 x s8>)
+    ; GFX9: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]]
+    ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
+    ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
+    ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(<8 x s8>) = G_LOAD %0 :: (load (<8 x s8>), align 8, addrspace 0)
-    $vgpr0_vgpr1 = COPY %1
+    %2:_(<2 x s32>) = G_BITCAST %1
+    $vgpr0_vgpr1 = COPY %2
 ...
 
 ---
@@ -3248,41 +3456,74 @@ body: |
     ; CI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
     ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
     ; CI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
+    ; CI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; CI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
+    ; CI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; CI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
+    ; CI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
+    ; CI: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
+    ; CI: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
+    ; CI: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
+    ; CI: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
+    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
     ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
+    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
     ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]]
+    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
     ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]]
+    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+    ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
     ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32)
-    ; CI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>)
-    ; CI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
-    ; CI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
-    ; CI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
+    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
     ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV1]](s32)
+    ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]]
     ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32)
+    ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]]
+    ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; CI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
     ; CI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32)
+    ; CI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]]
+    ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C1]](s32)
+    ; CI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
     ; CI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32)
-    ; CI: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32)
-    ; CI: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR1]](<4 x s32>)
-    ; CI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
-    ; CI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
-    ; CI: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
+    ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]]
+    ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
+    ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
     ; CI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[UV2]](s32)
+    ; CI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]]
     ; CI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32)
+    ; CI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C3]]
+    ; CI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32)
+    ; CI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
     ; CI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR7]](s32)
+    ; CI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C3]]
+    ; CI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C1]](s32)
+    ; CI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
     ; CI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR8]](s32)
-    ; CI: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32)
-    ; CI: [[TRUNC2:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR2]](<4 x s32>)
-    ; CI: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
-    ; CI: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
-    ; CI: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
+    ; CI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C3]]
+    ; CI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32)
+    ; CI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
     ; CI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[UV3]](s32)
+    ; CI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]]
     ; CI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR9]](s32)
+    ; CI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C3]]
+    ; CI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C]](s32)
+    ; CI: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
     ; CI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR10]](s32)
+    ; CI: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C3]]
+    ; CI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C1]](s32)
+    ; CI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
     ; CI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR11]](s32)
-    ; CI: [[BUILD_VECTOR3:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32)
-    ; CI: [[TRUNC3:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR3]](<4 x s32>)
-    ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s8>), [[TRUNC1]](<4 x s8>), [[TRUNC2]](<4 x s8>), [[TRUNC3]](<4 x s8>)
-    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<16 x s8>)
+    ; CI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C3]]
+    ; CI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C2]](s32)
+    ; CI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
+    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
     ; VI-LABEL: name: test_load_flat_v16s8_align16
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
     ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>))
@@ -3293,41 +3534,74 @@ body: |
     ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
     ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
     ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
+    ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; VI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
+    ; VI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; VI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
+    ; VI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
+    ; VI: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
+    ; VI: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
+    ; VI: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
+    ; VI: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
+    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
     ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
+    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
     ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]]
+    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
     ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]]
+    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
     ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32)
-    ; VI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>)
-    ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
-    ; VI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
-    ; VI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
+    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
     ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV1]](s32)
+    ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]]
     ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32)
+    ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]]
+    ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
     ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32)
+    ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]]
+    ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C1]](s32)
+    ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
     ; VI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32)
-    ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32)
-    ; VI: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR1]](<4 x s32>)
-    ; VI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
-    ; VI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
-    ; VI: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
+    ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]]
+    ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
+    ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
     ; VI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[UV2]](s32)
+    ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]]
     ; VI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32)
+    ; VI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C3]]
+    ; VI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32)
+    ; VI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
     ; VI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR7]](s32)
+    ; VI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C3]]
+    ; VI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C1]](s32)
+    ; VI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
     ; VI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR8]](s32)
-    ; VI: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32)
-    ; VI: [[TRUNC2:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR2]](<4 x s32>)
-    ; VI: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
-    ; VI: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
-    ; VI: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
+    ; VI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C3]]
+    ; VI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32)
+    ; VI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
     ; VI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[UV3]](s32)
+    ; VI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]]
     ; VI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR9]](s32)
+    ; VI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C3]]
+    ; VI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C]](s32)
+    ; VI: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
     ; VI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR10]](s32)
+    ; VI: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C3]]
+    ; VI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C1]](s32)
+    ; VI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
     ; VI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR11]](s32)
-    ; VI: [[BUILD_VECTOR3:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32)
-    ; VI: [[TRUNC3:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR3]](<4 x s32>)
-    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s8>), [[TRUNC1]](<4 x s8>), [[TRUNC2]](<4 x s8>), [[TRUNC3]](<4 x s8>)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<16 x s8>)
+    ; VI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C3]]
+    ; VI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C2]](s32)
+    ; VI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
+    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
     ; GFX9-LABEL: name: test_load_flat_v16s8_align16
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
     ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>))
@@ -3338,52 +3612,78 @@ body: |
     ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
     ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
     ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
+    ; GFX9: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; GFX9: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
+    ; GFX9: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; GFX9: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
+    ; GFX9: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
+    ; GFX9: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
+    ; GFX9: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
+    ; GFX9: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
+    ; GFX9: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
+    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
     ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
+    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
     ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
+    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]]
+    ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
     ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]]
+    ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+    ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
     ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32)
-    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
-    ; GFX9: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>)
-    ; GFX9: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
-    ; GFX9: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
-    ; GFX9: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
+    ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
     ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV1]](s32)
+    ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]]
     ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32)
+    ; GFX9: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]]
+    ; GFX9: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; GFX9: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
     ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32)
+    ; GFX9: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]]
+    ; GFX9: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C1]](s32)
+    ; GFX9: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
     ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY7]](s32), [[COPY8]](s32)
-    ; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>)
-    ; GFX9: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS1]](<4 x s16>)
-    ; GFX9: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
-    ; GFX9: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
-    ; GFX9: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
+    ; GFX9: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]]
+    ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
+    ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
     ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY [[UV2]](s32)
+    ; GFX9: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]]
     ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY9]](s32), [[COPY10]](s32)
+    ; GFX9: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C3]]
+    ; GFX9: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32)
+    ; GFX9: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
     ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR7]](s32)
+    ; GFX9: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C3]]
+    ; GFX9: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C1]](s32)
+    ; GFX9: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
     ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR8]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY11]](s32), [[COPY12]](s32)
-    ; GFX9: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC4]](<2 x s16>), [[BUILD_VECTOR_TRUNC5]](<2 x s16>)
-    ; GFX9: [[TRUNC2:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS2]](<4 x s16>)
-    ; GFX9: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
-    ; GFX9: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
-    ; GFX9: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
+    ; GFX9: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C3]]
+    ; GFX9: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32)
+    ; GFX9: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
     ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY [[UV3]](s32)
+    ; GFX9: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]]
     ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR9]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY13]](s32), [[COPY14]](s32)
+    ; GFX9: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C3]]
+    ; GFX9: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C]](s32)
+    ; GFX9: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
     ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR10]](s32)
+    ; GFX9: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C3]]
+    ; GFX9: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C1]](s32)
+    ; GFX9: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
     ; GFX9: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR11]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC7:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[COPY16]](s32)
-    ; GFX9: [[CONCAT_VECTORS3:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC6]](<2 x s16>), [[BUILD_VECTOR_TRUNC7]](<2 x s16>)
-    ; GFX9: [[TRUNC3:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS3]](<4 x s16>)
-    ; GFX9: [[CONCAT_VECTORS4:%[0-9]+]]:_(<16 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s8>), [[TRUNC1]](<4 x s8>), [[TRUNC2]](<4 x s8>), [[TRUNC3]](<4 x s8>)
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS4]](<16 x s8>)
+    ; GFX9: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C3]]
+    ; GFX9: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C2]](s32)
+    ; GFX9: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
+    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(<16 x s8>) = G_LOAD %0 :: (load (<16 x s8>), align 16, addrspace 0)
-    $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
+    %2:_(<4 x s32>) = G_BITCAST %1
+    $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2
 ...
 
 ---
@@ -3406,77 +3706,142 @@ body: |
     ; CI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
     ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
     ; CI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C3]](s32)
+    ; CI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
+    ; CI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; CI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C3]](s32)
+    ; CI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
+    ; CI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
+    ; CI: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C3]](s32)
+    ; CI: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
+    ; CI: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
+    ; CI: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C3]](s32)
+    ; CI: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C1]](s32)
+    ; CI: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C2]](s32)
+    ; CI: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C3]](s32)
+    ; CI: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C1]](s32)
+    ; CI: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C2]](s32)
+    ; CI: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C3]](s32)
+    ; CI: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C1]](s32)
+    ; CI: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C2]](s32)
+    ; CI: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C3]](s32)
+    ; CI: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C1]](s32)
+    ; CI: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C2]](s32)
+    ; CI: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C3]](s32)
+    ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
     ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
+    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C4]]
     ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C4]]
+    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
+    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
     ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C4]]
+    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C2]](s32)
+    ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
     ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32)
-    ; CI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>)
-    ; CI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
-    ; CI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
-    ; CI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C3]](s32)
+    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C4]]
+    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32)
+    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
     ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV1]](s32)
+    ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C4]]
     ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32)
+    ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C4]]
+    ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32)
+    ; CI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
     ; CI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32)
+    ; CI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C4]]
+    ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C2]](s32)
+    ; CI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
     ; CI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32)
-    ; CI: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32)
-    ; CI: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR1]](<4 x s32>)
-    ; CI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
-    ; CI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
-    ; CI: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C3]](s32)
+    ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C4]]
+    ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C3]](s32)
+    ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
     ; CI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[UV2]](s32)
+    ; CI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C4]]
     ; CI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32)
+    ; CI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C4]]
+    ; CI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C1]](s32)
+    ; CI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
     ; CI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR7]](s32)
+    ; CI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C4]]
+    ; CI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C2]](s32)
+    ; CI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
     ; CI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR8]](s32)
-    ; CI: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32)
-    ; CI: [[TRUNC2:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR2]](<4 x s32>)
-    ; CI: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
-    ; CI: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
-    ; CI: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C3]](s32)
+    ; CI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C4]]
+    ; CI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C3]](s32)
+    ; CI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
     ; CI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[UV3]](s32)
+    ; CI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C4]]
     ; CI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR9]](s32)
+    ; CI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C4]]
+    ; CI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C1]](s32)
+    ; CI: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
     ; CI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR10]](s32)
+    ; CI: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C4]]
+    ; CI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C2]](s32)
+    ; CI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
     ; CI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR11]](s32)
-    ; CI: [[BUILD_VECTOR3:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32)
-    ; CI: [[TRUNC3:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR3]](<4 x s32>)
-    ; CI: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C1]](s32)
-    ; CI: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C2]](s32)
-    ; CI: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C3]](s32)
+    ; CI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C4]]
+    ; CI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C3]](s32)
+    ; CI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
     ; CI: [[COPY17:%[0-9]+]]:_(s32) = COPY [[UV4]](s32)
+    ; CI: [[AND16:%[0-9]+]]:_(s32) = G_AND [[COPY17]], [[C4]]
     ; CI: [[COPY18:%[0-9]+]]:_(s32) = COPY [[LSHR12]](s32)
+    ; CI: [[AND17:%[0-9]+]]:_(s32) = G_AND [[COPY18]], [[C4]]
+    ; CI: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[C1]](s32)
+    ; CI: [[OR12:%[0-9]+]]:_(s32) = G_OR [[AND16]], [[SHL12]]
     ; CI: [[COPY19:%[0-9]+]]:_(s32) = COPY [[LSHR13]](s32)
+    ; CI: [[AND18:%[0-9]+]]:_(s32) = G_AND [[COPY19]], [[C4]]
+    ; CI: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND18]], [[C2]](s32)
+    ; CI: [[OR13:%[0-9]+]]:_(s32) = G_OR [[OR12]], [[SHL13]]
     ; CI: [[COPY20:%[0-9]+]]:_(s32) = COPY [[LSHR14]](s32)
-    ; CI: [[BUILD_VECTOR4:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32)
-    ; CI: [[TRUNC4:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR4]](<4 x s32>)
-    ; CI: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C1]](s32)
-    ; CI: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C2]](s32)
-    ; CI: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C3]](s32)
+    ; CI: [[AND19:%[0-9]+]]:_(s32) = G_AND [[COPY20]], [[C4]]
+    ; CI: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[C3]](s32)
+    ; CI: [[OR14:%[0-9]+]]:_(s32) = G_OR [[OR13]], [[SHL14]]
     ; CI: [[COPY21:%[0-9]+]]:_(s32) = COPY [[UV5]](s32)
+    ; CI: [[AND20:%[0-9]+]]:_(s32) = G_AND [[COPY21]], [[C4]]
     ; CI: [[COPY22:%[0-9]+]]:_(s32) = COPY [[LSHR15]](s32)
+    ; CI: [[AND21:%[0-9]+]]:_(s32) = G_AND [[COPY22]], [[C4]]
+    ; CI: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[C1]](s32)
+    ; CI: [[OR15:%[0-9]+]]:_(s32) = G_OR [[AND20]], [[SHL15]]
     ; CI: [[COPY23:%[0-9]+]]:_(s32) = COPY [[LSHR16]](s32)
+    ; CI: [[AND22:%[0-9]+]]:_(s32) = G_AND [[COPY23]], [[C4]]
+    ; CI: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[AND22]], [[C2]](s32)
+    ; CI: [[OR16:%[0-9]+]]:_(s32) = G_OR [[OR15]], [[SHL16]]
     ; CI: [[COPY24:%[0-9]+]]:_(s32) = COPY [[LSHR17]](s32)
-    ; CI: [[BUILD_VECTOR5:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32)
-    ; CI: [[TRUNC5:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR5]](<4 x s32>)
-    ; CI: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C1]](s32)
-    ; CI: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C2]](s32)
-    ; CI: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C3]](s32)
+    ; CI: [[AND23:%[0-9]+]]:_(s32) = G_AND [[COPY24]], [[C4]]
+    ; CI: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[C3]](s32)
+    ; CI: [[OR17:%[0-9]+]]:_(s32) = G_OR [[OR16]], [[SHL17]]
     ; CI: [[COPY25:%[0-9]+]]:_(s32) = COPY [[UV6]](s32)
+    ; CI: [[AND24:%[0-9]+]]:_(s32) = G_AND [[COPY25]], [[C4]]
     ; CI: [[COPY26:%[0-9]+]]:_(s32) = COPY [[LSHR18]](s32)
+    ; CI: [[AND25:%[0-9]+]]:_(s32) = G_AND [[COPY26]], [[C4]]
+    ; CI: [[SHL18:%[0-9]+]]:_(s32) = G_SHL [[AND25]], [[C1]](s32)
+    ; CI: [[OR18:%[0-9]+]]:_(s32) = G_OR [[AND24]], [[SHL18]]
     ; CI: [[COPY27:%[0-9]+]]:_(s32) = COPY [[LSHR19]](s32)
+    ; CI: [[AND26:%[0-9]+]]:_(s32) = G_AND [[COPY27]], [[C4]]
+    ; CI: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[AND26]], [[C2]](s32)
+    ; CI: [[OR19:%[0-9]+]]:_(s32) = G_OR [[OR18]], [[SHL19]]
     ; CI: [[COPY28:%[0-9]+]]:_(s32) = COPY [[LSHR20]](s32)
-    ; CI: [[BUILD_VECTOR6:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32)
-    ; CI: [[TRUNC6:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR6]](<4 x s32>)
-    ; CI: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C1]](s32)
-    ; CI: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C2]](s32)
-    ; CI: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C3]](s32)
+    ; CI: [[AND27:%[0-9]+]]:_(s32) = G_AND [[COPY28]], [[C4]]
+    ; CI: [[SHL20:%[0-9]+]]:_(s32) = G_SHL [[AND27]], [[C3]](s32)
+    ; CI: [[OR20:%[0-9]+]]:_(s32) = G_OR [[OR19]], [[SHL20]]
     ; CI: [[COPY29:%[0-9]+]]:_(s32) = COPY [[UV7]](s32)
+    ; CI: [[AND28:%[0-9]+]]:_(s32) = G_AND [[COPY29]], [[C4]]
     ; CI: [[COPY30:%[0-9]+]]:_(s32) = COPY [[LSHR21]](s32)
+    ; CI: [[AND29:%[0-9]+]]:_(s32) = G_AND [[COPY30]], [[C4]]
+    ; CI: [[SHL21:%[0-9]+]]:_(s32) = G_SHL [[AND29]], [[C1]](s32)
+    ; CI: [[OR21:%[0-9]+]]:_(s32) = G_OR [[AND28]], [[SHL21]]
     ; CI: [[COPY31:%[0-9]+]]:_(s32) = COPY [[LSHR22]](s32)
+    ; CI: [[AND30:%[0-9]+]]:_(s32) = G_AND [[COPY31]], [[C4]]
+    ; CI: [[SHL22:%[0-9]+]]:_(s32) = G_SHL [[AND30]], [[C2]](s32)
+    ; CI: [[OR22:%[0-9]+]]:_(s32) = G_OR [[OR21]], [[SHL22]]
     ; CI: [[COPY32:%[0-9]+]]:_(s32) = COPY [[LSHR23]](s32)
-    ; CI: [[BUILD_VECTOR7:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32), [[COPY32]](s32)
-    ; CI: [[TRUNC7:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR7]](<4 x s32>)
-    ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<32 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s8>), [[TRUNC1]](<4 x s8>), [[TRUNC2]](<4 x s8>), [[TRUNC3]](<4 x s8>), [[TRUNC4]](<4 x s8>), [[TRUNC5]](<4 x s8>), [[TRUNC6]](<4 x s8>), [[TRUNC7]](<4 x s8>)
-    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<32 x s8>)
+    ; CI: [[AND31:%[0-9]+]]:_(s32) = G_AND [[COPY32]], [[C4]]
+    ; CI: [[SHL23:%[0-9]+]]:_(s32) = G_SHL [[AND31]], [[C3]](s32)
+    ; CI: [[OR23:%[0-9]+]]:_(s32) = G_OR [[OR22]], [[SHL23]]
+    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32), [[OR14]](s32), [[OR17]](s32), [[OR20]](s32), [[OR23]](s32)
+    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<8 x s32>)
     ; VI-LABEL: name: test_load_flat_v32s8_align32
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
     ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 32)
@@ -3491,77 +3856,142 @@ body: |
     ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
     ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
     ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C3]](s32)
+    ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
+    ; VI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; VI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C3]](s32)
+    ; VI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
+    ; VI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
+    ; VI: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C3]](s32)
+    ; VI: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
+    ; VI: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
+    ; VI: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C3]](s32)
+    ; VI: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C1]](s32)
+    ; VI: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C2]](s32)
+    ; VI: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C3]](s32)
+    ; VI: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C1]](s32)
+    ; VI: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C2]](s32)
+    ; VI: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C3]](s32)
+    ; VI: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C1]](s32)
+    ; VI: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C2]](s32)
+    ; VI: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C3]](s32)
+    ; VI: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C1]](s32)
+    ; VI: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C2]](s32)
+    ; VI: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C3]](s32)
+    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
     ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
+    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C4]]
     ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C4]]
+    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
+    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
     ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C4]]
+    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C2]](s32)
+    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
     ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32)
-    ; VI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>)
-    ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
-    ; VI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
-    ; VI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C3]](s32)
+    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C4]]
+    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32)
+    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
     ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV1]](s32)
+    ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C4]]
     ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32)
+    ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C4]]
+    ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32)
+    ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
     ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32)
+    ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C4]]
+    ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C2]](s32)
+    ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
     ; VI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32)
-    ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32)
-    ; VI: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR1]](<4 x s32>)
-    ; VI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
-    ; VI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
-    ; VI: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C3]](s32)
+    ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C4]]
+    ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C3]](s32)
+    ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
     ; VI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[UV2]](s32)
+    ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C4]]
     ; VI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32)
+    ; VI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C4]]
+    ; VI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C1]](s32)
+    ; VI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
     ; VI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR7]](s32)
+    ; VI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C4]]
+    ; VI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C2]](s32)
+    ; VI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
     ; VI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR8]](s32)
-    ; VI: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32)
-    ; VI: [[TRUNC2:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR2]](<4 x s32>)
-    ; VI: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
-    ; VI: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
-    ; VI: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C3]](s32)
+    ; VI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C4]]
+    ; VI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C3]](s32)
+    ; VI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
     ; VI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[UV3]](s32)
+    ; VI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C4]]
     ; VI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR9]](s32)
+    ; VI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C4]]
+    ; VI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C1]](s32)
+    ; VI: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
     ; VI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR10]](s32)
+    ; VI: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C4]]
+    ; VI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C2]](s32)
+    ; VI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
     ; VI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR11]](s32)
-    ; VI: [[BUILD_VECTOR3:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32)
-    ; VI: [[TRUNC3:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR3]](<4 x s32>)
-    ; VI: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C1]](s32)
-    ; VI: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C2]](s32)
-    ; VI: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C3]](s32)
+    ; VI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C4]]
+    ; VI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C3]](s32)
+    ; VI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
     ; VI: [[COPY17:%[0-9]+]]:_(s32) = COPY [[UV4]](s32)
+    ; VI: [[AND16:%[0-9]+]]:_(s32) = G_AND [[COPY17]], [[C4]]
     ; VI: [[COPY18:%[0-9]+]]:_(s32) = COPY [[LSHR12]](s32)
+    ; VI: [[AND17:%[0-9]+]]:_(s32) = G_AND [[COPY18]], [[C4]]
+    ; VI: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[C1]](s32)
+    ; VI: [[OR12:%[0-9]+]]:_(s32) = G_OR [[AND16]], [[SHL12]]
     ; VI: [[COPY19:%[0-9]+]]:_(s32) = COPY [[LSHR13]](s32)
+    ; VI: [[AND18:%[0-9]+]]:_(s32) = G_AND [[COPY19]], [[C4]]
+    ; VI: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND18]], [[C2]](s32)
+    ; VI: [[OR13:%[0-9]+]]:_(s32) = G_OR [[OR12]], [[SHL13]]
     ; VI: [[COPY20:%[0-9]+]]:_(s32) = COPY [[LSHR14]](s32)
-    ; VI: [[BUILD_VECTOR4:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32)
-    ; VI: [[TRUNC4:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR4]](<4 x s32>)
-    ; VI: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C1]](s32)
-    ; VI: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C2]](s32)
-    ; VI: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C3]](s32)
+    ; VI: [[AND19:%[0-9]+]]:_(s32) = G_AND [[COPY20]], [[C4]]
+    ; VI: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[C3]](s32)
+    ; VI: [[OR14:%[0-9]+]]:_(s32) = G_OR [[OR13]], [[SHL14]]
     ; VI: [[COPY21:%[0-9]+]]:_(s32) = COPY [[UV5]](s32)
+    ; VI: [[AND20:%[0-9]+]]:_(s32) = G_AND [[COPY21]], [[C4]]
     ; VI: [[COPY22:%[0-9]+]]:_(s32) = COPY [[LSHR15]](s32)
+    ; VI: [[AND21:%[0-9]+]]:_(s32) = G_AND [[COPY22]], [[C4]]
+    ; VI: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[C1]](s32)
+    ; VI: [[OR15:%[0-9]+]]:_(s32) = G_OR [[AND20]], [[SHL15]]
     ; VI: [[COPY23:%[0-9]+]]:_(s32) = COPY [[LSHR16]](s32)
+    ; VI: [[AND22:%[0-9]+]]:_(s32) = G_AND [[COPY23]], [[C4]]
+    ; VI: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[AND22]], [[C2]](s32)
+    ; VI: [[OR16:%[0-9]+]]:_(s32) = G_OR [[OR15]], [[SHL16]]
     ; VI: [[COPY24:%[0-9]+]]:_(s32) = COPY [[LSHR17]](s32)
-    ; VI: [[BUILD_VECTOR5:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32)
-    ; VI: [[TRUNC5:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR5]](<4 x s32>)
-    ; VI: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C1]](s32)
-    ; VI: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C2]](s32)
-    ; VI: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C3]](s32)
+    ; VI: [[AND23:%[0-9]+]]:_(s32) = G_AND [[COPY24]], [[C4]]
+    ; VI: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[C3]](s32)
+    ; VI: [[OR17:%[0-9]+]]:_(s32) = G_OR [[OR16]], [[SHL17]]
     ; VI: [[COPY25:%[0-9]+]]:_(s32) = COPY [[UV6]](s32)
+    ; VI: [[AND24:%[0-9]+]]:_(s32) = G_AND [[COPY25]], [[C4]]
     ; VI: [[COPY26:%[0-9]+]]:_(s32) = COPY [[LSHR18]](s32)
+    ; VI: [[AND25:%[0-9]+]]:_(s32) = G_AND [[COPY26]], [[C4]]
+    ; VI: [[SHL18:%[0-9]+]]:_(s32) = G_SHL [[AND25]], [[C1]](s32)
+    ; VI: [[OR18:%[0-9]+]]:_(s32) = G_OR [[AND24]], [[SHL18]]
     ; VI: [[COPY27:%[0-9]+]]:_(s32) = COPY [[LSHR19]](s32)
+    ; VI: [[AND26:%[0-9]+]]:_(s32) = G_AND [[COPY27]], [[C4]]
+    ; VI: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[AND26]], [[C2]](s32)
+    ; VI: [[OR19:%[0-9]+]]:_(s32) = G_OR [[OR18]], [[SHL19]]
     ; VI: [[COPY28:%[0-9]+]]:_(s32) = COPY [[LSHR20]](s32)
-    ; VI: [[BUILD_VECTOR6:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32)
-    ; VI: [[TRUNC6:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR6]](<4 x s32>)
-    ; VI: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C1]](s32)
-    ; VI: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C2]](s32)
-    ; VI: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C3]](s32)
+    ; VI: [[AND27:%[0-9]+]]:_(s32) = G_AND [[COPY28]], [[C4]]
+    ; VI: [[SHL20:%[0-9]+]]:_(s32) = G_SHL [[AND27]], [[C3]](s32)
+    ; VI: [[OR20:%[0-9]+]]:_(s32) = G_OR [[OR19]], [[SHL20]]
     ; VI: [[COPY29:%[0-9]+]]:_(s32) = COPY [[UV7]](s32)
+    ; VI: [[AND28:%[0-9]+]]:_(s32) = G_AND [[COPY29]], [[C4]]
     ; VI: [[COPY30:%[0-9]+]]:_(s32) = COPY [[LSHR21]](s32)
+    ; VI: [[AND29:%[0-9]+]]:_(s32) = G_AND [[COPY30]], [[C4]]
+    ; VI: [[SHL21:%[0-9]+]]:_(s32) = G_SHL [[AND29]], [[C1]](s32)
+    ; VI: [[OR21:%[0-9]+]]:_(s32) = G_OR [[AND28]], [[SHL21]]
     ; VI: [[COPY31:%[0-9]+]]:_(s32) = COPY [[LSHR22]](s32)
+    ; VI: [[AND30:%[0-9]+]]:_(s32) = G_AND [[COPY31]], [[C4]]
+    ; VI: [[SHL22:%[0-9]+]]:_(s32) = G_SHL [[AND30]], [[C2]](s32)
+    ; VI: [[OR22:%[0-9]+]]:_(s32) = G_OR [[OR21]], [[SHL22]]
     ; VI: [[COPY32:%[0-9]+]]:_(s32) = COPY [[LSHR23]](s32)
-    ; VI: [[BUILD_VECTOR7:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32), [[COPY32]](s32)
-    ; VI: [[TRUNC7:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR7]](<4 x s32>)
-    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<32 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s8>), [[TRUNC1]](<4 x s8>), [[TRUNC2]](<4 x s8>), [[TRUNC3]](<4 x s8>), [[TRUNC4]](<4 x s8>), [[TRUNC5]](<4 x s8>), [[TRUNC6]](<4 x s8>), [[TRUNC7]](<4 x s8>)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<32 x s8>)
+    ; VI: [[AND31:%[0-9]+]]:_(s32) = G_AND [[COPY32]], [[C4]]
+    ; VI: [[SHL23:%[0-9]+]]:_(s32) = G_SHL [[AND31]], [[C3]](s32)
+    ; VI: [[OR23:%[0-9]+]]:_(s32) = G_OR [[OR22]], [[SHL23]]
+    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32), [[OR14]](s32), [[OR17]](s32), [[OR20]](s32), [[OR23]](s32)
+    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<8 x s32>)
     ; GFX9-LABEL: name: test_load_flat_v32s8_align32
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
     ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 32)
@@ -3576,96 +4006,146 @@ body: |
     ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
     ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
     ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C3]](s32)
+    ; GFX9: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
+    ; GFX9: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; GFX9: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C3]](s32)
+    ; GFX9: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
+    ; GFX9: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
+    ; GFX9: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C3]](s32)
+    ; GFX9: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
+    ; GFX9: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
+    ; GFX9: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C3]](s32)
+    ; GFX9: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C1]](s32)
+    ; GFX9: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C2]](s32)
+    ; GFX9: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C3]](s32)
+    ; GFX9: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C1]](s32)
+    ; GFX9: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C2]](s32)
+    ; GFX9: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C3]](s32)
+    ; GFX9: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C1]](s32)
+    ; GFX9: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C2]](s32)
+    ; GFX9: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C3]](s32)
+    ; GFX9: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C1]](s32)
+    ; GFX9: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C2]](s32)
+    ; GFX9: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C3]](s32)
+    ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
     ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
+    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C4]]
     ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
+    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C4]]
+    ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
+    ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
     ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C4]]
+    ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C2]](s32)
+    ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
     ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32)
-    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
-    ; GFX9: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>)
-    ; GFX9: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
-    ; GFX9: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
-    ; GFX9: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C3]](s32)
+    ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C4]]
+    ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32)
+    ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
     ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV1]](s32)
+    ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C4]]
     ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32)
+    ; GFX9: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C4]]
+    ; GFX9: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32)
+    ; GFX9: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
     ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32)
+    ; GFX9: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C4]]
+    ; GFX9: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C2]](s32)
+    ; GFX9: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
     ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY7]](s32), [[COPY8]](s32)
-    ; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>)
-    ; GFX9: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS1]](<4 x s16>)
-    ; GFX9: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
-    ; GFX9: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
-    ; GFX9: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C3]](s32)
+    ; GFX9: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C4]]
+    ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C3]](s32)
+    ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
     ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY [[UV2]](s32)
+    ; GFX9: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C4]]
     ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY9]](s32), [[COPY10]](s32)
+    ; GFX9: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C4]]
+    ; GFX9: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C1]](s32)
+    ; GFX9: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
     ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR7]](s32)
+    ; GFX9: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C4]]
+    ; GFX9: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C2]](s32)
+    ; GFX9: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
     ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR8]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY11]](s32), [[COPY12]](s32)
-    ; GFX9: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC4]](<2 x s16>), [[BUILD_VECTOR_TRUNC5]](<2 x s16>)
-    ; GFX9: [[TRUNC2:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS2]](<4 x s16>)
-    ; GFX9: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
-    ; GFX9: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
-    ; GFX9: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C3]](s32)
+    ; GFX9: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C4]]
+    ; GFX9: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C3]](s32)
+    ; GFX9: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
     ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY [[UV3]](s32)
+    ; GFX9: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C4]]
     ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR9]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY13]](s32), [[COPY14]](s32)
+    ; GFX9: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C4]]
+    ; GFX9: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C1]](s32)
+    ; GFX9: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
     ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR10]](s32)
+    ; GFX9: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C4]]
+    ; GFX9: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C2]](s32)
+    ; GFX9: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
     ; GFX9: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR11]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC7:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[COPY16]](s32)
-    ; GFX9: [[CONCAT_VECTORS3:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC6]](<2 x s16>), [[BUILD_VECTOR_TRUNC7]](<2 x s16>)
-    ; GFX9: [[TRUNC3:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS3]](<4 x s16>)
-    ; GFX9: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C1]](s32)
-    ; GFX9: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C2]](s32)
-    ; GFX9: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C3]](s32)
+    ; GFX9: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C4]]
+    ; GFX9: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C3]](s32)
+    ; GFX9: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
     ; GFX9: [[COPY17:%[0-9]+]]:_(s32) = COPY [[UV4]](s32)
+    ; GFX9: [[AND16:%[0-9]+]]:_(s32) = G_AND [[COPY17]], [[C4]]
     ; GFX9: [[COPY18:%[0-9]+]]:_(s32) = COPY [[LSHR12]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC8:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY17]](s32), [[COPY18]](s32)
+    ; GFX9: [[AND17:%[0-9]+]]:_(s32) = G_AND [[COPY18]], [[C4]]
+    ; GFX9: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[C1]](s32)
+    ; GFX9: [[OR12:%[0-9]+]]:_(s32) = G_OR [[AND16]], [[SHL12]]
     ; GFX9: [[COPY19:%[0-9]+]]:_(s32) = COPY [[LSHR13]](s32)
+    ; GFX9: [[AND18:%[0-9]+]]:_(s32) = G_AND [[COPY19]], [[C4]]
+    ; GFX9: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND18]], [[C2]](s32)
+    ; GFX9: [[OR13:%[0-9]+]]:_(s32) = G_OR [[OR12]], [[SHL13]]
     ; GFX9: [[COPY20:%[0-9]+]]:_(s32) = COPY [[LSHR14]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC9:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY19]](s32), [[COPY20]](s32)
-    ; GFX9: [[CONCAT_VECTORS4:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC8]](<2 x s16>), [[BUILD_VECTOR_TRUNC9]](<2 x s16>)
-    ; GFX9: [[TRUNC4:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS4]](<4 x s16>)
-    ; GFX9: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C1]](s32)
-    ; GFX9: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C2]](s32)
-    ; GFX9: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C3]](s32)
+    ; GFX9: [[AND19:%[0-9]+]]:_(s32) = G_AND [[COPY20]], [[C4]]
+    ; GFX9: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[C3]](s32)
+    ; GFX9: [[OR14:%[0-9]+]]:_(s32) = G_OR [[OR13]], [[SHL14]]
     ; GFX9: [[COPY21:%[0-9]+]]:_(s32) = COPY [[UV5]](s32)
+    ; GFX9: [[AND20:%[0-9]+]]:_(s32) = G_AND [[COPY21]], [[C4]]
     ; GFX9: [[COPY22:%[0-9]+]]:_(s32) = COPY [[LSHR15]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC10:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY21]](s32), [[COPY22]](s32)
+    ; GFX9: [[AND21:%[0-9]+]]:_(s32) = G_AND [[COPY22]], [[C4]]
+    ; GFX9: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[C1]](s32)
+    ; GFX9: [[OR15:%[0-9]+]]:_(s32) = G_OR [[AND20]], [[SHL15]]
     ; GFX9: [[COPY23:%[0-9]+]]:_(s32) = COPY [[LSHR16]](s32)
+    ; GFX9: [[AND22:%[0-9]+]]:_(s32) = G_AND [[COPY23]], [[C4]]
+    ; GFX9: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[AND22]], [[C2]](s32)
+    ; GFX9: [[OR16:%[0-9]+]]:_(s32) = G_OR [[OR15]], [[SHL16]]
     ; GFX9: [[COPY24:%[0-9]+]]:_(s32) = COPY [[LSHR17]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC11:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY23]](s32), [[COPY24]](s32)
-    ; GFX9: [[CONCAT_VECTORS5:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC10]](<2 x s16>), [[BUILD_VECTOR_TRUNC11]](<2 x s16>)
-    ; GFX9: [[TRUNC5:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS5]](<4 x s16>)
-    ; GFX9: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C1]](s32)
-    ; GFX9: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C2]](s32)
-    ; GFX9: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C3]](s32)
+    ; GFX9: [[AND23:%[0-9]+]]:_(s32) = G_AND [[COPY24]], [[C4]]
+    ; GFX9: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[C3]](s32)
+    ; GFX9: [[OR17:%[0-9]+]]:_(s32) = G_OR [[OR16]], [[SHL17]]
     ; GFX9: [[COPY25:%[0-9]+]]:_(s32) = COPY [[UV6]](s32)
+    ; GFX9: [[AND24:%[0-9]+]]:_(s32) = G_AND [[COPY25]], [[C4]]
     ; GFX9: [[COPY26:%[0-9]+]]:_(s32) = COPY [[LSHR18]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC12:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY25]](s32), [[COPY26]](s32)
+    ; GFX9: [[AND25:%[0-9]+]]:_(s32) = G_AND [[COPY26]], [[C4]]
+    ; GFX9: [[SHL18:%[0-9]+]]:_(s32) = G_SHL [[AND25]], [[C1]](s32)
+    ; GFX9: [[OR18:%[0-9]+]]:_(s32) = G_OR [[AND24]], [[SHL18]]
     ; GFX9: [[COPY27:%[0-9]+]]:_(s32) = COPY [[LSHR19]](s32)
+    ; GFX9: [[AND26:%[0-9]+]]:_(s32) = G_AND [[COPY27]], [[C4]]
+    ; GFX9: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[AND26]], [[C2]](s32)
+    ; GFX9: [[OR19:%[0-9]+]]:_(s32) = G_OR [[OR18]], [[SHL19]]
     ; GFX9: [[COPY28:%[0-9]+]]:_(s32) = COPY [[LSHR20]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC13:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY27]](s32), [[COPY28]](s32)
-    ; GFX9: [[CONCAT_VECTORS6:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC12]](<2 x s16>), [[BUILD_VECTOR_TRUNC13]](<2 x s16>)
-    ; GFX9: [[TRUNC6:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS6]](<4 x s16>)
-    ; GFX9: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C1]](s32)
-    ; GFX9: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C2]](s32)
-    ; GFX9: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C3]](s32)
+    ; GFX9: [[AND27:%[0-9]+]]:_(s32) = G_AND [[COPY28]], [[C4]]
+    ; GFX9: [[SHL20:%[0-9]+]]:_(s32) = G_SHL [[AND27]], [[C3]](s32)
+    ; GFX9: [[OR20:%[0-9]+]]:_(s32) = G_OR [[OR19]], [[SHL20]]
     ; GFX9: [[COPY29:%[0-9]+]]:_(s32) = COPY [[UV7]](s32)
+    ; GFX9: [[AND28:%[0-9]+]]:_(s32) = G_AND [[COPY29]], [[C4]]
     ; GFX9: [[COPY30:%[0-9]+]]:_(s32) = COPY [[LSHR21]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC14:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY29]](s32), [[COPY30]](s32)
+    ; GFX9: [[AND29:%[0-9]+]]:_(s32) = G_AND [[COPY30]], [[C4]]
+    ; GFX9: [[SHL21:%[0-9]+]]:_(s32) = G_SHL [[AND29]], [[C1]](s32)
+    ; GFX9: [[OR21:%[0-9]+]]:_(s32) = G_OR [[AND28]], [[SHL21]]
     ; GFX9: [[COPY31:%[0-9]+]]:_(s32) = COPY [[LSHR22]](s32)
+    ; GFX9: [[AND30:%[0-9]+]]:_(s32) = G_AND [[COPY31]], [[C4]]
+    ; GFX9: [[SHL22:%[0-9]+]]:_(s32) = G_SHL [[AND30]], [[C2]](s32)
+    ; GFX9: [[OR22:%[0-9]+]]:_(s32) = G_OR [[OR21]], [[SHL22]]
     ; GFX9: [[COPY32:%[0-9]+]]:_(s32) = COPY [[LSHR23]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC15:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY31]](s32), [[COPY32]](s32)
-    ; GFX9: [[CONCAT_VECTORS7:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC14]](<2 x s16>), [[BUILD_VECTOR_TRUNC15]](<2 x s16>)
-    ; GFX9: [[TRUNC7:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS7]](<4 x s16>)
-    ; GFX9: [[CONCAT_VECTORS8:%[0-9]+]]:_(<32 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s8>), [[TRUNC1]](<4 x s8>), [[TRUNC2]](<4 x s8>), [[TRUNC3]](<4 x s8>), [[TRUNC4]](<4 x s8>), [[TRUNC5]](<4 x s8>), [[TRUNC6]](<4 x s8>), [[TRUNC7]](<4 x s8>)
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS8]](<32 x s8>)
+    ; GFX9: [[AND31:%[0-9]+]]:_(s32) = G_AND [[COPY32]], [[C4]]
+    ; GFX9: [[SHL23:%[0-9]+]]:_(s32) = G_SHL [[AND31]], [[C3]](s32)
+    ; GFX9: [[OR23:%[0-9]+]]:_(s32) = G_OR [[OR22]], [[SHL23]]
+    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32), [[OR14]](s32), [[OR17]](s32), [[OR20]](s32), [[OR23]](s32)
+    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<8 x s32>)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(<32 x s8>) = G_LOAD %0 :: (load (<32 x s8>), align 32, addrspace 0)
-    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1
+    %2:_(<8 x s32>) = G_BITCAST %1
+    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %2
 ...
 
 ---
@@ -3863,31 +4343,114 @@ body: |
     ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
     ; CI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load (<4 x s16>))
     ; CI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>)
-    ; CI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
-    ; CI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0
-    ; CI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
+    ; CI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
+    ; CI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+    ; CI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; CI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+    ; CI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
+    ; CI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+    ; CI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+    ; CI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+    ; CI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]]
+    ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]]
+    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]]
+    ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]]
+    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+    ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
+    ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]]
+    ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+    ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]]
+    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; CI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
+    ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; VI-LABEL: name: test_load_flat_v3s16_align8
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
     ; VI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load (<4 x s16>))
     ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>)
-    ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
-    ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0
-    ; VI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
+    ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
+    ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+    ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+    ; VI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
+    ; VI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+    ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+    ; VI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+    ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]]
+    ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]]
+    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]]
+    ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]]
+    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; VI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
+    ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]]
+    ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+    ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]]
+    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; VI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
+    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; GFX9-LABEL: name: test_load_flat_v3s16_align8
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
     ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load (<4 x s16>))
     ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>)
-    ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
-    ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0
-    ; GFX9: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
+    ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
+    ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+    ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+    ; GFX9: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
+    ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+    ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+    ; GFX9: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+    ; GFX9: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+    ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
+    ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+    ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32)
+    ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
+    ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+    ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32)
+    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+    ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(<3 x s16>) = G_LOAD %0 :: (load (<3 x s16>), align 8, addrspace 0)
-    %2:_(<4 x s16>) = G_IMPLICIT_DEF
-    %3:_(<4 x s16>) = G_INSERT %2, %1, 0
-    $vgpr0_vgpr1 = COPY %3
+    %2:_(<3 x s16>) = G_IMPLICIT_DEF
+    %3:_(<6 x s16>) = G_CONCAT_VECTORS %1, %2
+    $vgpr0_vgpr1_vgpr2 = COPY %3
 ...
 
 ---
@@ -3900,25 +4463,34 @@ body: |
     ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
     ; CI: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[COPY]](p0) :: (load (<3 x s16>), align 4)
     ; CI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[LOAD]](<3 x s16>), 0
-    ; CI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
+    ; CI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
+    ; CI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
+    ; CI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[LOAD]](<3 x s16>), [[UV]](<3 x s16>)
+    ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS1]](<6 x s16>)
     ; VI-LABEL: name: test_load_flat_v3s16_align4
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
     ; VI: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[COPY]](p0) :: (load (<3 x s16>), align 4)
     ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[LOAD]](<3 x s16>), 0
-    ; VI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
+    ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
+    ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
+    ; VI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[LOAD]](<3 x s16>), [[UV]](<3 x s16>)
+    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS1]](<6 x s16>)
     ; GFX9-LABEL: name: test_load_flat_v3s16_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
     ; GFX9: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[COPY]](p0) :: (load (<3 x s16>), align 4)
     ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[LOAD]](<3 x s16>), 0
-    ; GFX9: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
+    ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
+    ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
+    ; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[LOAD]](<3 x s16>), [[UV]](<3 x s16>)
+    ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS1]](<6 x s16>)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(<3 x s16>) = G_LOAD %0 :: (load (<3 x s16>), align 4, addrspace 0)
-    %2:_(<4 x s16>) = G_IMPLICIT_DEF
-    %3:_(<4 x s16>) = G_INSERT %2, %1, 0
-    $vgpr0_vgpr1 = COPY %3
+    %2:_(<3 x s16>) = G_IMPLICIT_DEF
+    %3:_(<6 x s16>) = G_CONCAT_VECTORS %1, %2
+    $vgpr0_vgpr1_vgpr2 = COPY %3
 ...
 
 ---
@@ -3936,27 +4508,40 @@ body: |
     ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
     ; CI: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
     ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4)
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+    ; CI: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; CI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
+    ; CI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
+    ; CI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; CI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C2]](s32)
+    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
     ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]]
+    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
     ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]]
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32)
+    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]]
+    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
     ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
     ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
-    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]]
-    ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32)
+    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]]
+    ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
+    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
     ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; CI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; CI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
-    ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[DEF]](<2 x s16>)
-    ; CI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
-    ; CI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
-    ; CI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
+    ; CI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]]
+    ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]]
+    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
+    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; CI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+    ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; VI-LABEL: name: test_load_flat_v3s16_align2
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
     ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16))
@@ -3966,27 +4551,40 @@ body: |
     ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
     ; VI: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
     ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4)
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; VI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+    ; VI: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; VI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; VI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
+    ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
+    ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C2]](s32)
+    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
     ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]]
+    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
     ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]]
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32)
+    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]]
+    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
     ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
     ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]]
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32)
+    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]]
+    ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
+    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
     ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; VI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[DEF]](<2 x s16>)
-    ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
-    ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
-    ; VI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
+    ; VI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]]
+    ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]]
+    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
+    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; VI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; GFX9-LABEL: name: test_load_flat_v3s16_align2
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
     ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16))
@@ -3996,23 +4594,32 @@ body: |
     ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
     ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
     ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4)
+    ; GFX9: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+    ; GFX9: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; GFX9: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
+    ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
+    ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C2]](s32)
     ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
     ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
     ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
     ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
-    ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[DEF]](s32)
-    ; GFX9: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
-    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[DEF1]](<2 x s16>)
-    ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
-    ; GFX9: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[UV]](<3 x s16>), 0
-    ; GFX9: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
+    ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32)
+    ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32)
+    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+    ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(<3 x s16>) = G_LOAD %0 :: (load (<3 x s16>), align 2, addrspace 0)
-    %2:_(<4 x s16>) = G_IMPLICIT_DEF
-    %3:_(<4 x s16>) = G_INSERT %2, %1, 0
-    $vgpr0_vgpr1 = COPY %3
+    %2:_(<3 x s16>) = G_IMPLICIT_DEF
+    %3:_(<6 x s16>) = G_CONCAT_VECTORS %1, %2
+    $vgpr0_vgpr1_vgpr2 = COPY %3
 ...
 
 ---
@@ -4063,23 +4670,37 @@ body: |
     ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32)
     ; CI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
     ; CI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
+    ; CI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+    ; CI: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; CI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
+    ; CI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C6]](s32)
+    ; CI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; CI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C6]](s32)
     ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
     ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
     ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32)
     ; CI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]]
-    ; CI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+    ; CI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
     ; CI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; CI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C7]], [[C6]](s32)
+    ; CI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; CI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C7]]
+    ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C6]](s32)
     ; CI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]]
-    ; CI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
-    ; CI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
-    ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[DEF]](<2 x s16>)
-    ; CI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
-    ; CI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
-    ; CI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
+    ; CI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+    ; CI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C7]]
+    ; CI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; CI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C7]]
+    ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C6]](s32)
+    ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL5]]
+    ; CI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+    ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+    ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; VI-LABEL: name: test_load_flat_v3s16_align1
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
     ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
@@ -4116,23 +4737,37 @@ body: |
     ; VI: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]]
     ; VI: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16)
     ; VI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
+    ; VI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+    ; VI: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; VI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; VI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
+    ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C5]](s32)
+    ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C5]](s32)
     ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
     ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
     ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
     ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]]
-    ; VI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+    ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
     ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[C5]](s32)
+    ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C6]]
+    ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
     ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]]
-    ; VI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
-    ; VI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[DEF]](<2 x s16>)
-    ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
-    ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
-    ; VI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
+    ; VI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+    ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C6]]
+    ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C6]]
+    ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C5]](s32)
+    ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL5]]
+    ; VI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; GFX9-LABEL: name: test_load_flat_v3s16_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
     ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
@@ -4169,23 +4804,32 @@ body: |
     ; GFX9: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]]
     ; GFX9: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16)
     ; GFX9: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
+    ; GFX9: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+    ; GFX9: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; GFX9: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
+    ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C5]](s32)
+    ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C5]](s32)
     ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
     ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16)
     ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32)
     ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[OR2]](s16)
-    ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[DEF]](s32)
-    ; GFX9: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
-    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[DEF1]](<2 x s16>)
-    ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
-    ; GFX9: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[UV]](<3 x s16>), 0
-    ; GFX9: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
+    ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[COPY1]](s32)
+    ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY2]](s32), [[COPY3]](s32)
+    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+    ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(<3 x s16>) = G_LOAD %0 :: (load (<3 x s16>), align 1, addrspace 0)
-    %2:_(<4 x s16>) = G_IMPLICIT_DEF
-    %3:_(<4 x s16>) = G_INSERT %2, %1, 0
-    $vgpr0_vgpr1 = COPY %3
+    %2:_(<3 x s16>) = G_IMPLICIT_DEF
+    %3:_(<6 x s16>) = G_CONCAT_VECTORS %1, %2
+    $vgpr0_vgpr1_vgpr2 = COPY %3
 ...
 
 ---

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir
index 872f27dd26c6..4d885cdd0637 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir
@@ -4572,15 +4572,31 @@ body: |
     ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
     ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
     ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
-    ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; SI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; SI: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; SI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]]
+    ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
     ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
-    ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
-    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
-    ; SI: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>)
-    ; SI: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
-    ; SI: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF]](<4 x s32>)
-    ; SI: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[TRUNC1]], [[TRUNC]](<3 x s8>), 0
-    ; SI: $vgpr0 = COPY [[INSERT]](<4 x s8>)
+    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C4]]
+    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+    ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; SI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
+    ; SI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C3]]
+    ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[DEF]](s32)
+    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C4]]
+    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY3]](s32)
+    ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; SI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32)
+    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[OR2]](s32)
+    ; SI: $vgpr0 = COPY [[COPY5]](s32)
     ; CI-HSA-LABEL: name: test_load_global_v3s8_align4
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
@@ -4590,15 +4606,31 @@ body: |
     ; CI-HSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
     ; CI-HSA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
     ; CI-HSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
-    ; CI-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; CI-HSA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CI-HSA: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CI-HSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; CI-HSA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]]
+    ; CI-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CI-HSA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
     ; CI-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
-    ; CI-HSA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
-    ; CI-HSA: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
-    ; CI-HSA: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>)
-    ; CI-HSA: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
-    ; CI-HSA: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF]](<4 x s32>)
-    ; CI-HSA: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[TRUNC1]], [[TRUNC]](<3 x s8>), 0
-    ; CI-HSA: $vgpr0 = COPY [[INSERT]](<4 x s8>)
+    ; CI-HSA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C4]]
+    ; CI-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+    ; CI-HSA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI-HSA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; CI-HSA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
+    ; CI-HSA: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C3]]
+    ; CI-HSA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CI-HSA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[DEF]](s32)
+    ; CI-HSA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C4]]
+    ; CI-HSA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY3]](s32)
+    ; CI-HSA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; CI-HSA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; CI-HSA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; CI-HSA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; CI-HSA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32)
+    ; CI-HSA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; CI-HSA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[OR2]](s32)
+    ; CI-HSA: $vgpr0 = COPY [[COPY5]](s32)
     ; CI-MESA-LABEL: name: test_load_global_v3s8_align4
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
@@ -4608,15 +4640,31 @@ body: |
     ; CI-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
     ; CI-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
     ; CI-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
-    ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; CI-MESA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CI-MESA: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; CI-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]]
+    ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CI-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
     ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
-    ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
-    ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
-    ; CI-MESA: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>)
-    ; CI-MESA: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
-    ; CI-MESA: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF]](<4 x s32>)
-    ; CI-MESA: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[TRUNC1]], [[TRUNC]](<3 x s8>), 0
-    ; CI-MESA: $vgpr0 = COPY [[INSERT]](<4 x s8>)
+    ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C4]]
+    ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+    ; CI-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; CI-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
+    ; CI-MESA: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C3]]
+    ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[DEF]](s32)
+    ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C4]]
+    ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY3]](s32)
+    ; CI-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; CI-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; CI-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32)
+    ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[OR2]](s32)
+    ; CI-MESA: $vgpr0 = COPY [[COPY5]](s32)
     ; VI-LABEL: name: test_load_global_v3s8_align4
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
@@ -4626,15 +4674,27 @@ body: |
     ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
     ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
     ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
-    ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
-    ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
-    ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
-    ; VI: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>)
-    ; VI: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
-    ; VI: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF]](<4 x s32>)
-    ; VI: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[TRUNC1]], [[TRUNC]](<3 x s8>), 0
-    ; VI: $vgpr0 = COPY [[INSERT]](<4 x s8>)
+    ; VI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; VI: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]]
+    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+    ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]]
+    ; VI: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C4]](s16)
+    ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
+    ; VI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C3]]
+    ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
+    ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]]
+    ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C4]](s16)
+    ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32)
+    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[OR2]](s32)
+    ; VI: $vgpr0 = COPY [[COPY1]](s32)
     ; GFX9-HSA-LABEL: name: test_load_global_v3s8_align4
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
@@ -4644,20 +4704,27 @@ body: |
     ; GFX9-HSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
     ; GFX9-HSA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
     ; GFX9-HSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
-    ; GFX9-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
-    ; GFX9-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
-    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
-    ; GFX9-HSA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
     ; GFX9-HSA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[DEF]](s32)
-    ; GFX9-HSA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[DEF1]](<2 x s16>)
-    ; GFX9-HSA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
-    ; GFX9-HSA: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[UV]](<3 x s16>)
-    ; GFX9-HSA: [[DEF2:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
-    ; GFX9-HSA: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF2]](<4 x s32>)
-    ; GFX9-HSA: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[TRUNC1]], [[TRUNC]](<3 x s8>), 0
-    ; GFX9-HSA: $vgpr0 = COPY [[INSERT]](<4 x s8>)
+    ; GFX9-HSA: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX9-HSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX9-HSA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]]
+    ; GFX9-HSA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+    ; GFX9-HSA: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]]
+    ; GFX9-HSA: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9-HSA: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C4]](s16)
+    ; GFX9-HSA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX9-HSA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
+    ; GFX9-HSA: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C3]]
+    ; GFX9-HSA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
+    ; GFX9-HSA: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]]
+    ; GFX9-HSA: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C4]](s16)
+    ; GFX9-HSA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; GFX9-HSA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; GFX9-HSA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; GFX9-HSA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32)
+    ; GFX9-HSA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; GFX9-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[OR2]](s32)
+    ; GFX9-HSA: $vgpr0 = COPY [[COPY1]](s32)
     ; GFX9-MESA-LABEL: name: test_load_global_v3s8_align4
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
@@ -4667,24 +4734,31 @@ body: |
     ; GFX9-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
     ; GFX9-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
     ; GFX9-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
-    ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
-    ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
-    ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
     ; GFX9-MESA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[DEF]](s32)
-    ; GFX9-MESA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[DEF1]](<2 x s16>)
-    ; GFX9-MESA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
-    ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[UV]](<3 x s16>)
-    ; GFX9-MESA: [[DEF2:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
-    ; GFX9-MESA: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF2]](<4 x s32>)
-    ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[TRUNC1]], [[TRUNC]](<3 x s8>), 0
-    ; GFX9-MESA: $vgpr0 = COPY [[INSERT]](<4 x s8>)
+    ; GFX9-MESA: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX9-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]]
+    ; GFX9-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+    ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]]
+    ; GFX9-MESA: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C4]](s16)
+    ; GFX9-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX9-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
+    ; GFX9-MESA: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C3]]
+    ; GFX9-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
+    ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]]
+    ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C4]](s16)
+    ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; GFX9-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; GFX9-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; GFX9-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32)
+    ; GFX9-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[OR2]](s32)
+    ; GFX9-MESA: $vgpr0 = COPY [[COPY1]](s32)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<3 x s8>) = G_LOAD %0 :: (load (<3 x s8>), align 4, addrspace 1)
-    %2:_(<4 x s8>) = G_IMPLICIT_DEF
-    %3:_(<4 x s8>) = G_INSERT %2, %1, 0
+    %2:_(s24) = G_BITCAST %1
+    %3:_(s32) = G_ANYEXT %2
     $vgpr0 = COPY %3
 
 ...
@@ -4704,15 +4778,32 @@ body: |
     ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
     ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
     ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
-    ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; SI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; SI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; SI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
+    ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
     ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
-    ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
-    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
-    ; SI: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>)
-    ; SI: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
-    ; SI: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF]](<4 x s32>)
-    ; SI: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[TRUNC1]], [[TRUNC]](<3 x s8>), 0
-    ; SI: $vgpr0 = COPY [[INSERT]](<4 x s8>)
+    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C4]]
+    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+    ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; SI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; SI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[DEF]](s32)
+    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C4]]
+    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32)
+    ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; SI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
+    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[OR2]](s32)
+    ; SI: $vgpr0 = COPY [[COPY4]](s32)
     ; CI-HSA-LABEL: name: test_load_global_v3s8_align1
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; CI-HSA: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), align 1, addrspace 1)
@@ -4727,15 +4818,31 @@ body: |
     ; CI-HSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C1]](s32)
     ; CI-HSA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
     ; CI-HSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C3]](s32)
-    ; CI-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[OR]](s32)
+    ; CI-HSA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CI-HSA: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CI-HSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR]](s32)
+    ; CI-HSA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]]
+    ; CI-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+    ; CI-HSA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
     ; CI-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
-    ; CI-HSA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
-    ; CI-HSA: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
-    ; CI-HSA: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>)
-    ; CI-HSA: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
-    ; CI-HSA: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF]](<4 x s32>)
-    ; CI-HSA: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[TRUNC1]], [[TRUNC]](<3 x s8>), 0
-    ; CI-HSA: $vgpr0 = COPY [[INSERT]](<4 x s8>)
+    ; CI-HSA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C5]]
+    ; CI-HSA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+    ; CI-HSA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; CI-HSA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; CI-HSA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
+    ; CI-HSA: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C4]]
+    ; CI-HSA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+    ; CI-HSA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[DEF]](s32)
+    ; CI-HSA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C5]]
+    ; CI-HSA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY3]](s32)
+    ; CI-HSA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
+    ; CI-HSA: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; CI-HSA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; CI-HSA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; CI-HSA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32)
+    ; CI-HSA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]]
+    ; CI-HSA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[OR3]](s32)
+    ; CI-HSA: $vgpr0 = COPY [[COPY5]](s32)
     ; CI-MESA-LABEL: name: test_load_global_v3s8_align1
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
@@ -4745,15 +4852,32 @@ body: |
     ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
     ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
     ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
-    ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; CI-MESA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CI-MESA: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; CI-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
+    ; CI-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
     ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
-    ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
-    ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
-    ; CI-MESA: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>)
-    ; CI-MESA: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
-    ; CI-MESA: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF]](<4 x s32>)
-    ; CI-MESA: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[TRUNC1]], [[TRUNC]](<3 x s8>), 0
-    ; CI-MESA: $vgpr0 = COPY [[INSERT]](<4 x s8>)
+    ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C4]]
+    ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+    ; CI-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; CI-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; CI-MESA: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[DEF]](s32)
+    ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C4]]
+    ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32)
+    ; CI-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; CI-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; CI-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; CI-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
+    ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[OR2]](s32)
+    ; CI-MESA: $vgpr0 = COPY [[COPY4]](s32)
     ; VI-LABEL: name: test_load_global_v3s8_align1
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
@@ -4763,15 +4887,28 @@ body: |
     ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
     ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
     ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
-    ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
-    ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
-    ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
-    ; VI: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>)
-    ; VI: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
-    ; VI: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF]](<4 x s32>)
-    ; VI: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[TRUNC1]], [[TRUNC]](<3 x s8>), 0
-    ; VI: $vgpr0 = COPY [[INSERT]](<4 x s8>)
+    ; VI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; VI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
+    ; VI: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
+    ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; VI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
+    ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]]
+    ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
+    ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
+    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[OR2]](s32)
+    ; VI: $vgpr0 = COPY [[COPY1]](s32)
     ; GFX9-HSA-LABEL: name: test_load_global_v3s8_align1
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; GFX9-HSA: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), align 1, addrspace 1)
@@ -4786,20 +4923,27 @@ body: |
     ; GFX9-HSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C1]](s32)
     ; GFX9-HSA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
     ; GFX9-HSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C3]](s32)
-    ; GFX9-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[OR]](s32)
-    ; GFX9-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
-    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
-    ; GFX9-HSA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
     ; GFX9-HSA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[DEF]](s32)
-    ; GFX9-HSA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[DEF1]](<2 x s16>)
-    ; GFX9-HSA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
-    ; GFX9-HSA: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[UV]](<3 x s16>)
-    ; GFX9-HSA: [[DEF2:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
-    ; GFX9-HSA: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF2]](<4 x s32>)
-    ; GFX9-HSA: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[TRUNC1]], [[TRUNC]](<3 x s8>), 0
-    ; GFX9-HSA: $vgpr0 = COPY [[INSERT]](<4 x s8>)
+    ; GFX9-HSA: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX9-HSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR]](s32)
+    ; GFX9-HSA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]]
+    ; GFX9-HSA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+    ; GFX9-HSA: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C4]]
+    ; GFX9-HSA: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9-HSA: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C5]](s16)
+    ; GFX9-HSA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL1]]
+    ; GFX9-HSA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
+    ; GFX9-HSA: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C4]]
+    ; GFX9-HSA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
+    ; GFX9-HSA: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C4]]
+    ; GFX9-HSA: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C5]](s16)
+    ; GFX9-HSA: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL2]]
+    ; GFX9-HSA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; GFX9-HSA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; GFX9-HSA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32)
+    ; GFX9-HSA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]]
+    ; GFX9-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[OR3]](s32)
+    ; GFX9-HSA: $vgpr0 = COPY [[COPY1]](s32)
     ; GFX9-MESA-LABEL: name: test_load_global_v3s8_align1
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
@@ -4809,24 +4953,32 @@ body: |
     ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
     ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
     ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
-    ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
-    ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
-    ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
     ; GFX9-MESA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[DEF]](s32)
-    ; GFX9-MESA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[DEF1]](<2 x s16>)
-    ; GFX9-MESA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
-    ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[UV]](<3 x s16>)
-    ; GFX9-MESA: [[DEF2:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
-    ; GFX9-MESA: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF2]](<4 x s32>)
-    ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[TRUNC1]], [[TRUNC]](<3 x s8>), 0
-    ; GFX9-MESA: $vgpr0 = COPY [[INSERT]](<4 x s8>)
+    ; GFX9-MESA: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX9-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; GFX9-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
+    ; GFX9-MESA: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
+    ; GFX9-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX9-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; GFX9-MESA: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; GFX9-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
+    ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]]
+    ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
+    ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; GFX9-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; GFX9-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; GFX9-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
+    ; GFX9-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[OR2]](s32)
+    ; GFX9-MESA: $vgpr0 = COPY [[COPY1]](s32)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<3 x s8>) = G_LOAD %0 :: (load (<3 x s8>), align 1, addrspace 1)
-    %2:_(<4 x s8>) = G_IMPLICIT_DEF
-    %3:_(<4 x s8>) = G_INSERT %2, %1, 0
+    %2:_(s24) = G_BITCAST %1
+    %3:_(s32) = G_ANYEXT %2
     $vgpr0 = COPY %3
 ...
 
@@ -5234,23 +5386,40 @@ body: |
     ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
     ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
     ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
+    ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; SI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
+    ; SI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
     ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
+    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
     ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]]
+    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
     ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]]
+    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+    ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
     ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
-    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32)
-    ; SI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>)
-    ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
-    ; SI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
-    ; SI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
+    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
     ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV1]](s32)
+    ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]]
     ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32)
+    ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]]
+    ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; SI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
     ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32)
+    ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]]
+    ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C1]](s32)
+    ; SI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
     ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32)
-    ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32)
-    ; SI: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR1]](<4 x s32>)
-    ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s8>), [[TRUNC1]](<4 x s8>)
-    ; SI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<8 x s8>)
+    ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]]
+    ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
+    ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
+    ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     ; CI-HSA-LABEL: name: test_load_global_v8s8_align8
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; CI-HSA: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), addrspace 1)
@@ -5261,23 +5430,40 @@ body: |
     ; CI-HSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
     ; CI-HSA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
     ; CI-HSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
+    ; CI-HSA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; CI-HSA: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
+    ; CI-HSA: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; CI-HSA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
     ; CI-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
+    ; CI-HSA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
     ; CI-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; CI-HSA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]]
+    ; CI-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; CI-HSA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
     ; CI-HSA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; CI-HSA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]]
+    ; CI-HSA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+    ; CI-HSA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
     ; CI-HSA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
-    ; CI-HSA: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32)
-    ; CI-HSA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>)
-    ; CI-HSA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
-    ; CI-HSA: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
-    ; CI-HSA: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; CI-HSA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
+    ; CI-HSA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; CI-HSA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
     ; CI-HSA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV1]](s32)
+    ; CI-HSA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]]
     ; CI-HSA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32)
+    ; CI-HSA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]]
+    ; CI-HSA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; CI-HSA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
     ; CI-HSA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32)
+    ; CI-HSA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]]
+    ; CI-HSA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C1]](s32)
+    ; CI-HSA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
     ; CI-HSA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32)
-    ; CI-HSA: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32)
-    ; CI-HSA: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR1]](<4 x s32>)
-    ; CI-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s8>), [[TRUNC1]](<4 x s8>)
-    ; CI-HSA: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<8 x s8>)
+    ; CI-HSA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]]
+    ; CI-HSA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
+    ; CI-HSA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; CI-HSA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
+    ; CI-HSA: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     ; CI-MESA-LABEL: name: test_load_global_v8s8_align8
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; CI-MESA: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), addrspace 1)
@@ -5288,23 +5474,40 @@ body: |
     ; CI-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
     ; CI-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
     ; CI-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
+    ; CI-MESA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; CI-MESA: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
+    ; CI-MESA: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
     ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
+    ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
     ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]]
+    ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
     ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; CI-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]]
+    ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+    ; CI-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
     ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
-    ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32)
-    ; CI-MESA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>)
-    ; CI-MESA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
-    ; CI-MESA: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
-    ; CI-MESA: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
+    ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
     ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV1]](s32)
+    ; CI-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]]
     ; CI-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32)
+    ; CI-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]]
+    ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; CI-MESA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
     ; CI-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32)
+    ; CI-MESA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]]
+    ; CI-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C1]](s32)
+    ; CI-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
     ; CI-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32)
-    ; CI-MESA: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32)
-    ; CI-MESA: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR1]](<4 x s32>)
-    ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s8>), [[TRUNC1]](<4 x s8>)
-    ; CI-MESA: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<8 x s8>)
+    ; CI-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]]
+    ; CI-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
+    ; CI-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
+    ; CI-MESA: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     ; VI-LABEL: name: test_load_global_v8s8_align8
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; VI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), addrspace 1)
@@ -5315,23 +5518,40 @@ body: |
     ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
     ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
     ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
+    ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; VI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
+    ; VI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
     ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
+    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
     ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]]
+    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
     ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]]
+    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
     ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32)
-    ; VI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>)
-    ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
-    ; VI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
-    ; VI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
+    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
     ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV1]](s32)
+    ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]]
     ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32)
+    ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]]
+    ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
     ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32)
+    ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]]
+    ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C1]](s32)
+    ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
     ; VI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32)
-    ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32)
-    ; VI: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR1]](<4 x s32>)
-    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s8>), [[TRUNC1]](<4 x s8>)
-    ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<8 x s8>)
+    ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]]
+    ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
+    ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
+    ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     ; GFX9-HSA-LABEL: name: test_load_global_v8s8_align8
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), addrspace 1)
@@ -5342,27 +5562,40 @@ body: |
     ; GFX9-HSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
     ; GFX9-HSA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
     ; GFX9-HSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
+    ; GFX9-HSA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; GFX9-HSA: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
+    ; GFX9-HSA: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; GFX9-HSA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
     ; GFX9-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
+    ; GFX9-HSA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
     ; GFX9-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
-    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
+    ; GFX9-HSA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]]
+    ; GFX9-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; GFX9-HSA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
     ; GFX9-HSA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; GFX9-HSA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]]
+    ; GFX9-HSA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+    ; GFX9-HSA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
     ; GFX9-HSA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
-    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32)
-    ; GFX9-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
-    ; GFX9-HSA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>)
-    ; GFX9-HSA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
-    ; GFX9-HSA: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
-    ; GFX9-HSA: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; GFX9-HSA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
+    ; GFX9-HSA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; GFX9-HSA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
     ; GFX9-HSA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV1]](s32)
+    ; GFX9-HSA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]]
     ; GFX9-HSA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32)
-    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32)
+    ; GFX9-HSA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]]
+    ; GFX9-HSA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; GFX9-HSA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
     ; GFX9-HSA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32)
+    ; GFX9-HSA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]]
+    ; GFX9-HSA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C1]](s32)
+    ; GFX9-HSA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
     ; GFX9-HSA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32)
-    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY7]](s32), [[COPY8]](s32)
-    ; GFX9-HSA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>)
-    ; GFX9-HSA: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS1]](<4 x s16>)
-    ; GFX9-HSA: [[CONCAT_VECTORS2:%[0-9]+]]:_(<8 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s8>), [[TRUNC1]](<4 x s8>)
-    ; GFX9-HSA: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS2]](<8 x s8>)
+    ; GFX9-HSA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]]
+    ; GFX9-HSA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
+    ; GFX9-HSA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; GFX9-HSA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
+    ; GFX9-HSA: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     ; GFX9-MESA-LABEL: name: test_load_global_v8s8_align8
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), addrspace 1)
@@ -5373,30 +5606,44 @@ body: |
     ; GFX9-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
     ; GFX9-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
     ; GFX9-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
+    ; GFX9-MESA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; GFX9-MESA: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
+    ; GFX9-MESA: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; GFX9-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
     ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
+    ; GFX9-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
     ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
+    ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]]
+    ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; GFX9-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
     ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; GFX9-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]]
+    ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+    ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
     ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32)
-    ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
-    ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>)
-    ; GFX9-MESA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
-    ; GFX9-MESA: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
-    ; GFX9-MESA: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
+    ; GFX9-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; GFX9-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
     ; GFX9-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV1]](s32)
+    ; GFX9-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]]
     ; GFX9-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32)
+    ; GFX9-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]]
+    ; GFX9-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; GFX9-MESA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
     ; GFX9-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32)
+    ; GFX9-MESA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]]
+    ; GFX9-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C1]](s32)
+    ; GFX9-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
     ; GFX9-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY7]](s32), [[COPY8]](s32)
-    ; GFX9-MESA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>)
-    ; GFX9-MESA: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS1]](<4 x s16>)
-    ; GFX9-MESA: [[CONCAT_VECTORS2:%[0-9]+]]:_(<8 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s8>), [[TRUNC1]](<4 x s8>)
-    ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS2]](<8 x s8>)
+    ; GFX9-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]]
+    ; GFX9-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
+    ; GFX9-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
+    ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<8 x s8>) = G_LOAD %0 :: (load (<8 x s8>), align 8, addrspace 1)
-    $vgpr0_vgpr1 = COPY %1
+    %2:_(<2 x s32>) = G_BITCAST %1
+    $vgpr0_vgpr1 = COPY %2
 ...
 
 ---
@@ -5415,41 +5662,74 @@ body: |
     ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
     ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
     ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
+    ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; SI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
+    ; SI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; SI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
+    ; SI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
+    ; SI: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
+    ; SI: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
+    ; SI: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
+    ; SI: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
+    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
     ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
+    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
     ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]]
+    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
     ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]]
+    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+    ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
     ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
-    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32)
-    ; SI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>)
-    ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
-    ; SI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
-    ; SI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
+    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
     ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV1]](s32)
+    ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]]
     ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32)
+    ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]]
+    ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; SI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
     ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32)
+    ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]]
+    ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C1]](s32)
+    ; SI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
     ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32)
-    ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32)
-    ; SI: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR1]](<4 x s32>)
-    ; SI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
-    ; SI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
-    ; SI: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
+    ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]]
+    ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
+    ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
     ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[UV2]](s32)
+    ; SI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]]
     ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32)
+    ; SI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C3]]
+    ; SI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32)
+    ; SI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
     ; SI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR7]](s32)
+    ; SI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C3]]
+    ; SI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C1]](s32)
+    ; SI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
     ; SI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR8]](s32)
-    ; SI: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32)
-    ; SI: [[TRUNC2:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR2]](<4 x s32>)
-    ; SI: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
-    ; SI: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
-    ; SI: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
+    ; SI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C3]]
+    ; SI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32)
+    ; SI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
     ; SI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[UV3]](s32)
+    ; SI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]]
     ; SI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR9]](s32)
+    ; SI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C3]]
+    ; SI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C]](s32)
+    ; SI: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
     ; SI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR10]](s32)
+    ; SI: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C3]]
+    ; SI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C1]](s32)
+    ; SI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
     ; SI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR11]](s32)
-    ; SI: [[BUILD_VECTOR3:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32)
-    ; SI: [[TRUNC3:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR3]](<4 x s32>)
-    ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s8>), [[TRUNC1]](<4 x s8>), [[TRUNC2]](<4 x s8>), [[TRUNC3]](<4 x s8>)
-    ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<16 x s8>)
+    ; SI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C3]]
+    ; SI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C2]](s32)
+    ; SI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
+    ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
     ; CI-HSA-LABEL: name: test_load_global_v16s8_align16
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; CI-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
@@ -5460,41 +5740,74 @@ body: |
     ; CI-HSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
     ; CI-HSA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
     ; CI-HSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
+    ; CI-HSA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; CI-HSA: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
+    ; CI-HSA: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; CI-HSA: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
+    ; CI-HSA: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
+    ; CI-HSA: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
+    ; CI-HSA: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
+    ; CI-HSA: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
+    ; CI-HSA: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
+    ; CI-HSA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
     ; CI-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
+    ; CI-HSA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
     ; CI-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; CI-HSA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]]
+    ; CI-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; CI-HSA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
     ; CI-HSA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; CI-HSA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]]
+    ; CI-HSA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+    ; CI-HSA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
     ; CI-HSA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
-    ; CI-HSA: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32)
-    ; CI-HSA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>)
-    ; CI-HSA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
-    ; CI-HSA: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
-    ; CI-HSA: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; CI-HSA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
+    ; CI-HSA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; CI-HSA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
     ; CI-HSA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV1]](s32)
+    ; CI-HSA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]]
     ; CI-HSA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32)
+    ; CI-HSA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]]
+    ; CI-HSA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; CI-HSA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
     ; CI-HSA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32)
+    ; CI-HSA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]]
+    ; CI-HSA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C1]](s32)
+    ; CI-HSA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
     ; CI-HSA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32)
-    ; CI-HSA: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32)
-    ; CI-HSA: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR1]](<4 x s32>)
-    ; CI-HSA: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
-    ; CI-HSA: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
-    ; CI-HSA: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
+    ; CI-HSA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]]
+    ; CI-HSA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
+    ; CI-HSA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
     ; CI-HSA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[UV2]](s32)
+    ; CI-HSA: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]]
     ; CI-HSA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32)
+    ; CI-HSA: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C3]]
+    ; CI-HSA: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32)
+    ; CI-HSA: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
     ; CI-HSA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR7]](s32)
+    ; CI-HSA: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C3]]
+    ; CI-HSA: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C1]](s32)
+    ; CI-HSA: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
     ; CI-HSA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR8]](s32)
-    ; CI-HSA: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32)
-    ; CI-HSA: [[TRUNC2:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR2]](<4 x s32>)
-    ; CI-HSA: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
-    ; CI-HSA: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
-    ; CI-HSA: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
+    ; CI-HSA: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C3]]
+    ; CI-HSA: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32)
+    ; CI-HSA: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
     ; CI-HSA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[UV3]](s32)
+    ; CI-HSA: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]]
     ; CI-HSA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR9]](s32)
+    ; CI-HSA: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C3]]
+    ; CI-HSA: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C]](s32)
+    ; CI-HSA: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
     ; CI-HSA: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR10]](s32)
+    ; CI-HSA: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C3]]
+    ; CI-HSA: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C1]](s32)
+    ; CI-HSA: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
     ; CI-HSA: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR11]](s32)
-    ; CI-HSA: [[BUILD_VECTOR3:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32)
-    ; CI-HSA: [[TRUNC3:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR3]](<4 x s32>)
-    ; CI-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s8>), [[TRUNC1]](<4 x s8>), [[TRUNC2]](<4 x s8>), [[TRUNC3]](<4 x s8>)
-    ; CI-HSA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<16 x s8>)
+    ; CI-HSA: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C3]]
+    ; CI-HSA: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C2]](s32)
+    ; CI-HSA: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; CI-HSA: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
+    ; CI-HSA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
     ; CI-MESA-LABEL: name: test_load_global_v16s8_align16
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
@@ -5505,41 +5818,74 @@ body: |
     ; CI-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
     ; CI-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
     ; CI-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
+    ; CI-MESA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; CI-MESA: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
+    ; CI-MESA: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; CI-MESA: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
+    ; CI-MESA: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
+    ; CI-MESA: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
+    ; CI-MESA: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
+    ; CI-MESA: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
+    ; CI-MESA: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
+    ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
     ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
+    ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
     ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]]
+    ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
     ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; CI-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]]
+    ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+    ; CI-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
     ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
-    ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32)
-    ; CI-MESA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>)
-    ; CI-MESA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
-    ; CI-MESA: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
-    ; CI-MESA: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
+    ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
     ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV1]](s32)
+    ; CI-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]]
     ; CI-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32)
+    ; CI-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]]
+    ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; CI-MESA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
     ; CI-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32)
+    ; CI-MESA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]]
+    ; CI-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C1]](s32)
+    ; CI-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
     ; CI-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32)
-    ; CI-MESA: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32)
-    ; CI-MESA: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR1]](<4 x s32>)
-    ; CI-MESA: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
-    ; CI-MESA: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
-    ; CI-MESA: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
+    ; CI-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]]
+    ; CI-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
+    ; CI-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
     ; CI-MESA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[UV2]](s32)
+    ; CI-MESA: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]]
     ; CI-MESA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32)
+    ; CI-MESA: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C3]]
+    ; CI-MESA: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32)
+    ; CI-MESA: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
     ; CI-MESA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR7]](s32)
+    ; CI-MESA: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C3]]
+    ; CI-MESA: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C1]](s32)
+    ; CI-MESA: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
     ; CI-MESA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR8]](s32)
-    ; CI-MESA: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32)
-    ; CI-MESA: [[TRUNC2:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR2]](<4 x s32>)
-    ; CI-MESA: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
-    ; CI-MESA: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
-    ; CI-MESA: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
+    ; CI-MESA: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C3]]
+    ; CI-MESA: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32)
+    ; CI-MESA: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
     ; CI-MESA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[UV3]](s32)
+    ; CI-MESA: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]]
     ; CI-MESA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR9]](s32)
+    ; CI-MESA: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C3]]
+    ; CI-MESA: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C]](s32)
+    ; CI-MESA: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
     ; CI-MESA: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR10]](s32)
+    ; CI-MESA: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C3]]
+    ; CI-MESA: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C1]](s32)
+    ; CI-MESA: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
     ; CI-MESA: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR11]](s32)
-    ; CI-MESA: [[BUILD_VECTOR3:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32)
-    ; CI-MESA: [[TRUNC3:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR3]](<4 x s32>)
-    ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s8>), [[TRUNC1]](<4 x s8>), [[TRUNC2]](<4 x s8>), [[TRUNC3]](<4 x s8>)
-    ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<16 x s8>)
+    ; CI-MESA: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C3]]
+    ; CI-MESA: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C2]](s32)
+    ; CI-MESA: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
+    ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
     ; VI-LABEL: name: test_load_global_v16s8_align16
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
@@ -5550,41 +5896,74 @@ body: |
     ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
     ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
     ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
+    ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; VI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
+    ; VI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; VI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
+    ; VI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
+    ; VI: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
+    ; VI: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
+    ; VI: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
+    ; VI: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
+    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
     ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
+    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
     ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]]
+    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
     ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]]
+    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
     ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32)
-    ; VI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>)
-    ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
-    ; VI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
-    ; VI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
+    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
     ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV1]](s32)
+    ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]]
     ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32)
+    ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]]
+    ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
     ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32)
+    ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]]
+    ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C1]](s32)
+    ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
     ; VI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32)
-    ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32)
-    ; VI: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR1]](<4 x s32>)
-    ; VI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
-    ; VI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
-    ; VI: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
+    ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]]
+    ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
+    ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
     ; VI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[UV2]](s32)
+    ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]]
     ; VI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32)
-    ; VI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR7]](s32)
+    ; VI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C3]]
+    ; VI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32)
+    ; VI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; VI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR7]](s32)
+    ; VI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C3]]
+    ; VI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C1]](s32)
+    ; VI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
     ; VI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR8]](s32)
-    ; VI: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32)
-    ; VI: [[TRUNC2:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR2]](<4 x s32>)
-    ; VI: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
-    ; VI: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
-    ; VI: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
+    ; VI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C3]]
+    ; VI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32)
+    ; VI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
     ; VI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[UV3]](s32)
+    ; VI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]]
     ; VI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR9]](s32)
+    ; VI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C3]]
+    ; VI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C]](s32)
+    ; VI: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
     ; VI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR10]](s32)
+    ; VI: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C3]]
+    ; VI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C1]](s32)
+    ; VI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
     ; VI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR11]](s32)
-    ; VI: [[BUILD_VECTOR3:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32)
-    ; VI: [[TRUNC3:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR3]](<4 x s32>)
-    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s8>), [[TRUNC1]](<4 x s8>), [[TRUNC2]](<4 x s8>), [[TRUNC3]](<4 x s8>)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<16 x s8>)
+    ; VI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C3]]
+    ; VI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C2]](s32)
+    ; VI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
+    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
     ; GFX9-HSA-LABEL: name: test_load_global_v16s8_align16
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
@@ -5595,49 +5974,74 @@ body: |
     ; GFX9-HSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
     ; GFX9-HSA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
     ; GFX9-HSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
+    ; GFX9-HSA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; GFX9-HSA: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
+    ; GFX9-HSA: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; GFX9-HSA: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
+    ; GFX9-HSA: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
+    ; GFX9-HSA: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
+    ; GFX9-HSA: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
+    ; GFX9-HSA: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
+    ; GFX9-HSA: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
+    ; GFX9-HSA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
     ; GFX9-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
+    ; GFX9-HSA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
     ; GFX9-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
-    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
+    ; GFX9-HSA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]]
+    ; GFX9-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; GFX9-HSA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
     ; GFX9-HSA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; GFX9-HSA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]]
+    ; GFX9-HSA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+    ; GFX9-HSA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
     ; GFX9-HSA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
-    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32)
-    ; GFX9-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
-    ; GFX9-HSA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>)
-    ; GFX9-HSA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
-    ; GFX9-HSA: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
-    ; GFX9-HSA: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; GFX9-HSA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
+    ; GFX9-HSA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; GFX9-HSA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
     ; GFX9-HSA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV1]](s32)
+    ; GFX9-HSA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]]
     ; GFX9-HSA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32)
-    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32)
+    ; GFX9-HSA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]]
+    ; GFX9-HSA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; GFX9-HSA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
     ; GFX9-HSA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32)
+    ; GFX9-HSA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]]
+    ; GFX9-HSA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C1]](s32)
+    ; GFX9-HSA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
     ; GFX9-HSA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32)
-    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY7]](s32), [[COPY8]](s32)
-    ; GFX9-HSA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>)
-    ; GFX9-HSA: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS1]](<4 x s16>)
-    ; GFX9-HSA: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
-    ; GFX9-HSA: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
-    ; GFX9-HSA: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
+    ; GFX9-HSA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]]
+    ; GFX9-HSA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
+    ; GFX9-HSA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
     ; GFX9-HSA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[UV2]](s32)
+    ; GFX9-HSA: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]]
     ; GFX9-HSA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32)
-    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY9]](s32), [[COPY10]](s32)
+    ; GFX9-HSA: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C3]]
+    ; GFX9-HSA: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32)
+    ; GFX9-HSA: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
     ; GFX9-HSA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR7]](s32)
+    ; GFX9-HSA: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C3]]
+    ; GFX9-HSA: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C1]](s32)
+    ; GFX9-HSA: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
     ; GFX9-HSA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR8]](s32)
-    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY11]](s32), [[COPY12]](s32)
-    ; GFX9-HSA: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC4]](<2 x s16>), [[BUILD_VECTOR_TRUNC5]](<2 x s16>)
-    ; GFX9-HSA: [[TRUNC2:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS2]](<4 x s16>)
-    ; GFX9-HSA: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
-    ; GFX9-HSA: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
-    ; GFX9-HSA: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
+    ; GFX9-HSA: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C3]]
+    ; GFX9-HSA: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32)
+    ; GFX9-HSA: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
     ; GFX9-HSA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[UV3]](s32)
+    ; GFX9-HSA: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]]
     ; GFX9-HSA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR9]](s32)
-    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY13]](s32), [[COPY14]](s32)
+    ; GFX9-HSA: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C3]]
+    ; GFX9-HSA: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C]](s32)
+    ; GFX9-HSA: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
     ; GFX9-HSA: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR10]](s32)
+    ; GFX9-HSA: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C3]]
+    ; GFX9-HSA: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C1]](s32)
+    ; GFX9-HSA: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
     ; GFX9-HSA: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR11]](s32)
-    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC7:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[COPY16]](s32)
-    ; GFX9-HSA: [[CONCAT_VECTORS3:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC6]](<2 x s16>), [[BUILD_VECTOR_TRUNC7]](<2 x s16>)
-    ; GFX9-HSA: [[TRUNC3:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS3]](<4 x s16>)
-    ; GFX9-HSA: [[CONCAT_VECTORS4:%[0-9]+]]:_(<16 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s8>), [[TRUNC1]](<4 x s8>), [[TRUNC2]](<4 x s8>), [[TRUNC3]](<4 x s8>)
-    ; GFX9-HSA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS4]](<16 x s8>)
+    ; GFX9-HSA: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C3]]
+    ; GFX9-HSA: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C2]](s32)
+    ; GFX9-HSA: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; GFX9-HSA: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
+    ; GFX9-HSA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
     ; GFX9-MESA-LABEL: name: test_load_global_v16s8_align16
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
@@ -5648,52 +6052,78 @@ body: |
     ; GFX9-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
     ; GFX9-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
     ; GFX9-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
+    ; GFX9-MESA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; GFX9-MESA: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
+    ; GFX9-MESA: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; GFX9-MESA: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
+    ; GFX9-MESA: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
+    ; GFX9-MESA: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
+    ; GFX9-MESA: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
+    ; GFX9-MESA: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
+    ; GFX9-MESA: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
+    ; GFX9-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
     ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
+    ; GFX9-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
     ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
+    ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]]
+    ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; GFX9-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
     ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; GFX9-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]]
+    ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+    ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
     ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32)
-    ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
-    ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>)
-    ; GFX9-MESA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
-    ; GFX9-MESA: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
-    ; GFX9-MESA: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
+    ; GFX9-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; GFX9-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
     ; GFX9-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV1]](s32)
+    ; GFX9-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]]
     ; GFX9-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32)
+    ; GFX9-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]]
+    ; GFX9-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; GFX9-MESA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
     ; GFX9-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32)
+    ; GFX9-MESA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]]
+    ; GFX9-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C1]](s32)
+    ; GFX9-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
     ; GFX9-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY7]](s32), [[COPY8]](s32)
-    ; GFX9-MESA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>)
-    ; GFX9-MESA: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS1]](<4 x s16>)
-    ; GFX9-MESA: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
-    ; GFX9-MESA: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
-    ; GFX9-MESA: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
+    ; GFX9-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]]
+    ; GFX9-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
+    ; GFX9-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
     ; GFX9-MESA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[UV2]](s32)
+    ; GFX9-MESA: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]]
     ; GFX9-MESA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY9]](s32), [[COPY10]](s32)
+    ; GFX9-MESA: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C3]]
+    ; GFX9-MESA: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32)
+    ; GFX9-MESA: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
     ; GFX9-MESA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR7]](s32)
+    ; GFX9-MESA: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C3]]
+    ; GFX9-MESA: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C1]](s32)
+    ; GFX9-MESA: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
     ; GFX9-MESA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR8]](s32)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY11]](s32), [[COPY12]](s32)
-    ; GFX9-MESA: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC4]](<2 x s16>), [[BUILD_VECTOR_TRUNC5]](<2 x s16>)
-    ; GFX9-MESA: [[TRUNC2:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS2]](<4 x s16>)
-    ; GFX9-MESA: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
-    ; GFX9-MESA: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
-    ; GFX9-MESA: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
+    ; GFX9-MESA: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C3]]
+    ; GFX9-MESA: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32)
+    ; GFX9-MESA: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
     ; GFX9-MESA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[UV3]](s32)
+    ; GFX9-MESA: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]]
     ; GFX9-MESA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR9]](s32)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY13]](s32), [[COPY14]](s32)
+    ; GFX9-MESA: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C3]]
+    ; GFX9-MESA: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C]](s32)
+    ; GFX9-MESA: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
     ; GFX9-MESA: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR10]](s32)
+    ; GFX9-MESA: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C3]]
+    ; GFX9-MESA: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C1]](s32)
+    ; GFX9-MESA: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
     ; GFX9-MESA: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR11]](s32)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC7:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[COPY16]](s32)
-    ; GFX9-MESA: [[CONCAT_VECTORS3:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC6]](<2 x s16>), [[BUILD_VECTOR_TRUNC7]](<2 x s16>)
-    ; GFX9-MESA: [[TRUNC3:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS3]](<4 x s16>)
-    ; GFX9-MESA: [[CONCAT_VECTORS4:%[0-9]+]]:_(<16 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s8>), [[TRUNC1]](<4 x s8>), [[TRUNC2]](<4 x s8>), [[TRUNC3]](<4 x s8>)
-    ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS4]](<16 x s8>)
+    ; GFX9-MESA: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C3]]
+    ; GFX9-MESA: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C2]](s32)
+    ; GFX9-MESA: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
+    ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<16 x s8>) = G_LOAD %0 :: (load (<16 x s8>), align 16, addrspace 1)
-    $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
+    %2:_(<4 x s32>) = G_BITCAST %1
+    $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2
 ...
 
 ---
@@ -5712,77 +6142,142 @@ body: |
     ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
     ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
     ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
+    ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; SI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
+    ; SI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; SI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
+    ; SI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
+    ; SI: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
+    ; SI: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
+    ; SI: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
+    ; SI: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
+    ; SI: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32)
+    ; SI: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C1]](s32)
+    ; SI: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C2]](s32)
+    ; SI: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32)
+    ; SI: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C1]](s32)
+    ; SI: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C2]](s32)
+    ; SI: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C]](s32)
+    ; SI: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C1]](s32)
+    ; SI: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C2]](s32)
+    ; SI: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C]](s32)
+    ; SI: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C1]](s32)
+    ; SI: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C2]](s32)
+    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
     ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
+    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
     ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]]
+    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
     ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]]
+    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+    ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
     ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
-    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32)
-    ; SI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>)
-    ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
-    ; SI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
-    ; SI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
+    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
     ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV1]](s32)
+    ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]]
     ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32)
+    ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]]
+    ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; SI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
     ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32)
+    ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]]
+    ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C1]](s32)
+    ; SI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
     ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32)
-    ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32)
-    ; SI: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR1]](<4 x s32>)
-    ; SI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
-    ; SI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
-    ; SI: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
+    ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]]
+    ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
+    ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
     ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[UV2]](s32)
+    ; SI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]]
     ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32)
+    ; SI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C3]]
+    ; SI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32)
+    ; SI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
     ; SI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR7]](s32)
+    ; SI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C3]]
+    ; SI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C1]](s32)
+    ; SI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
     ; SI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR8]](s32)
-    ; SI: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32)
-    ; SI: [[TRUNC2:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR2]](<4 x s32>)
-    ; SI: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
-    ; SI: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
-    ; SI: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
+    ; SI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C3]]
+    ; SI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32)
+    ; SI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
     ; SI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[UV3]](s32)
+    ; SI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]]
     ; SI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR9]](s32)
+    ; SI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C3]]
+    ; SI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C]](s32)
+    ; SI: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
     ; SI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR10]](s32)
+    ; SI: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C3]]
+    ; SI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C1]](s32)
+    ; SI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
     ; SI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR11]](s32)
-    ; SI: [[BUILD_VECTOR3:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32)
-    ; SI: [[TRUNC3:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR3]](<4 x s32>)
-    ; SI: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32)
-    ; SI: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C1]](s32)
-    ; SI: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C2]](s32)
+    ; SI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C3]]
+    ; SI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C2]](s32)
+    ; SI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
     ; SI: [[COPY17:%[0-9]+]]:_(s32) = COPY [[UV4]](s32)
+    ; SI: [[AND16:%[0-9]+]]:_(s32) = G_AND [[COPY17]], [[C3]]
     ; SI: [[COPY18:%[0-9]+]]:_(s32) = COPY [[LSHR12]](s32)
+    ; SI: [[AND17:%[0-9]+]]:_(s32) = G_AND [[COPY18]], [[C3]]
+    ; SI: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[C]](s32)
+    ; SI: [[OR12:%[0-9]+]]:_(s32) = G_OR [[AND16]], [[SHL12]]
     ; SI: [[COPY19:%[0-9]+]]:_(s32) = COPY [[LSHR13]](s32)
+    ; SI: [[AND18:%[0-9]+]]:_(s32) = G_AND [[COPY19]], [[C3]]
+    ; SI: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND18]], [[C1]](s32)
+    ; SI: [[OR13:%[0-9]+]]:_(s32) = G_OR [[OR12]], [[SHL13]]
     ; SI: [[COPY20:%[0-9]+]]:_(s32) = COPY [[LSHR14]](s32)
-    ; SI: [[BUILD_VECTOR4:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32)
-    ; SI: [[TRUNC4:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR4]](<4 x s32>)
-    ; SI: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32)
-    ; SI: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C1]](s32)
-    ; SI: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C2]](s32)
+    ; SI: [[AND19:%[0-9]+]]:_(s32) = G_AND [[COPY20]], [[C3]]
+    ; SI: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[C2]](s32)
+    ; SI: [[OR14:%[0-9]+]]:_(s32) = G_OR [[OR13]], [[SHL14]]
     ; SI: [[COPY21:%[0-9]+]]:_(s32) = COPY [[UV5]](s32)
+    ; SI: [[AND20:%[0-9]+]]:_(s32) = G_AND [[COPY21]], [[C3]]
     ; SI: [[COPY22:%[0-9]+]]:_(s32) = COPY [[LSHR15]](s32)
+    ; SI: [[AND21:%[0-9]+]]:_(s32) = G_AND [[COPY22]], [[C3]]
+    ; SI: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[C]](s32)
+    ; SI: [[OR15:%[0-9]+]]:_(s32) = G_OR [[AND20]], [[SHL15]]
     ; SI: [[COPY23:%[0-9]+]]:_(s32) = COPY [[LSHR16]](s32)
+    ; SI: [[AND22:%[0-9]+]]:_(s32) = G_AND [[COPY23]], [[C3]]
+    ; SI: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[AND22]], [[C1]](s32)
+    ; SI: [[OR16:%[0-9]+]]:_(s32) = G_OR [[OR15]], [[SHL16]]
     ; SI: [[COPY24:%[0-9]+]]:_(s32) = COPY [[LSHR17]](s32)
-    ; SI: [[BUILD_VECTOR5:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32)
-    ; SI: [[TRUNC5:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR5]](<4 x s32>)
-    ; SI: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C]](s32)
-    ; SI: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C1]](s32)
-    ; SI: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C2]](s32)
+    ; SI: [[AND23:%[0-9]+]]:_(s32) = G_AND [[COPY24]], [[C3]]
+    ; SI: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[C2]](s32)
+    ; SI: [[OR17:%[0-9]+]]:_(s32) = G_OR [[OR16]], [[SHL17]]
     ; SI: [[COPY25:%[0-9]+]]:_(s32) = COPY [[UV6]](s32)
+    ; SI: [[AND24:%[0-9]+]]:_(s32) = G_AND [[COPY25]], [[C3]]
     ; SI: [[COPY26:%[0-9]+]]:_(s32) = COPY [[LSHR18]](s32)
+    ; SI: [[AND25:%[0-9]+]]:_(s32) = G_AND [[COPY26]], [[C3]]
+    ; SI: [[SHL18:%[0-9]+]]:_(s32) = G_SHL [[AND25]], [[C]](s32)
+    ; SI: [[OR18:%[0-9]+]]:_(s32) = G_OR [[AND24]], [[SHL18]]
     ; SI: [[COPY27:%[0-9]+]]:_(s32) = COPY [[LSHR19]](s32)
+    ; SI: [[AND26:%[0-9]+]]:_(s32) = G_AND [[COPY27]], [[C3]]
+    ; SI: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[AND26]], [[C1]](s32)
+    ; SI: [[OR19:%[0-9]+]]:_(s32) = G_OR [[OR18]], [[SHL19]]
     ; SI: [[COPY28:%[0-9]+]]:_(s32) = COPY [[LSHR20]](s32)
-    ; SI: [[BUILD_VECTOR6:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32)
-    ; SI: [[TRUNC6:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR6]](<4 x s32>)
-    ; SI: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C]](s32)
-    ; SI: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C1]](s32)
-    ; SI: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C2]](s32)
+    ; SI: [[AND27:%[0-9]+]]:_(s32) = G_AND [[COPY28]], [[C3]]
+    ; SI: [[SHL20:%[0-9]+]]:_(s32) = G_SHL [[AND27]], [[C2]](s32)
+    ; SI: [[OR20:%[0-9]+]]:_(s32) = G_OR [[OR19]], [[SHL20]]
     ; SI: [[COPY29:%[0-9]+]]:_(s32) = COPY [[UV7]](s32)
+    ; SI: [[AND28:%[0-9]+]]:_(s32) = G_AND [[COPY29]], [[C3]]
     ; SI: [[COPY30:%[0-9]+]]:_(s32) = COPY [[LSHR21]](s32)
+    ; SI: [[AND29:%[0-9]+]]:_(s32) = G_AND [[COPY30]], [[C3]]
+    ; SI: [[SHL21:%[0-9]+]]:_(s32) = G_SHL [[AND29]], [[C]](s32)
+    ; SI: [[OR21:%[0-9]+]]:_(s32) = G_OR [[AND28]], [[SHL21]]
     ; SI: [[COPY31:%[0-9]+]]:_(s32) = COPY [[LSHR22]](s32)
+    ; SI: [[AND30:%[0-9]+]]:_(s32) = G_AND [[COPY31]], [[C3]]
+    ; SI: [[SHL22:%[0-9]+]]:_(s32) = G_SHL [[AND30]], [[C1]](s32)
+    ; SI: [[OR22:%[0-9]+]]:_(s32) = G_OR [[OR21]], [[SHL22]]
     ; SI: [[COPY32:%[0-9]+]]:_(s32) = COPY [[LSHR23]](s32)
-    ; SI: [[BUILD_VECTOR7:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32), [[COPY32]](s32)
-    ; SI: [[TRUNC7:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR7]](<4 x s32>)
-    ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<32 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s8>), [[TRUNC1]](<4 x s8>), [[TRUNC2]](<4 x s8>), [[TRUNC3]](<4 x s8>), [[TRUNC4]](<4 x s8>), [[TRUNC5]](<4 x s8>), [[TRUNC6]](<4 x s8>), [[TRUNC7]](<4 x s8>)
-    ; SI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<32 x s8>)
+    ; SI: [[AND31:%[0-9]+]]:_(s32) = G_AND [[COPY32]], [[C3]]
+    ; SI: [[SHL23:%[0-9]+]]:_(s32) = G_SHL [[AND31]], [[C2]](s32)
+    ; SI: [[OR23:%[0-9]+]]:_(s32) = G_OR [[OR22]], [[SHL23]]
+    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32), [[OR14]](s32), [[OR17]](s32), [[OR20]](s32), [[OR23]](s32)
+    ; SI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<8 x s32>)
     ; CI-HSA-LABEL: name: test_load_global_v32s8_align32
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; CI-HSA: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), addrspace 1)
@@ -5793,77 +6288,142 @@ body: |
     ; CI-HSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
     ; CI-HSA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
     ; CI-HSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
+    ; CI-HSA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; CI-HSA: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
+    ; CI-HSA: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; CI-HSA: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
+    ; CI-HSA: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
+    ; CI-HSA: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
+    ; CI-HSA: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
+    ; CI-HSA: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
+    ; CI-HSA: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
+    ; CI-HSA: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32)
+    ; CI-HSA: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C1]](s32)
+    ; CI-HSA: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C2]](s32)
+    ; CI-HSA: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32)
+    ; CI-HSA: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C1]](s32)
+    ; CI-HSA: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C2]](s32)
+    ; CI-HSA: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C]](s32)
+    ; CI-HSA: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C1]](s32)
+    ; CI-HSA: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C2]](s32)
+    ; CI-HSA: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C]](s32)
+    ; CI-HSA: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C1]](s32)
+    ; CI-HSA: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C2]](s32)
+    ; CI-HSA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
     ; CI-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
+    ; CI-HSA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
     ; CI-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; CI-HSA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]]
+    ; CI-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; CI-HSA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
     ; CI-HSA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; CI-HSA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]]
+    ; CI-HSA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+    ; CI-HSA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
     ; CI-HSA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
-    ; CI-HSA: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32)
-    ; CI-HSA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>)
-    ; CI-HSA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
-    ; CI-HSA: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
-    ; CI-HSA: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; CI-HSA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
+    ; CI-HSA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; CI-HSA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
     ; CI-HSA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV1]](s32)
+    ; CI-HSA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]]
     ; CI-HSA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32)
+    ; CI-HSA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]]
+    ; CI-HSA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; CI-HSA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
     ; CI-HSA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32)
+    ; CI-HSA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]]
+    ; CI-HSA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C1]](s32)
+    ; CI-HSA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
     ; CI-HSA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32)
-    ; CI-HSA: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32)
-    ; CI-HSA: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR1]](<4 x s32>)
-    ; CI-HSA: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
-    ; CI-HSA: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
-    ; CI-HSA: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
+    ; CI-HSA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]]
+    ; CI-HSA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
+    ; CI-HSA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
     ; CI-HSA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[UV2]](s32)
+    ; CI-HSA: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]]
     ; CI-HSA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32)
+    ; CI-HSA: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C3]]
+    ; CI-HSA: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32)
+    ; CI-HSA: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
     ; CI-HSA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR7]](s32)
+    ; CI-HSA: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C3]]
+    ; CI-HSA: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C1]](s32)
+    ; CI-HSA: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
     ; CI-HSA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR8]](s32)
-    ; CI-HSA: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32)
-    ; CI-HSA: [[TRUNC2:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR2]](<4 x s32>)
-    ; CI-HSA: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
-    ; CI-HSA: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
-    ; CI-HSA: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
+    ; CI-HSA: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C3]]
+    ; CI-HSA: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32)
+    ; CI-HSA: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
     ; CI-HSA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[UV3]](s32)
+    ; CI-HSA: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]]
     ; CI-HSA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR9]](s32)
+    ; CI-HSA: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C3]]
+    ; CI-HSA: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C]](s32)
+    ; CI-HSA: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
     ; CI-HSA: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR10]](s32)
+    ; CI-HSA: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C3]]
+    ; CI-HSA: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C1]](s32)
+    ; CI-HSA: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
     ; CI-HSA: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR11]](s32)
-    ; CI-HSA: [[BUILD_VECTOR3:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32)
-    ; CI-HSA: [[TRUNC3:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR3]](<4 x s32>)
-    ; CI-HSA: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32)
-    ; CI-HSA: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C1]](s32)
-    ; CI-HSA: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C2]](s32)
+    ; CI-HSA: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C3]]
+    ; CI-HSA: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C2]](s32)
+    ; CI-HSA: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
     ; CI-HSA: [[COPY17:%[0-9]+]]:_(s32) = COPY [[UV4]](s32)
+    ; CI-HSA: [[AND16:%[0-9]+]]:_(s32) = G_AND [[COPY17]], [[C3]]
     ; CI-HSA: [[COPY18:%[0-9]+]]:_(s32) = COPY [[LSHR12]](s32)
+    ; CI-HSA: [[AND17:%[0-9]+]]:_(s32) = G_AND [[COPY18]], [[C3]]
+    ; CI-HSA: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[C]](s32)
+    ; CI-HSA: [[OR12:%[0-9]+]]:_(s32) = G_OR [[AND16]], [[SHL12]]
     ; CI-HSA: [[COPY19:%[0-9]+]]:_(s32) = COPY [[LSHR13]](s32)
+    ; CI-HSA: [[AND18:%[0-9]+]]:_(s32) = G_AND [[COPY19]], [[C3]]
+    ; CI-HSA: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND18]], [[C1]](s32)
+    ; CI-HSA: [[OR13:%[0-9]+]]:_(s32) = G_OR [[OR12]], [[SHL13]]
     ; CI-HSA: [[COPY20:%[0-9]+]]:_(s32) = COPY [[LSHR14]](s32)
-    ; CI-HSA: [[BUILD_VECTOR4:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32)
-    ; CI-HSA: [[TRUNC4:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR4]](<4 x s32>)
-    ; CI-HSA: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32)
-    ; CI-HSA: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C1]](s32)
-    ; CI-HSA: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C2]](s32)
+    ; CI-HSA: [[AND19:%[0-9]+]]:_(s32) = G_AND [[COPY20]], [[C3]]
+    ; CI-HSA: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[C2]](s32)
+    ; CI-HSA: [[OR14:%[0-9]+]]:_(s32) = G_OR [[OR13]], [[SHL14]]
     ; CI-HSA: [[COPY21:%[0-9]+]]:_(s32) = COPY [[UV5]](s32)
+    ; CI-HSA: [[AND20:%[0-9]+]]:_(s32) = G_AND [[COPY21]], [[C3]]
     ; CI-HSA: [[COPY22:%[0-9]+]]:_(s32) = COPY [[LSHR15]](s32)
+    ; CI-HSA: [[AND21:%[0-9]+]]:_(s32) = G_AND [[COPY22]], [[C3]]
+    ; CI-HSA: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[C]](s32)
+    ; CI-HSA: [[OR15:%[0-9]+]]:_(s32) = G_OR [[AND20]], [[SHL15]]
     ; CI-HSA: [[COPY23:%[0-9]+]]:_(s32) = COPY [[LSHR16]](s32)
+    ; CI-HSA: [[AND22:%[0-9]+]]:_(s32) = G_AND [[COPY23]], [[C3]]
+    ; CI-HSA: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[AND22]], [[C1]](s32)
+    ; CI-HSA: [[OR16:%[0-9]+]]:_(s32) = G_OR [[OR15]], [[SHL16]]
     ; CI-HSA: [[COPY24:%[0-9]+]]:_(s32) = COPY [[LSHR17]](s32)
-    ; CI-HSA: [[BUILD_VECTOR5:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32)
-    ; CI-HSA: [[TRUNC5:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR5]](<4 x s32>)
-    ; CI-HSA: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C]](s32)
-    ; CI-HSA: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C1]](s32)
-    ; CI-HSA: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C2]](s32)
+    ; CI-HSA: [[AND23:%[0-9]+]]:_(s32) = G_AND [[COPY24]], [[C3]]
+    ; CI-HSA: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[C2]](s32)
+    ; CI-HSA: [[OR17:%[0-9]+]]:_(s32) = G_OR [[OR16]], [[SHL17]]
     ; CI-HSA: [[COPY25:%[0-9]+]]:_(s32) = COPY [[UV6]](s32)
+    ; CI-HSA: [[AND24:%[0-9]+]]:_(s32) = G_AND [[COPY25]], [[C3]]
     ; CI-HSA: [[COPY26:%[0-9]+]]:_(s32) = COPY [[LSHR18]](s32)
+    ; CI-HSA: [[AND25:%[0-9]+]]:_(s32) = G_AND [[COPY26]], [[C3]]
+    ; CI-HSA: [[SHL18:%[0-9]+]]:_(s32) = G_SHL [[AND25]], [[C]](s32)
+    ; CI-HSA: [[OR18:%[0-9]+]]:_(s32) = G_OR [[AND24]], [[SHL18]]
     ; CI-HSA: [[COPY27:%[0-9]+]]:_(s32) = COPY [[LSHR19]](s32)
+    ; CI-HSA: [[AND26:%[0-9]+]]:_(s32) = G_AND [[COPY27]], [[C3]]
+    ; CI-HSA: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[AND26]], [[C1]](s32)
+    ; CI-HSA: [[OR19:%[0-9]+]]:_(s32) = G_OR [[OR18]], [[SHL19]]
     ; CI-HSA: [[COPY28:%[0-9]+]]:_(s32) = COPY [[LSHR20]](s32)
-    ; CI-HSA: [[BUILD_VECTOR6:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32)
-    ; CI-HSA: [[TRUNC6:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR6]](<4 x s32>)
-    ; CI-HSA: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C]](s32)
-    ; CI-HSA: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C1]](s32)
-    ; CI-HSA: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C2]](s32)
+    ; CI-HSA: [[AND27:%[0-9]+]]:_(s32) = G_AND [[COPY28]], [[C3]]
+    ; CI-HSA: [[SHL20:%[0-9]+]]:_(s32) = G_SHL [[AND27]], [[C2]](s32)
+    ; CI-HSA: [[OR20:%[0-9]+]]:_(s32) = G_OR [[OR19]], [[SHL20]]
     ; CI-HSA: [[COPY29:%[0-9]+]]:_(s32) = COPY [[UV7]](s32)
+    ; CI-HSA: [[AND28:%[0-9]+]]:_(s32) = G_AND [[COPY29]], [[C3]]
     ; CI-HSA: [[COPY30:%[0-9]+]]:_(s32) = COPY [[LSHR21]](s32)
+    ; CI-HSA: [[AND29:%[0-9]+]]:_(s32) = G_AND [[COPY30]], [[C3]]
+    ; CI-HSA: [[SHL21:%[0-9]+]]:_(s32) = G_SHL [[AND29]], [[C]](s32)
+    ; CI-HSA: [[OR21:%[0-9]+]]:_(s32) = G_OR [[AND28]], [[SHL21]]
     ; CI-HSA: [[COPY31:%[0-9]+]]:_(s32) = COPY [[LSHR22]](s32)
+    ; CI-HSA: [[AND30:%[0-9]+]]:_(s32) = G_AND [[COPY31]], [[C3]]
+    ; CI-HSA: [[SHL22:%[0-9]+]]:_(s32) = G_SHL [[AND30]], [[C1]](s32)
+    ; CI-HSA: [[OR22:%[0-9]+]]:_(s32) = G_OR [[OR21]], [[SHL22]]
     ; CI-HSA: [[COPY32:%[0-9]+]]:_(s32) = COPY [[LSHR23]](s32)
-    ; CI-HSA: [[BUILD_VECTOR7:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32), [[COPY32]](s32)
-    ; CI-HSA: [[TRUNC7:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR7]](<4 x s32>)
-    ; CI-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<32 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s8>), [[TRUNC1]](<4 x s8>), [[TRUNC2]](<4 x s8>), [[TRUNC3]](<4 x s8>), [[TRUNC4]](<4 x s8>), [[TRUNC5]](<4 x s8>), [[TRUNC6]](<4 x s8>), [[TRUNC7]](<4 x s8>)
-    ; CI-HSA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<32 x s8>)
+    ; CI-HSA: [[AND31:%[0-9]+]]:_(s32) = G_AND [[COPY32]], [[C3]]
+    ; CI-HSA: [[SHL23:%[0-9]+]]:_(s32) = G_SHL [[AND31]], [[C2]](s32)
+    ; CI-HSA: [[OR23:%[0-9]+]]:_(s32) = G_OR [[OR22]], [[SHL23]]
+    ; CI-HSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32), [[OR14]](s32), [[OR17]](s32), [[OR20]](s32), [[OR23]](s32)
+    ; CI-HSA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<8 x s32>)
     ; CI-MESA-LABEL: name: test_load_global_v32s8_align32
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; CI-MESA: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), addrspace 1)
@@ -5874,77 +6434,142 @@ body: |
     ; CI-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
     ; CI-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
     ; CI-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
-    ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
-    ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
-    ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
-    ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
-    ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32)
-    ; CI-MESA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>)
     ; CI-MESA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
     ; CI-MESA: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
     ; CI-MESA: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
-    ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV1]](s32)
-    ; CI-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32)
-    ; CI-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32)
-    ; CI-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32)
-    ; CI-MESA: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32)
-    ; CI-MESA: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR1]](<4 x s32>)
     ; CI-MESA: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
     ; CI-MESA: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
     ; CI-MESA: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
-    ; CI-MESA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[UV2]](s32)
-    ; CI-MESA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32)
-    ; CI-MESA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR7]](s32)
-    ; CI-MESA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR8]](s32)
-    ; CI-MESA: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32)
-    ; CI-MESA: [[TRUNC2:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR2]](<4 x s32>)
     ; CI-MESA: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
     ; CI-MESA: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
     ; CI-MESA: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
-    ; CI-MESA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[UV3]](s32)
-    ; CI-MESA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR9]](s32)
-    ; CI-MESA: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR10]](s32)
-    ; CI-MESA: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR11]](s32)
-    ; CI-MESA: [[BUILD_VECTOR3:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32)
-    ; CI-MESA: [[TRUNC3:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR3]](<4 x s32>)
     ; CI-MESA: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32)
     ; CI-MESA: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C1]](s32)
     ; CI-MESA: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C2]](s32)
+    ; CI-MESA: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32)
+    ; CI-MESA: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C1]](s32)
+    ; CI-MESA: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C2]](s32)
+    ; CI-MESA: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C]](s32)
+    ; CI-MESA: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C1]](s32)
+    ; CI-MESA: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C2]](s32)
+    ; CI-MESA: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C]](s32)
+    ; CI-MESA: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C1]](s32)
+    ; CI-MESA: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C2]](s32)
+    ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
+    ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
+    ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]]
+    ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; CI-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]]
+    ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+    ; CI-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
+    ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
+    ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV1]](s32)
+    ; CI-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]]
+    ; CI-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32)
+    ; CI-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]]
+    ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; CI-MESA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; CI-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32)
+    ; CI-MESA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]]
+    ; CI-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C1]](s32)
+    ; CI-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; CI-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32)
+    ; CI-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]]
+    ; CI-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
+    ; CI-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; CI-MESA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[UV2]](s32)
+    ; CI-MESA: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]]
+    ; CI-MESA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32)
+    ; CI-MESA: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C3]]
+    ; CI-MESA: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32)
+    ; CI-MESA: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; CI-MESA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR7]](s32)
+    ; CI-MESA: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C3]]
+    ; CI-MESA: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C1]](s32)
+    ; CI-MESA: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+    ; CI-MESA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR8]](s32)
+    ; CI-MESA: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C3]]
+    ; CI-MESA: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32)
+    ; CI-MESA: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+    ; CI-MESA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[UV3]](s32)
+    ; CI-MESA: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]]
+    ; CI-MESA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR9]](s32)
+    ; CI-MESA: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C3]]
+    ; CI-MESA: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C]](s32)
+    ; CI-MESA: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
+    ; CI-MESA: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR10]](s32)
+    ; CI-MESA: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C3]]
+    ; CI-MESA: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C1]](s32)
+    ; CI-MESA: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
+    ; CI-MESA: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR11]](s32)
+    ; CI-MESA: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C3]]
+    ; CI-MESA: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C2]](s32)
+    ; CI-MESA: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
     ; CI-MESA: [[COPY17:%[0-9]+]]:_(s32) = COPY [[UV4]](s32)
+    ; CI-MESA: [[AND16:%[0-9]+]]:_(s32) = G_AND [[COPY17]], [[C3]]
     ; CI-MESA: [[COPY18:%[0-9]+]]:_(s32) = COPY [[LSHR12]](s32)
+    ; CI-MESA: [[AND17:%[0-9]+]]:_(s32) = G_AND [[COPY18]], [[C3]]
+    ; CI-MESA: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[C]](s32)
+    ; CI-MESA: [[OR12:%[0-9]+]]:_(s32) = G_OR [[AND16]], [[SHL12]]
     ; CI-MESA: [[COPY19:%[0-9]+]]:_(s32) = COPY [[LSHR13]](s32)
+    ; CI-MESA: [[AND18:%[0-9]+]]:_(s32) = G_AND [[COPY19]], [[C3]]
+    ; CI-MESA: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND18]], [[C1]](s32)
+    ; CI-MESA: [[OR13:%[0-9]+]]:_(s32) = G_OR [[OR12]], [[SHL13]]
     ; CI-MESA: [[COPY20:%[0-9]+]]:_(s32) = COPY [[LSHR14]](s32)
-    ; CI-MESA: [[BUILD_VECTOR4:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32)
-    ; CI-MESA: [[TRUNC4:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR4]](<4 x s32>)
-    ; CI-MESA: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32)
-    ; CI-MESA: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C1]](s32)
-    ; CI-MESA: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C2]](s32)
+    ; CI-MESA: [[AND19:%[0-9]+]]:_(s32) = G_AND [[COPY20]], [[C3]]
+    ; CI-MESA: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[C2]](s32)
+    ; CI-MESA: [[OR14:%[0-9]+]]:_(s32) = G_OR [[OR13]], [[SHL14]]
     ; CI-MESA: [[COPY21:%[0-9]+]]:_(s32) = COPY [[UV5]](s32)
+    ; CI-MESA: [[AND20:%[0-9]+]]:_(s32) = G_AND [[COPY21]], [[C3]]
     ; CI-MESA: [[COPY22:%[0-9]+]]:_(s32) = COPY [[LSHR15]](s32)
+    ; CI-MESA: [[AND21:%[0-9]+]]:_(s32) = G_AND [[COPY22]], [[C3]]
+    ; CI-MESA: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[C]](s32)
+    ; CI-MESA: [[OR15:%[0-9]+]]:_(s32) = G_OR [[AND20]], [[SHL15]]
     ; CI-MESA: [[COPY23:%[0-9]+]]:_(s32) = COPY [[LSHR16]](s32)
+    ; CI-MESA: [[AND22:%[0-9]+]]:_(s32) = G_AND [[COPY23]], [[C3]]
+    ; CI-MESA: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[AND22]], [[C1]](s32)
+    ; CI-MESA: [[OR16:%[0-9]+]]:_(s32) = G_OR [[OR15]], [[SHL16]]
     ; CI-MESA: [[COPY24:%[0-9]+]]:_(s32) = COPY [[LSHR17]](s32)
-    ; CI-MESA: [[BUILD_VECTOR5:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32)
-    ; CI-MESA: [[TRUNC5:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR5]](<4 x s32>)
-    ; CI-MESA: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C]](s32)
-    ; CI-MESA: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C1]](s32)
-    ; CI-MESA: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C2]](s32)
+    ; CI-MESA: [[AND23:%[0-9]+]]:_(s32) = G_AND [[COPY24]], [[C3]]
+    ; CI-MESA: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[C2]](s32)
+    ; CI-MESA: [[OR17:%[0-9]+]]:_(s32) = G_OR [[OR16]], [[SHL17]]
     ; CI-MESA: [[COPY25:%[0-9]+]]:_(s32) = COPY [[UV6]](s32)
+    ; CI-MESA: [[AND24:%[0-9]+]]:_(s32) = G_AND [[COPY25]], [[C3]]
     ; CI-MESA: [[COPY26:%[0-9]+]]:_(s32) = COPY [[LSHR18]](s32)
+    ; CI-MESA: [[AND25:%[0-9]+]]:_(s32) = G_AND [[COPY26]], [[C3]]
+    ; CI-MESA: [[SHL18:%[0-9]+]]:_(s32) = G_SHL [[AND25]], [[C]](s32)
+    ; CI-MESA: [[OR18:%[0-9]+]]:_(s32) = G_OR [[AND24]], [[SHL18]]
     ; CI-MESA: [[COPY27:%[0-9]+]]:_(s32) = COPY [[LSHR19]](s32)
+    ; CI-MESA: [[AND26:%[0-9]+]]:_(s32) = G_AND [[COPY27]], [[C3]]
+    ; CI-MESA: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[AND26]], [[C1]](s32)
+    ; CI-MESA: [[OR19:%[0-9]+]]:_(s32) = G_OR [[OR18]], [[SHL19]]
     ; CI-MESA: [[COPY28:%[0-9]+]]:_(s32) = COPY [[LSHR20]](s32)
-    ; CI-MESA: [[BUILD_VECTOR6:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32)
-    ; CI-MESA: [[TRUNC6:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR6]](<4 x s32>)
-    ; CI-MESA: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C]](s32)
-    ; CI-MESA: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C1]](s32)
-    ; CI-MESA: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C2]](s32)
+    ; CI-MESA: [[AND27:%[0-9]+]]:_(s32) = G_AND [[COPY28]], [[C3]]
+    ; CI-MESA: [[SHL20:%[0-9]+]]:_(s32) = G_SHL [[AND27]], [[C2]](s32)
+    ; CI-MESA: [[OR20:%[0-9]+]]:_(s32) = G_OR [[OR19]], [[SHL20]]
     ; CI-MESA: [[COPY29:%[0-9]+]]:_(s32) = COPY [[UV7]](s32)
+    ; CI-MESA: [[AND28:%[0-9]+]]:_(s32) = G_AND [[COPY29]], [[C3]]
     ; CI-MESA: [[COPY30:%[0-9]+]]:_(s32) = COPY [[LSHR21]](s32)
+    ; CI-MESA: [[AND29:%[0-9]+]]:_(s32) = G_AND [[COPY30]], [[C3]]
+    ; CI-MESA: [[SHL21:%[0-9]+]]:_(s32) = G_SHL [[AND29]], [[C]](s32)
+    ; CI-MESA: [[OR21:%[0-9]+]]:_(s32) = G_OR [[AND28]], [[SHL21]]
     ; CI-MESA: [[COPY31:%[0-9]+]]:_(s32) = COPY [[LSHR22]](s32)
+    ; CI-MESA: [[AND30:%[0-9]+]]:_(s32) = G_AND [[COPY31]], [[C3]]
+    ; CI-MESA: [[SHL22:%[0-9]+]]:_(s32) = G_SHL [[AND30]], [[C1]](s32)
+    ; CI-MESA: [[OR22:%[0-9]+]]:_(s32) = G_OR [[OR21]], [[SHL22]]
     ; CI-MESA: [[COPY32:%[0-9]+]]:_(s32) = COPY [[LSHR23]](s32)
-    ; CI-MESA: [[BUILD_VECTOR7:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32), [[COPY32]](s32)
-    ; CI-MESA: [[TRUNC7:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR7]](<4 x s32>)
-    ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<32 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s8>), [[TRUNC1]](<4 x s8>), [[TRUNC2]](<4 x s8>), [[TRUNC3]](<4 x s8>), [[TRUNC4]](<4 x s8>), [[TRUNC5]](<4 x s8>), [[TRUNC6]](<4 x s8>), [[TRUNC7]](<4 x s8>)
-    ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<32 x s8>)
+    ; CI-MESA: [[AND31:%[0-9]+]]:_(s32) = G_AND [[COPY32]], [[C3]]
+    ; CI-MESA: [[SHL23:%[0-9]+]]:_(s32) = G_SHL [[AND31]], [[C2]](s32)
+    ; CI-MESA: [[OR23:%[0-9]+]]:_(s32) = G_OR [[OR22]], [[SHL23]]
+    ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32), [[OR14]](s32), [[OR17]](s32), [[OR20]](s32), [[OR23]](s32)
+    ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<8 x s32>)
     ; VI-LABEL: name: test_load_global_v32s8_align32
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; VI: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), addrspace 1)
@@ -5955,77 +6580,142 @@ body: |
     ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
     ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
     ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
+    ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; VI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
+    ; VI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; VI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
+    ; VI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
+    ; VI: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
+    ; VI: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
+    ; VI: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
+    ; VI: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
+    ; VI: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32)
+    ; VI: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C1]](s32)
+    ; VI: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C2]](s32)
+    ; VI: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32)
+    ; VI: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C1]](s32)
+    ; VI: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C2]](s32)
+    ; VI: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C]](s32)
+    ; VI: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C1]](s32)
+    ; VI: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C2]](s32)
+    ; VI: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C]](s32)
+    ; VI: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C1]](s32)
+    ; VI: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C2]](s32)
+    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
     ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
+    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
     ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]]
+    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
     ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]]
+    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
     ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32)
-    ; VI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>)
-    ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
-    ; VI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
-    ; VI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
+    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
     ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV1]](s32)
+    ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]]
     ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32)
+    ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]]
+    ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
     ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32)
+    ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]]
+    ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C1]](s32)
+    ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
     ; VI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32)
-    ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32)
-    ; VI: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR1]](<4 x s32>)
-    ; VI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
-    ; VI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
-    ; VI: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
+    ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]]
+    ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
+    ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
     ; VI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[UV2]](s32)
+    ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]]
     ; VI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32)
+    ; VI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C3]]
+    ; VI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32)
+    ; VI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
     ; VI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR7]](s32)
+    ; VI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C3]]
+    ; VI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C1]](s32)
+    ; VI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
     ; VI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR8]](s32)
-    ; VI: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32)
-    ; VI: [[TRUNC2:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR2]](<4 x s32>)
-    ; VI: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
-    ; VI: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
-    ; VI: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
+    ; VI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C3]]
+    ; VI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32)
+    ; VI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
     ; VI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[UV3]](s32)
+    ; VI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]]
     ; VI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR9]](s32)
+    ; VI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C3]]
+    ; VI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C]](s32)
+    ; VI: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
     ; VI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR10]](s32)
+    ; VI: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C3]]
+    ; VI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C1]](s32)
+    ; VI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
     ; VI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR11]](s32)
-    ; VI: [[BUILD_VECTOR3:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32)
-    ; VI: [[TRUNC3:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR3]](<4 x s32>)
-    ; VI: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32)
-    ; VI: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C1]](s32)
-    ; VI: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C2]](s32)
+    ; VI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C3]]
+    ; VI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C2]](s32)
+    ; VI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
     ; VI: [[COPY17:%[0-9]+]]:_(s32) = COPY [[UV4]](s32)
+    ; VI: [[AND16:%[0-9]+]]:_(s32) = G_AND [[COPY17]], [[C3]]
     ; VI: [[COPY18:%[0-9]+]]:_(s32) = COPY [[LSHR12]](s32)
+    ; VI: [[AND17:%[0-9]+]]:_(s32) = G_AND [[COPY18]], [[C3]]
+    ; VI: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[C]](s32)
+    ; VI: [[OR12:%[0-9]+]]:_(s32) = G_OR [[AND16]], [[SHL12]]
     ; VI: [[COPY19:%[0-9]+]]:_(s32) = COPY [[LSHR13]](s32)
+    ; VI: [[AND18:%[0-9]+]]:_(s32) = G_AND [[COPY19]], [[C3]]
+    ; VI: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND18]], [[C1]](s32)
+    ; VI: [[OR13:%[0-9]+]]:_(s32) = G_OR [[OR12]], [[SHL13]]
     ; VI: [[COPY20:%[0-9]+]]:_(s32) = COPY [[LSHR14]](s32)
-    ; VI: [[BUILD_VECTOR4:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32)
-    ; VI: [[TRUNC4:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR4]](<4 x s32>)
-    ; VI: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32)
-    ; VI: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C1]](s32)
-    ; VI: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C2]](s32)
+    ; VI: [[AND19:%[0-9]+]]:_(s32) = G_AND [[COPY20]], [[C3]]
+    ; VI: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[C2]](s32)
+    ; VI: [[OR14:%[0-9]+]]:_(s32) = G_OR [[OR13]], [[SHL14]]
     ; VI: [[COPY21:%[0-9]+]]:_(s32) = COPY [[UV5]](s32)
+    ; VI: [[AND20:%[0-9]+]]:_(s32) = G_AND [[COPY21]], [[C3]]
     ; VI: [[COPY22:%[0-9]+]]:_(s32) = COPY [[LSHR15]](s32)
+    ; VI: [[AND21:%[0-9]+]]:_(s32) = G_AND [[COPY22]], [[C3]]
+    ; VI: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[C]](s32)
+    ; VI: [[OR15:%[0-9]+]]:_(s32) = G_OR [[AND20]], [[SHL15]]
     ; VI: [[COPY23:%[0-9]+]]:_(s32) = COPY [[LSHR16]](s32)
+    ; VI: [[AND22:%[0-9]+]]:_(s32) = G_AND [[COPY23]], [[C3]]
+    ; VI: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[AND22]], [[C1]](s32)
+    ; VI: [[OR16:%[0-9]+]]:_(s32) = G_OR [[OR15]], [[SHL16]]
     ; VI: [[COPY24:%[0-9]+]]:_(s32) = COPY [[LSHR17]](s32)
-    ; VI: [[BUILD_VECTOR5:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32)
-    ; VI: [[TRUNC5:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR5]](<4 x s32>)
-    ; VI: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C]](s32)
-    ; VI: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C1]](s32)
-    ; VI: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C2]](s32)
+    ; VI: [[AND23:%[0-9]+]]:_(s32) = G_AND [[COPY24]], [[C3]]
+    ; VI: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[C2]](s32)
+    ; VI: [[OR17:%[0-9]+]]:_(s32) = G_OR [[OR16]], [[SHL17]]
     ; VI: [[COPY25:%[0-9]+]]:_(s32) = COPY [[UV6]](s32)
+    ; VI: [[AND24:%[0-9]+]]:_(s32) = G_AND [[COPY25]], [[C3]]
     ; VI: [[COPY26:%[0-9]+]]:_(s32) = COPY [[LSHR18]](s32)
+    ; VI: [[AND25:%[0-9]+]]:_(s32) = G_AND [[COPY26]], [[C3]]
+    ; VI: [[SHL18:%[0-9]+]]:_(s32) = G_SHL [[AND25]], [[C]](s32)
+    ; VI: [[OR18:%[0-9]+]]:_(s32) = G_OR [[AND24]], [[SHL18]]
     ; VI: [[COPY27:%[0-9]+]]:_(s32) = COPY [[LSHR19]](s32)
+    ; VI: [[AND26:%[0-9]+]]:_(s32) = G_AND [[COPY27]], [[C3]]
+    ; VI: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[AND26]], [[C1]](s32)
+    ; VI: [[OR19:%[0-9]+]]:_(s32) = G_OR [[OR18]], [[SHL19]]
     ; VI: [[COPY28:%[0-9]+]]:_(s32) = COPY [[LSHR20]](s32)
-    ; VI: [[BUILD_VECTOR6:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32)
-    ; VI: [[TRUNC6:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR6]](<4 x s32>)
-    ; VI: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C]](s32)
-    ; VI: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C1]](s32)
-    ; VI: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C2]](s32)
+    ; VI: [[AND27:%[0-9]+]]:_(s32) = G_AND [[COPY28]], [[C3]]
+    ; VI: [[SHL20:%[0-9]+]]:_(s32) = G_SHL [[AND27]], [[C2]](s32)
+    ; VI: [[OR20:%[0-9]+]]:_(s32) = G_OR [[OR19]], [[SHL20]]
     ; VI: [[COPY29:%[0-9]+]]:_(s32) = COPY [[UV7]](s32)
+    ; VI: [[AND28:%[0-9]+]]:_(s32) = G_AND [[COPY29]], [[C3]]
     ; VI: [[COPY30:%[0-9]+]]:_(s32) = COPY [[LSHR21]](s32)
+    ; VI: [[AND29:%[0-9]+]]:_(s32) = G_AND [[COPY30]], [[C3]]
+    ; VI: [[SHL21:%[0-9]+]]:_(s32) = G_SHL [[AND29]], [[C]](s32)
+    ; VI: [[OR21:%[0-9]+]]:_(s32) = G_OR [[AND28]], [[SHL21]]
     ; VI: [[COPY31:%[0-9]+]]:_(s32) = COPY [[LSHR22]](s32)
+    ; VI: [[AND30:%[0-9]+]]:_(s32) = G_AND [[COPY31]], [[C3]]
+    ; VI: [[SHL22:%[0-9]+]]:_(s32) = G_SHL [[AND30]], [[C1]](s32)
+    ; VI: [[OR22:%[0-9]+]]:_(s32) = G_OR [[OR21]], [[SHL22]]
     ; VI: [[COPY32:%[0-9]+]]:_(s32) = COPY [[LSHR23]](s32)
-    ; VI: [[BUILD_VECTOR7:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32), [[COPY32]](s32)
-    ; VI: [[TRUNC7:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR7]](<4 x s32>)
-    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<32 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s8>), [[TRUNC1]](<4 x s8>), [[TRUNC2]](<4 x s8>), [[TRUNC3]](<4 x s8>), [[TRUNC4]](<4 x s8>), [[TRUNC5]](<4 x s8>), [[TRUNC6]](<4 x s8>), [[TRUNC7]](<4 x s8>)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<32 x s8>)
+    ; VI: [[AND31:%[0-9]+]]:_(s32) = G_AND [[COPY32]], [[C3]]
+    ; VI: [[SHL23:%[0-9]+]]:_(s32) = G_SHL [[AND31]], [[C2]](s32)
+    ; VI: [[OR23:%[0-9]+]]:_(s32) = G_OR [[OR22]], [[SHL23]]
+    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32), [[OR14]](s32), [[OR17]](s32), [[OR20]](s32), [[OR23]](s32)
+    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<8 x s32>)
     ; GFX9-HSA-LABEL: name: test_load_global_v32s8_align32
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), addrspace 1)
@@ -6036,93 +6726,142 @@ body: |
     ; GFX9-HSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
     ; GFX9-HSA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
     ; GFX9-HSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
+    ; GFX9-HSA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; GFX9-HSA: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
+    ; GFX9-HSA: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; GFX9-HSA: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
+    ; GFX9-HSA: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
+    ; GFX9-HSA: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
+    ; GFX9-HSA: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
+    ; GFX9-HSA: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
+    ; GFX9-HSA: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
+    ; GFX9-HSA: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32)
+    ; GFX9-HSA: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C1]](s32)
+    ; GFX9-HSA: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C2]](s32)
+    ; GFX9-HSA: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32)
+    ; GFX9-HSA: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C1]](s32)
+    ; GFX9-HSA: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C2]](s32)
+    ; GFX9-HSA: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C]](s32)
+    ; GFX9-HSA: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C1]](s32)
+    ; GFX9-HSA: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C2]](s32)
+    ; GFX9-HSA: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C]](s32)
+    ; GFX9-HSA: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C1]](s32)
+    ; GFX9-HSA: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C2]](s32)
+    ; GFX9-HSA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
     ; GFX9-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
+    ; GFX9-HSA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
     ; GFX9-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
-    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
+    ; GFX9-HSA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]]
+    ; GFX9-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; GFX9-HSA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
     ; GFX9-HSA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; GFX9-HSA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]]
+    ; GFX9-HSA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+    ; GFX9-HSA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
     ; GFX9-HSA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
-    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32)
-    ; GFX9-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
-    ; GFX9-HSA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>)
-    ; GFX9-HSA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
-    ; GFX9-HSA: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
-    ; GFX9-HSA: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; GFX9-HSA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
+    ; GFX9-HSA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; GFX9-HSA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
     ; GFX9-HSA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV1]](s32)
+    ; GFX9-HSA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]]
     ; GFX9-HSA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32)
-    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32)
+    ; GFX9-HSA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]]
+    ; GFX9-HSA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; GFX9-HSA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
     ; GFX9-HSA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32)
+    ; GFX9-HSA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]]
+    ; GFX9-HSA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C1]](s32)
+    ; GFX9-HSA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
     ; GFX9-HSA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32)
-    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY7]](s32), [[COPY8]](s32)
-    ; GFX9-HSA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>)
-    ; GFX9-HSA: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS1]](<4 x s16>)
-    ; GFX9-HSA: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
-    ; GFX9-HSA: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
-    ; GFX9-HSA: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
+    ; GFX9-HSA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]]
+    ; GFX9-HSA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
+    ; GFX9-HSA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
     ; GFX9-HSA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[UV2]](s32)
+    ; GFX9-HSA: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]]
     ; GFX9-HSA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32)
-    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY9]](s32), [[COPY10]](s32)
+    ; GFX9-HSA: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C3]]
+    ; GFX9-HSA: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32)
+    ; GFX9-HSA: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
     ; GFX9-HSA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR7]](s32)
+    ; GFX9-HSA: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C3]]
+    ; GFX9-HSA: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C1]](s32)
+    ; GFX9-HSA: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
     ; GFX9-HSA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR8]](s32)
-    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY11]](s32), [[COPY12]](s32)
-    ; GFX9-HSA: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC4]](<2 x s16>), [[BUILD_VECTOR_TRUNC5]](<2 x s16>)
-    ; GFX9-HSA: [[TRUNC2:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS2]](<4 x s16>)
-    ; GFX9-HSA: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
-    ; GFX9-HSA: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
-    ; GFX9-HSA: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
+    ; GFX9-HSA: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C3]]
+    ; GFX9-HSA: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32)
+    ; GFX9-HSA: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
     ; GFX9-HSA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[UV3]](s32)
+    ; GFX9-HSA: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]]
     ; GFX9-HSA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR9]](s32)
-    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY13]](s32), [[COPY14]](s32)
+    ; GFX9-HSA: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C3]]
+    ; GFX9-HSA: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C]](s32)
+    ; GFX9-HSA: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
     ; GFX9-HSA: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR10]](s32)
+    ; GFX9-HSA: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C3]]
+    ; GFX9-HSA: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C1]](s32)
+    ; GFX9-HSA: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
     ; GFX9-HSA: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR11]](s32)
-    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC7:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[COPY16]](s32)
-    ; GFX9-HSA: [[CONCAT_VECTORS3:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC6]](<2 x s16>), [[BUILD_VECTOR_TRUNC7]](<2 x s16>)
-    ; GFX9-HSA: [[TRUNC3:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS3]](<4 x s16>)
-    ; GFX9-HSA: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32)
-    ; GFX9-HSA: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C1]](s32)
-    ; GFX9-HSA: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C2]](s32)
+    ; GFX9-HSA: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C3]]
+    ; GFX9-HSA: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C2]](s32)
+    ; GFX9-HSA: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
     ; GFX9-HSA: [[COPY17:%[0-9]+]]:_(s32) = COPY [[UV4]](s32)
+    ; GFX9-HSA: [[AND16:%[0-9]+]]:_(s32) = G_AND [[COPY17]], [[C3]]
     ; GFX9-HSA: [[COPY18:%[0-9]+]]:_(s32) = COPY [[LSHR12]](s32)
-    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC8:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY17]](s32), [[COPY18]](s32)
+    ; GFX9-HSA: [[AND17:%[0-9]+]]:_(s32) = G_AND [[COPY18]], [[C3]]
+    ; GFX9-HSA: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[C]](s32)
+    ; GFX9-HSA: [[OR12:%[0-9]+]]:_(s32) = G_OR [[AND16]], [[SHL12]]
     ; GFX9-HSA: [[COPY19:%[0-9]+]]:_(s32) = COPY [[LSHR13]](s32)
+    ; GFX9-HSA: [[AND18:%[0-9]+]]:_(s32) = G_AND [[COPY19]], [[C3]]
+    ; GFX9-HSA: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND18]], [[C1]](s32)
+    ; GFX9-HSA: [[OR13:%[0-9]+]]:_(s32) = G_OR [[OR12]], [[SHL13]]
     ; GFX9-HSA: [[COPY20:%[0-9]+]]:_(s32) = COPY [[LSHR14]](s32)
-    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC9:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY19]](s32), [[COPY20]](s32)
-    ; GFX9-HSA: [[CONCAT_VECTORS4:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC8]](<2 x s16>), [[BUILD_VECTOR_TRUNC9]](<2 x s16>)
-    ; GFX9-HSA: [[TRUNC4:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS4]](<4 x s16>)
-    ; GFX9-HSA: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32)
-    ; GFX9-HSA: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C1]](s32)
-    ; GFX9-HSA: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C2]](s32)
+    ; GFX9-HSA: [[AND19:%[0-9]+]]:_(s32) = G_AND [[COPY20]], [[C3]]
+    ; GFX9-HSA: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[C2]](s32)
+    ; GFX9-HSA: [[OR14:%[0-9]+]]:_(s32) = G_OR [[OR13]], [[SHL14]]
     ; GFX9-HSA: [[COPY21:%[0-9]+]]:_(s32) = COPY [[UV5]](s32)
+    ; GFX9-HSA: [[AND20:%[0-9]+]]:_(s32) = G_AND [[COPY21]], [[C3]]
     ; GFX9-HSA: [[COPY22:%[0-9]+]]:_(s32) = COPY [[LSHR15]](s32)
-    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC10:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY21]](s32), [[COPY22]](s32)
+    ; GFX9-HSA: [[AND21:%[0-9]+]]:_(s32) = G_AND [[COPY22]], [[C3]]
+    ; GFX9-HSA: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[C]](s32)
+    ; GFX9-HSA: [[OR15:%[0-9]+]]:_(s32) = G_OR [[AND20]], [[SHL15]]
     ; GFX9-HSA: [[COPY23:%[0-9]+]]:_(s32) = COPY [[LSHR16]](s32)
+    ; GFX9-HSA: [[AND22:%[0-9]+]]:_(s32) = G_AND [[COPY23]], [[C3]]
+    ; GFX9-HSA: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[AND22]], [[C1]](s32)
+    ; GFX9-HSA: [[OR16:%[0-9]+]]:_(s32) = G_OR [[OR15]], [[SHL16]]
     ; GFX9-HSA: [[COPY24:%[0-9]+]]:_(s32) = COPY [[LSHR17]](s32)
-    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC11:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY23]](s32), [[COPY24]](s32)
-    ; GFX9-HSA: [[CONCAT_VECTORS5:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC10]](<2 x s16>), [[BUILD_VECTOR_TRUNC11]](<2 x s16>)
-    ; GFX9-HSA: [[TRUNC5:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS5]](<4 x s16>)
-    ; GFX9-HSA: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C]](s32)
-    ; GFX9-HSA: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C1]](s32)
-    ; GFX9-HSA: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C2]](s32)
+    ; GFX9-HSA: [[AND23:%[0-9]+]]:_(s32) = G_AND [[COPY24]], [[C3]]
+    ; GFX9-HSA: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[C2]](s32)
+    ; GFX9-HSA: [[OR17:%[0-9]+]]:_(s32) = G_OR [[OR16]], [[SHL17]]
     ; GFX9-HSA: [[COPY25:%[0-9]+]]:_(s32) = COPY [[UV6]](s32)
+    ; GFX9-HSA: [[AND24:%[0-9]+]]:_(s32) = G_AND [[COPY25]], [[C3]]
     ; GFX9-HSA: [[COPY26:%[0-9]+]]:_(s32) = COPY [[LSHR18]](s32)
-    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC12:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY25]](s32), [[COPY26]](s32)
+    ; GFX9-HSA: [[AND25:%[0-9]+]]:_(s32) = G_AND [[COPY26]], [[C3]]
+    ; GFX9-HSA: [[SHL18:%[0-9]+]]:_(s32) = G_SHL [[AND25]], [[C]](s32)
+    ; GFX9-HSA: [[OR18:%[0-9]+]]:_(s32) = G_OR [[AND24]], [[SHL18]]
     ; GFX9-HSA: [[COPY27:%[0-9]+]]:_(s32) = COPY [[LSHR19]](s32)
+    ; GFX9-HSA: [[AND26:%[0-9]+]]:_(s32) = G_AND [[COPY27]], [[C3]]
+    ; GFX9-HSA: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[AND26]], [[C1]](s32)
+    ; GFX9-HSA: [[OR19:%[0-9]+]]:_(s32) = G_OR [[OR18]], [[SHL19]]
     ; GFX9-HSA: [[COPY28:%[0-9]+]]:_(s32) = COPY [[LSHR20]](s32)
-    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC13:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY27]](s32), [[COPY28]](s32)
-    ; GFX9-HSA: [[CONCAT_VECTORS6:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC12]](<2 x s16>), [[BUILD_VECTOR_TRUNC13]](<2 x s16>)
-    ; GFX9-HSA: [[TRUNC6:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS6]](<4 x s16>)
-    ; GFX9-HSA: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C]](s32)
-    ; GFX9-HSA: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C1]](s32)
-    ; GFX9-HSA: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C2]](s32)
+    ; GFX9-HSA: [[AND27:%[0-9]+]]:_(s32) = G_AND [[COPY28]], [[C3]]
+    ; GFX9-HSA: [[SHL20:%[0-9]+]]:_(s32) = G_SHL [[AND27]], [[C2]](s32)
+    ; GFX9-HSA: [[OR20:%[0-9]+]]:_(s32) = G_OR [[OR19]], [[SHL20]]
     ; GFX9-HSA: [[COPY29:%[0-9]+]]:_(s32) = COPY [[UV7]](s32)
+    ; GFX9-HSA: [[AND28:%[0-9]+]]:_(s32) = G_AND [[COPY29]], [[C3]]
     ; GFX9-HSA: [[COPY30:%[0-9]+]]:_(s32) = COPY [[LSHR21]](s32)
-    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC14:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY29]](s32), [[COPY30]](s32)
+    ; GFX9-HSA: [[AND29:%[0-9]+]]:_(s32) = G_AND [[COPY30]], [[C3]]
+    ; GFX9-HSA: [[SHL21:%[0-9]+]]:_(s32) = G_SHL [[AND29]], [[C]](s32)
+    ; GFX9-HSA: [[OR21:%[0-9]+]]:_(s32) = G_OR [[AND28]], [[SHL21]]
     ; GFX9-HSA: [[COPY31:%[0-9]+]]:_(s32) = COPY [[LSHR22]](s32)
+    ; GFX9-HSA: [[AND30:%[0-9]+]]:_(s32) = G_AND [[COPY31]], [[C3]]
+    ; GFX9-HSA: [[SHL22:%[0-9]+]]:_(s32) = G_SHL [[AND30]], [[C1]](s32)
+    ; GFX9-HSA: [[OR22:%[0-9]+]]:_(s32) = G_OR [[OR21]], [[SHL22]]
     ; GFX9-HSA: [[COPY32:%[0-9]+]]:_(s32) = COPY [[LSHR23]](s32)
-    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC15:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY31]](s32), [[COPY32]](s32)
-    ; GFX9-HSA: [[CONCAT_VECTORS7:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC14]](<2 x s16>), [[BUILD_VECTOR_TRUNC15]](<2 x s16>)
-    ; GFX9-HSA: [[TRUNC7:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS7]](<4 x s16>)
-    ; GFX9-HSA: [[CONCAT_VECTORS8:%[0-9]+]]:_(<32 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s8>), [[TRUNC1]](<4 x s8>), [[TRUNC2]](<4 x s8>), [[TRUNC3]](<4 x s8>), [[TRUNC4]](<4 x s8>), [[TRUNC5]](<4 x s8>), [[TRUNC6]](<4 x s8>), [[TRUNC7]](<4 x s8>)
-    ; GFX9-HSA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS8]](<32 x s8>)
+    ; GFX9-HSA: [[AND31:%[0-9]+]]:_(s32) = G_AND [[COPY32]], [[C3]]
+    ; GFX9-HSA: [[SHL23:%[0-9]+]]:_(s32) = G_SHL [[AND31]], [[C2]](s32)
+    ; GFX9-HSA: [[OR23:%[0-9]+]]:_(s32) = G_OR [[OR22]], [[SHL23]]
+    ; GFX9-HSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32), [[OR14]](s32), [[OR17]](s32), [[OR20]](s32), [[OR23]](s32)
+    ; GFX9-HSA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<8 x s32>)
     ; GFX9-MESA-LABEL: name: test_load_global_v32s8_align32
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), addrspace 1)
@@ -6133,96 +6872,146 @@ body: |
     ; GFX9-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
     ; GFX9-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
     ; GFX9-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
+    ; GFX9-MESA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; GFX9-MESA: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
+    ; GFX9-MESA: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; GFX9-MESA: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
+    ; GFX9-MESA: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
+    ; GFX9-MESA: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
+    ; GFX9-MESA: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
+    ; GFX9-MESA: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
+    ; GFX9-MESA: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
+    ; GFX9-MESA: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32)
+    ; GFX9-MESA: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C1]](s32)
+    ; GFX9-MESA: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C2]](s32)
+    ; GFX9-MESA: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32)
+    ; GFX9-MESA: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C1]](s32)
+    ; GFX9-MESA: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C2]](s32)
+    ; GFX9-MESA: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C]](s32)
+    ; GFX9-MESA: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C1]](s32)
+    ; GFX9-MESA: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C2]](s32)
+    ; GFX9-MESA: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C]](s32)
+    ; GFX9-MESA: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C1]](s32)
+    ; GFX9-MESA: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C2]](s32)
+    ; GFX9-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
     ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
+    ; GFX9-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
     ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
+    ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]]
+    ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; GFX9-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
     ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; GFX9-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]]
+    ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+    ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
     ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32)
-    ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
-    ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>)
-    ; GFX9-MESA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
-    ; GFX9-MESA: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
-    ; GFX9-MESA: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
+    ; GFX9-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; GFX9-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
     ; GFX9-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV1]](s32)
+    ; GFX9-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]]
     ; GFX9-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32)
+    ; GFX9-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]]
+    ; GFX9-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; GFX9-MESA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
     ; GFX9-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32)
+    ; GFX9-MESA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]]
+    ; GFX9-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C1]](s32)
+    ; GFX9-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
     ; GFX9-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY7]](s32), [[COPY8]](s32)
-    ; GFX9-MESA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>)
-    ; GFX9-MESA: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS1]](<4 x s16>)
-    ; GFX9-MESA: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
-    ; GFX9-MESA: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
-    ; GFX9-MESA: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
+    ; GFX9-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]]
+    ; GFX9-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
+    ; GFX9-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
     ; GFX9-MESA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[UV2]](s32)
+    ; GFX9-MESA: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]]
     ; GFX9-MESA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY9]](s32), [[COPY10]](s32)
+    ; GFX9-MESA: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C3]]
+    ; GFX9-MESA: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32)
+    ; GFX9-MESA: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
     ; GFX9-MESA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR7]](s32)
+    ; GFX9-MESA: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C3]]
+    ; GFX9-MESA: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C1]](s32)
+    ; GFX9-MESA: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
     ; GFX9-MESA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR8]](s32)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY11]](s32), [[COPY12]](s32)
-    ; GFX9-MESA: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC4]](<2 x s16>), [[BUILD_VECTOR_TRUNC5]](<2 x s16>)
-    ; GFX9-MESA: [[TRUNC2:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS2]](<4 x s16>)
-    ; GFX9-MESA: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
-    ; GFX9-MESA: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
-    ; GFX9-MESA: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
+    ; GFX9-MESA: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C3]]
+    ; GFX9-MESA: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32)
+    ; GFX9-MESA: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
     ; GFX9-MESA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[UV3]](s32)
+    ; GFX9-MESA: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]]
     ; GFX9-MESA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR9]](s32)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY13]](s32), [[COPY14]](s32)
+    ; GFX9-MESA: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C3]]
+    ; GFX9-MESA: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C]](s32)
+    ; GFX9-MESA: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
     ; GFX9-MESA: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR10]](s32)
+    ; GFX9-MESA: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C3]]
+    ; GFX9-MESA: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C1]](s32)
+    ; GFX9-MESA: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
     ; GFX9-MESA: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR11]](s32)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC7:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[COPY16]](s32)
-    ; GFX9-MESA: [[CONCAT_VECTORS3:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC6]](<2 x s16>), [[BUILD_VECTOR_TRUNC7]](<2 x s16>)
-    ; GFX9-MESA: [[TRUNC3:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS3]](<4 x s16>)
-    ; GFX9-MESA: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32)
-    ; GFX9-MESA: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C1]](s32)
-    ; GFX9-MESA: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C2]](s32)
+    ; GFX9-MESA: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C3]]
+    ; GFX9-MESA: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C2]](s32)
+    ; GFX9-MESA: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
     ; GFX9-MESA: [[COPY17:%[0-9]+]]:_(s32) = COPY [[UV4]](s32)
+    ; GFX9-MESA: [[AND16:%[0-9]+]]:_(s32) = G_AND [[COPY17]], [[C3]]
     ; GFX9-MESA: [[COPY18:%[0-9]+]]:_(s32) = COPY [[LSHR12]](s32)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC8:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY17]](s32), [[COPY18]](s32)
+    ; GFX9-MESA: [[AND17:%[0-9]+]]:_(s32) = G_AND [[COPY18]], [[C3]]
+    ; GFX9-MESA: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[C]](s32)
+    ; GFX9-MESA: [[OR12:%[0-9]+]]:_(s32) = G_OR [[AND16]], [[SHL12]]
     ; GFX9-MESA: [[COPY19:%[0-9]+]]:_(s32) = COPY [[LSHR13]](s32)
+    ; GFX9-MESA: [[AND18:%[0-9]+]]:_(s32) = G_AND [[COPY19]], [[C3]]
+    ; GFX9-MESA: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND18]], [[C1]](s32)
+    ; GFX9-MESA: [[OR13:%[0-9]+]]:_(s32) = G_OR [[OR12]], [[SHL13]]
     ; GFX9-MESA: [[COPY20:%[0-9]+]]:_(s32) = COPY [[LSHR14]](s32)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC9:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY19]](s32), [[COPY20]](s32)
-    ; GFX9-MESA: [[CONCAT_VECTORS4:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC8]](<2 x s16>), [[BUILD_VECTOR_TRUNC9]](<2 x s16>)
-    ; GFX9-MESA: [[TRUNC4:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS4]](<4 x s16>)
-    ; GFX9-MESA: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32)
-    ; GFX9-MESA: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C1]](s32)
-    ; GFX9-MESA: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C2]](s32)
+    ; GFX9-MESA: [[AND19:%[0-9]+]]:_(s32) = G_AND [[COPY20]], [[C3]]
+    ; GFX9-MESA: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[C2]](s32)
+    ; GFX9-MESA: [[OR14:%[0-9]+]]:_(s32) = G_OR [[OR13]], [[SHL14]]
     ; GFX9-MESA: [[COPY21:%[0-9]+]]:_(s32) = COPY [[UV5]](s32)
+    ; GFX9-MESA: [[AND20:%[0-9]+]]:_(s32) = G_AND [[COPY21]], [[C3]]
     ; GFX9-MESA: [[COPY22:%[0-9]+]]:_(s32) = COPY [[LSHR15]](s32)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC10:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY21]](s32), [[COPY22]](s32)
+    ; GFX9-MESA: [[AND21:%[0-9]+]]:_(s32) = G_AND [[COPY22]], [[C3]]
+    ; GFX9-MESA: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[C]](s32)
+    ; GFX9-MESA: [[OR15:%[0-9]+]]:_(s32) = G_OR [[AND20]], [[SHL15]]
     ; GFX9-MESA: [[COPY23:%[0-9]+]]:_(s32) = COPY [[LSHR16]](s32)
-    ; GFX9-MESA: [[COPY24:%[0-9]+]]:_(s32) = COPY [[LSHR17]](s32)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC11:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY23]](s32), [[COPY24]](s32)
-    ; GFX9-MESA: [[CONCAT_VECTORS5:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC10]](<2 x s16>), [[BUILD_VECTOR_TRUNC11]](<2 x s16>)
-    ; GFX9-MESA: [[TRUNC5:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS5]](<4 x s16>)
-    ; GFX9-MESA: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C]](s32)
-    ; GFX9-MESA: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C1]](s32)
-    ; GFX9-MESA: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C2]](s32)
+    ; GFX9-MESA: [[AND22:%[0-9]+]]:_(s32) = G_AND [[COPY23]], [[C3]]
+    ; GFX9-MESA: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[AND22]], [[C1]](s32)
+    ; GFX9-MESA: [[OR16:%[0-9]+]]:_(s32) = G_OR [[OR15]], [[SHL16]]
+    ; GFX9-MESA: [[COPY24:%[0-9]+]]:_(s32) = COPY [[LSHR17]](s32)
+    ; GFX9-MESA: [[AND23:%[0-9]+]]:_(s32) = G_AND [[COPY24]], [[C3]]
+    ; GFX9-MESA: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[C2]](s32)
+    ; GFX9-MESA: [[OR17:%[0-9]+]]:_(s32) = G_OR [[OR16]], [[SHL17]]
     ; GFX9-MESA: [[COPY25:%[0-9]+]]:_(s32) = COPY [[UV6]](s32)
+    ; GFX9-MESA: [[AND24:%[0-9]+]]:_(s32) = G_AND [[COPY25]], [[C3]]
     ; GFX9-MESA: [[COPY26:%[0-9]+]]:_(s32) = COPY [[LSHR18]](s32)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC12:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY25]](s32), [[COPY26]](s32)
+    ; GFX9-MESA: [[AND25:%[0-9]+]]:_(s32) = G_AND [[COPY26]], [[C3]]
+    ; GFX9-MESA: [[SHL18:%[0-9]+]]:_(s32) = G_SHL [[AND25]], [[C]](s32)
+    ; GFX9-MESA: [[OR18:%[0-9]+]]:_(s32) = G_OR [[AND24]], [[SHL18]]
     ; GFX9-MESA: [[COPY27:%[0-9]+]]:_(s32) = COPY [[LSHR19]](s32)
+    ; GFX9-MESA: [[AND26:%[0-9]+]]:_(s32) = G_AND [[COPY27]], [[C3]]
+    ; GFX9-MESA: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[AND26]], [[C1]](s32)
+    ; GFX9-MESA: [[OR19:%[0-9]+]]:_(s32) = G_OR [[OR18]], [[SHL19]]
     ; GFX9-MESA: [[COPY28:%[0-9]+]]:_(s32) = COPY [[LSHR20]](s32)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC13:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY27]](s32), [[COPY28]](s32)
-    ; GFX9-MESA: [[CONCAT_VECTORS6:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC12]](<2 x s16>), [[BUILD_VECTOR_TRUNC13]](<2 x s16>)
-    ; GFX9-MESA: [[TRUNC6:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS6]](<4 x s16>)
-    ; GFX9-MESA: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C]](s32)
-    ; GFX9-MESA: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C1]](s32)
-    ; GFX9-MESA: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C2]](s32)
+    ; GFX9-MESA: [[AND27:%[0-9]+]]:_(s32) = G_AND [[COPY28]], [[C3]]
+    ; GFX9-MESA: [[SHL20:%[0-9]+]]:_(s32) = G_SHL [[AND27]], [[C2]](s32)
+    ; GFX9-MESA: [[OR20:%[0-9]+]]:_(s32) = G_OR [[OR19]], [[SHL20]]
     ; GFX9-MESA: [[COPY29:%[0-9]+]]:_(s32) = COPY [[UV7]](s32)
+    ; GFX9-MESA: [[AND28:%[0-9]+]]:_(s32) = G_AND [[COPY29]], [[C3]]
     ; GFX9-MESA: [[COPY30:%[0-9]+]]:_(s32) = COPY [[LSHR21]](s32)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC14:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY29]](s32), [[COPY30]](s32)
+    ; GFX9-MESA: [[AND29:%[0-9]+]]:_(s32) = G_AND [[COPY30]], [[C3]]
+    ; GFX9-MESA: [[SHL21:%[0-9]+]]:_(s32) = G_SHL [[AND29]], [[C]](s32)
+    ; GFX9-MESA: [[OR21:%[0-9]+]]:_(s32) = G_OR [[AND28]], [[SHL21]]
     ; GFX9-MESA: [[COPY31:%[0-9]+]]:_(s32) = COPY [[LSHR22]](s32)
+    ; GFX9-MESA: [[AND30:%[0-9]+]]:_(s32) = G_AND [[COPY31]], [[C3]]
+    ; GFX9-MESA: [[SHL22:%[0-9]+]]:_(s32) = G_SHL [[AND30]], [[C1]](s32)
+    ; GFX9-MESA: [[OR22:%[0-9]+]]:_(s32) = G_OR [[OR21]], [[SHL22]]
     ; GFX9-MESA: [[COPY32:%[0-9]+]]:_(s32) = COPY [[LSHR23]](s32)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC15:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY31]](s32), [[COPY32]](s32)
-    ; GFX9-MESA: [[CONCAT_VECTORS7:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC14]](<2 x s16>), [[BUILD_VECTOR_TRUNC15]](<2 x s16>)
-    ; GFX9-MESA: [[TRUNC7:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS7]](<4 x s16>)
-    ; GFX9-MESA: [[CONCAT_VECTORS8:%[0-9]+]]:_(<32 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s8>), [[TRUNC1]](<4 x s8>), [[TRUNC2]](<4 x s8>), [[TRUNC3]](<4 x s8>), [[TRUNC4]](<4 x s8>), [[TRUNC5]](<4 x s8>), [[TRUNC6]](<4 x s8>), [[TRUNC7]](<4 x s8>)
-    ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS8]](<32 x s8>)
+    ; GFX9-MESA: [[AND31:%[0-9]+]]:_(s32) = G_AND [[COPY32]], [[C3]]
+    ; GFX9-MESA: [[SHL23:%[0-9]+]]:_(s32) = G_SHL [[AND31]], [[C2]](s32)
+    ; GFX9-MESA: [[OR23:%[0-9]+]]:_(s32) = G_OR [[OR22]], [[SHL23]]
+    ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32), [[OR14]](s32), [[OR17]](s32), [[OR20]](s32), [[OR23]](s32)
+    ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<8 x s32>)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<32 x s8>) = G_LOAD %0 :: (load (<32 x s8>), align 32, addrspace 1)
-    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1
+    %2:_(<8 x s32>) = G_BITCAST %1
+    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %2
 ...
 
 ---
@@ -6537,55 +7326,221 @@ body: |
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; SI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), addrspace 1)
     ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>)
-    ; SI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
-    ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0
-    ; SI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
+    ; SI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
+    ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+    ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+    ; SI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
+    ; SI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+    ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+    ; SI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+    ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]]
+    ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]]
+    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; SI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]]
+    ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]]
+    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+    ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; SI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
+    ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]]
+    ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+    ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]]
+    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; SI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
+    ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; CI-HSA-LABEL: name: test_load_global_v3s16_align8
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; CI-HSA: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), addrspace 1)
     ; CI-HSA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CI-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>)
-    ; CI-HSA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
-    ; CI-HSA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0
-    ; CI-HSA: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
+    ; CI-HSA: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CI-HSA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
+    ; CI-HSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; CI-HSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+    ; CI-HSA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; CI-HSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+    ; CI-HSA: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
+    ; CI-HSA: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+    ; CI-HSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+    ; CI-HSA: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+    ; CI-HSA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+    ; CI-HSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; CI-HSA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]]
+    ; CI-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; CI-HSA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]]
+    ; CI-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; CI-HSA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-HSA: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CI-HSA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; CI-HSA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]]
+    ; CI-HSA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+    ; CI-HSA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]]
+    ; CI-HSA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+    ; CI-HSA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CI-HSA: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; CI-HSA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
+    ; CI-HSA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]]
+    ; CI-HSA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+    ; CI-HSA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]]
+    ; CI-HSA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; CI-HSA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; CI-HSA: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; CI-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
+    ; CI-HSA: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; CI-MESA-LABEL: name: test_load_global_v3s16_align8
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), addrspace 1)
     ; CI-MESA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>)
-    ; CI-MESA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
-    ; CI-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0
-    ; CI-MESA: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
+    ; CI-MESA: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CI-MESA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
+    ; CI-MESA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; CI-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+    ; CI-MESA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; CI-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+    ; CI-MESA: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
+    ; CI-MESA: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+    ; CI-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+    ; CI-MESA: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+    ; CI-MESA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+    ; CI-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]]
+    ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]]
+    ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-MESA: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; CI-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]]
+    ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+    ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]]
+    ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+    ; CI-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CI-MESA: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
+    ; CI-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]]
+    ; CI-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+    ; CI-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]]
+    ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; CI-MESA: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
+    ; CI-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; VI-LABEL: name: test_load_global_v3s16_align8
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; VI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), addrspace 1)
     ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>)
-    ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
-    ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0
-    ; VI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
+    ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
+    ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+    ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+    ; VI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
+    ; VI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+    ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+    ; VI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+    ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]]
+    ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]]
+    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]]
+    ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]]
+    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; VI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
+    ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]]
+    ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+    ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]]
+    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; VI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
+    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; GFX9-HSA-LABEL: name: test_load_global_v3s16_align8
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), addrspace 1)
     ; GFX9-HSA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>)
-    ; GFX9-HSA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
-    ; GFX9-HSA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0
-    ; GFX9-HSA: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
+    ; GFX9-HSA: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-HSA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
+    ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; GFX9-HSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+    ; GFX9-HSA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; GFX9-HSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+    ; GFX9-HSA: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
+    ; GFX9-HSA: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+    ; GFX9-HSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+    ; GFX9-HSA: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+    ; GFX9-HSA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+    ; GFX9-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; GFX9-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
+    ; GFX9-HSA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; GFX9-HSA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32)
+    ; GFX9-HSA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
+    ; GFX9-HSA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+    ; GFX9-HSA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32)
+    ; GFX9-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+    ; GFX9-HSA: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; GFX9-MESA-LABEL: name: test_load_global_v3s16_align8
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), addrspace 1)
     ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>)
-    ; GFX9-MESA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
-    ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0
-    ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
+    ; GFX9-MESA: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-MESA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
+    ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; GFX9-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+    ; GFX9-MESA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; GFX9-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+    ; GFX9-MESA: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
+    ; GFX9-MESA: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+    ; GFX9-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+    ; GFX9-MESA: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+    ; GFX9-MESA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+    ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
+    ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32)
+    ; GFX9-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
+    ; GFX9-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32)
+    ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+    ; GFX9-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<3 x s16>) = G_LOAD %0 :: (load (<3 x s16>), align 8, addrspace 1)
-    %2:_(<4 x s16>) = G_IMPLICIT_DEF
-    %3:_(<4 x s16>) = G_INSERT %2, %1, 0
-    $vgpr0_vgpr1 = COPY %3
+    %2:_(<3 x s16>) = G_IMPLICIT_DEF
+    %3:_(<6 x s16>) = G_CONCAT_VECTORS %1, %2
+    $vgpr0_vgpr1_vgpr2 = COPY %3
 ...
 
 ---
@@ -6604,43 +7559,61 @@ body: |
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; SI: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[COPY]](p1) :: (load (<3 x s16>), align 4, addrspace 1)
     ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[LOAD]](<3 x s16>), 0
-    ; SI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
+    ; SI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
+    ; SI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
+    ; SI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[LOAD]](<3 x s16>), [[UV]](<3 x s16>)
+    ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS1]](<6 x s16>)
     ; CI-HSA-LABEL: name: test_load_global_v3s16_align4
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; CI-HSA: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[COPY]](p1) :: (load (<3 x s16>), align 4, addrspace 1)
     ; CI-HSA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CI-HSA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[LOAD]](<3 x s16>), 0
-    ; CI-HSA: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
+    ; CI-HSA: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CI-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
+    ; CI-HSA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
+    ; CI-HSA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[LOAD]](<3 x s16>), [[UV]](<3 x s16>)
+    ; CI-HSA: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS1]](<6 x s16>)
     ; CI-MESA-LABEL: name: test_load_global_v3s16_align4
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; CI-MESA: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[COPY]](p1) :: (load (<3 x s16>), align 4, addrspace 1)
     ; CI-MESA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CI-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[LOAD]](<3 x s16>), 0
-    ; CI-MESA: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
+    ; CI-MESA: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
+    ; CI-MESA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
+    ; CI-MESA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[LOAD]](<3 x s16>), [[UV]](<3 x s16>)
+    ; CI-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS1]](<6 x s16>)
     ; VI-LABEL: name: test_load_global_v3s16_align4
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; VI: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[COPY]](p1) :: (load (<3 x s16>), align 4, addrspace 1)
     ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[LOAD]](<3 x s16>), 0
-    ; VI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
+    ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
+    ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
+    ; VI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[LOAD]](<3 x s16>), [[UV]](<3 x s16>)
+    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS1]](<6 x s16>)
     ; GFX9-HSA-LABEL: name: test_load_global_v3s16_align4
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[COPY]](p1) :: (load (<3 x s16>), align 4, addrspace 1)
     ; GFX9-HSA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-HSA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[LOAD]](<3 x s16>), 0
-    ; GFX9-HSA: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
+    ; GFX9-HSA: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
+    ; GFX9-HSA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
+    ; GFX9-HSA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[LOAD]](<3 x s16>), [[UV]](<3 x s16>)
+    ; GFX9-HSA: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS1]](<6 x s16>)
     ; GFX9-MESA-LABEL: name: test_load_global_v3s16_align4
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[COPY]](p1) :: (load (<3 x s16>), align 4, addrspace 1)
     ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[LOAD]](<3 x s16>), 0
-    ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
+    ; GFX9-MESA: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
+    ; GFX9-MESA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
+    ; GFX9-MESA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[LOAD]](<3 x s16>), [[UV]](<3 x s16>)
+    ; GFX9-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS1]](<6 x s16>)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<3 x s16>) = G_LOAD %0 :: (load (<3 x s16>), align 4, addrspace 1)
-    %2:_(<4 x s16>) = G_IMPLICIT_DEF
-    %3:_(<4 x s16>) = G_INSERT %2, %1, 0
-    $vgpr0_vgpr1 = COPY %3
+    %2:_(<3 x s16>) = G_IMPLICIT_DEF
+    %3:_(<6 x s16>) = G_CONCAT_VECTORS %1, %2
+    $vgpr0_vgpr1_vgpr2 = COPY %3
 ...
 
 ---
@@ -6658,33 +7631,49 @@ body: |
     ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
     ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
     ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1)
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; SI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+    ; SI: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; SI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; SI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
+    ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
+    ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C2]](s32)
+    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
     ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]]
+    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
     ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]]
-    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32)
+    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]]
+    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
     ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; SI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; SI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
     ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
-    ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]]
-    ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32)
+    ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]]
+    ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
+    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
     ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; SI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; SI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
-    ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[DEF]](<2 x s16>)
-    ; SI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
-    ; SI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
-    ; SI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
+    ; SI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]]
+    ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]]
+    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
+    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; SI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+    ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; CI-HSA-LABEL: name: test_load_global_v3s16_align2
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; CI-HSA: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[COPY]](p1) :: (load (<3 x s16>), align 2, addrspace 1)
     ; CI-HSA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CI-HSA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[LOAD]](<3 x s16>), 0
-    ; CI-HSA: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
+    ; CI-HSA: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CI-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
+    ; CI-HSA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
+    ; CI-HSA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[LOAD]](<3 x s16>), [[UV]](<3 x s16>)
+    ; CI-HSA: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS1]](<6 x s16>)
     ; CI-MESA-LABEL: name: test_load_global_v3s16_align2
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
@@ -6694,27 +7683,40 @@ body: |
     ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
     ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
     ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1)
-    ; CI-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-MESA: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+    ; CI-MESA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; CI-MESA: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CI-MESA: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CI-MESA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
+    ; CI-MESA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; CI-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
+    ; CI-MESA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; CI-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C2]](s32)
+    ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
     ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
-    ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]]
+    ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
     ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
-    ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]]
-    ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32)
+    ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]]
+    ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
     ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI-MESA: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CI-MESA: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
     ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
-    ; CI-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]]
-    ; CI-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32)
+    ; CI-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]]
+    ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
+    ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
     ; CI-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; CI-MESA: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; CI-MESA: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
-    ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[DEF]](<2 x s16>)
-    ; CI-MESA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
-    ; CI-MESA: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CI-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
-    ; CI-MESA: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
+    ; CI-MESA: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; CI-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]]
+    ; CI-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; CI-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]]
+    ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
+    ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; CI-MESA: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+    ; CI-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; VI-LABEL: name: test_load_global_v3s16_align2
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
@@ -6724,33 +7726,49 @@ body: |
     ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
     ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
     ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1)
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; VI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+    ; VI: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; VI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; VI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
+    ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
+    ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C2]](s32)
+    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
     ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]]
+    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
     ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]]
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32)
+    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]]
+    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
     ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
     ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]]
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32)
+    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]]
+    ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
+    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
     ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; VI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[DEF]](<2 x s16>)
-    ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
-    ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
-    ; VI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
+    ; VI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]]
+    ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]]
+    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
+    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; VI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; GFX9-HSA-LABEL: name: test_load_global_v3s16_align2
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[COPY]](p1) :: (load (<3 x s16>), align 2, addrspace 1)
     ; GFX9-HSA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-HSA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[LOAD]](<3 x s16>), 0
-    ; GFX9-HSA: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
+    ; GFX9-HSA: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
+    ; GFX9-HSA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
+    ; GFX9-HSA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[LOAD]](<3 x s16>), [[UV]](<3 x s16>)
+    ; GFX9-HSA: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS1]](<6 x s16>)
     ; GFX9-MESA-LABEL: name: test_load_global_v3s16_align2
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
@@ -6760,23 +7778,32 @@ body: |
     ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
     ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
     ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1)
+    ; GFX9-MESA: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+    ; GFX9-MESA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-MESA: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-MESA: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-MESA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
+    ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; GFX9-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
+    ; GFX9-MESA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; GFX9-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C2]](s32)
     ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
     ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
     ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
     ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
-    ; GFX9-MESA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[DEF]](s32)
-    ; GFX9-MESA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[DEF1]](<2 x s16>)
-    ; GFX9-MESA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
-    ; GFX9-MESA: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[UV]](<3 x s16>), 0
-    ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
+    ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32)
+    ; GFX9-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; GFX9-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32)
+    ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+    ; GFX9-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<3 x s16>) = G_LOAD %0 :: (load (<3 x s16>), align 2, addrspace 1)
-    %2:_(<4 x s16>) = G_IMPLICIT_DEF
-    %3:_(<4 x s16>) = G_INSERT %2, %1, 0
-    $vgpr0_vgpr1 = COPY %3
+    %2:_(<3 x s16>) = G_IMPLICIT_DEF
+    %3:_(<6 x s16>) = G_CONCAT_VECTORS %1, %2
+    $vgpr0_vgpr1_vgpr2 = COPY %3
 ...
 
 ---
@@ -6827,29 +7854,46 @@ body: |
     ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32)
     ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
     ; SI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
+    ; SI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+    ; SI: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; SI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; SI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
+    ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C6]](s32)
+    ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C6]](s32)
     ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
     ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
     ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32)
     ; SI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]]
-    ; SI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+    ; SI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
     ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; SI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C7]], [[C6]](s32)
+    ; SI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C7]]
+    ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C6]](s32)
     ; SI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]]
-    ; SI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
-    ; SI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
-    ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[DEF]](<2 x s16>)
-    ; SI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
-    ; SI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
-    ; SI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
+    ; SI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+    ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C7]]
+    ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; SI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C7]]
+    ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C6]](s32)
+    ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL5]]
+    ; SI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+    ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+    ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; CI-HSA-LABEL: name: test_load_global_v3s16_align1
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; CI-HSA: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[COPY]](p1) :: (load (<3 x s16>), align 1, addrspace 1)
     ; CI-HSA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CI-HSA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[LOAD]](<3 x s16>), 0
-    ; CI-HSA: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
+    ; CI-HSA: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CI-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
+    ; CI-HSA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
+    ; CI-HSA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[LOAD]](<3 x s16>), [[UV]](<3 x s16>)
+    ; CI-HSA: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS1]](<6 x s16>)
     ; CI-MESA-LABEL: name: test_load_global_v3s16_align1
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
@@ -6892,23 +7936,37 @@ body: |
     ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32)
     ; CI-MESA: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
     ; CI-MESA: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
+    ; CI-MESA: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+    ; CI-MESA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; CI-MESA: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CI-MESA: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CI-MESA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
+    ; CI-MESA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; CI-MESA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C6]](s32)
+    ; CI-MESA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; CI-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C6]](s32)
     ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
     ; CI-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; CI-MESA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
     ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32)
     ; CI-MESA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]]
-    ; CI-MESA: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+    ; CI-MESA: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
     ; CI-MESA: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; CI-MESA: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; CI-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C7]], [[C6]](s32)
+    ; CI-MESA: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; CI-MESA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C7]]
+    ; CI-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C6]](s32)
     ; CI-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]]
-    ; CI-MESA: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
-    ; CI-MESA: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
-    ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[DEF]](<2 x s16>)
-    ; CI-MESA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
-    ; CI-MESA: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CI-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
-    ; CI-MESA: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
+    ; CI-MESA: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+    ; CI-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; CI-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C7]]
+    ; CI-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; CI-MESA: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C7]]
+    ; CI-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C6]](s32)
+    ; CI-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL5]]
+    ; CI-MESA: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+    ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+    ; CI-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; VI-LABEL: name: test_load_global_v3s16_align1
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
@@ -6945,29 +8003,46 @@ body: |
     ; VI: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]]
     ; VI: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16)
     ; VI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
+    ; VI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+    ; VI: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; VI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; VI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
+    ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C5]](s32)
+    ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C5]](s32)
     ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
     ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
     ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
     ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]]
-    ; VI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+    ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
     ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[C5]](s32)
+    ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C6]]
+    ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
     ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]]
-    ; VI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
-    ; VI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[DEF]](<2 x s16>)
-    ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
-    ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
-    ; VI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
+    ; VI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+    ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C6]]
+    ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C6]]
+    ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C5]](s32)
+    ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL5]]
+    ; VI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; GFX9-HSA-LABEL: name: test_load_global_v3s16_align1
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[COPY]](p1) :: (load (<3 x s16>), align 1, addrspace 1)
     ; GFX9-HSA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-HSA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[LOAD]](<3 x s16>), 0
-    ; GFX9-HSA: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
+    ; GFX9-HSA: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
+    ; GFX9-HSA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
+    ; GFX9-HSA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[LOAD]](<3 x s16>), [[UV]](<3 x s16>)
+    ; GFX9-HSA: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS1]](<6 x s16>)
     ; GFX9-MESA-LABEL: name: test_load_global_v3s16_align1
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
@@ -7004,23 +8079,32 @@ body: |
     ; GFX9-MESA: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]]
     ; GFX9-MESA: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16)
     ; GFX9-MESA: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
+    ; GFX9-MESA: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+    ; GFX9-MESA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-MESA: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-MESA: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-MESA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
+    ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; GFX9-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C5]](s32)
+    ; GFX9-MESA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; GFX9-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C5]](s32)
     ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
     ; GFX9-MESA: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16)
     ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32)
     ; GFX9-MESA: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[OR2]](s16)
-    ; GFX9-MESA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[DEF]](s32)
-    ; GFX9-MESA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[DEF1]](<2 x s16>)
-    ; GFX9-MESA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
-    ; GFX9-MESA: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[UV]](<3 x s16>), 0
-    ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
+    ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[COPY1]](s32)
+    ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY2]](s32), [[COPY3]](s32)
+    ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+    ; GFX9-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<3 x s16>) = G_LOAD %0 :: (load (<3 x s16>), align 1, addrspace 1)
-    %2:_(<4 x s16>) = G_IMPLICIT_DEF
-    %3:_(<4 x s16>) = G_INSERT %2, %1, 0
-    $vgpr0_vgpr1 = COPY %3
+    %2:_(<3 x s16>) = G_IMPLICIT_DEF
+    %3:_(<6 x s16>) = G_CONCAT_VECTORS %1, %2
+    $vgpr0_vgpr1_vgpr2 = COPY %3
 ...
 
 ---

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir
index e332da17579d..509c0ba1b376 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir
@@ -7765,15 +7765,31 @@ body: |
     ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
     ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
     ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
-    ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; SI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; SI: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; SI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]]
+    ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
     ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
-    ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
-    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
-    ; SI: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>)
-    ; SI: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
-    ; SI: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF]](<4 x s32>)
-    ; SI: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[TRUNC1]], [[TRUNC]](<3 x s8>), 0
-    ; SI: $vgpr0 = COPY [[INSERT]](<4 x s8>)
+    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C4]]
+    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+    ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; SI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
+    ; SI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C3]]
+    ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[DEF]](s32)
+    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C4]]
+    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY3]](s32)
+    ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; SI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32)
+    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[OR2]](s32)
+    ; SI: $vgpr0 = COPY [[COPY5]](s32)
     ; CI-LABEL: name: test_load_local_v3s8_align4
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 1)
@@ -7783,15 +7799,31 @@ body: |
     ; CI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
     ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
     ; CI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
-    ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; CI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CI: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; CI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]]
+    ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
     ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
-    ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
-    ; CI: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>)
-    ; CI: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
-    ; CI: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF]](<4 x s32>)
-    ; CI: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[TRUNC1]], [[TRUNC]](<3 x s8>), 0
-    ; CI: $vgpr0 = COPY [[INSERT]](<4 x s8>)
+    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C4]]
+    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+    ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; CI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
+    ; CI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C3]]
+    ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[DEF]](s32)
+    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C4]]
+    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY3]](s32)
+    ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; CI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32)
+    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[OR2]](s32)
+    ; CI: $vgpr0 = COPY [[COPY5]](s32)
     ; CI-DS128-LABEL: name: test_load_local_v3s8_align4
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 1)
@@ -7801,15 +7833,31 @@ body: |
     ; CI-DS128: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
     ; CI-DS128: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
     ; CI-DS128: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
-    ; CI-DS128: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; CI-DS128: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CI-DS128: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CI-DS128: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; CI-DS128: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]]
+    ; CI-DS128: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CI-DS128: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
     ; CI-DS128: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
-    ; CI-DS128: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
-    ; CI-DS128: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
-    ; CI-DS128: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>)
-    ; CI-DS128: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
-    ; CI-DS128: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF]](<4 x s32>)
-    ; CI-DS128: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[TRUNC1]], [[TRUNC]](<3 x s8>), 0
-    ; CI-DS128: $vgpr0 = COPY [[INSERT]](<4 x s8>)
+    ; CI-DS128: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C4]]
+    ; CI-DS128: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+    ; CI-DS128: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI-DS128: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; CI-DS128: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
+    ; CI-DS128: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C3]]
+    ; CI-DS128: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CI-DS128: [[COPY4:%[0-9]+]]:_(s32) = COPY [[DEF]](s32)
+    ; CI-DS128: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C4]]
+    ; CI-DS128: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY3]](s32)
+    ; CI-DS128: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; CI-DS128: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; CI-DS128: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; CI-DS128: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; CI-DS128: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32)
+    ; CI-DS128: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; CI-DS128: [[COPY5:%[0-9]+]]:_(s32) = COPY [[OR2]](s32)
+    ; CI-DS128: $vgpr0 = COPY [[COPY5]](s32)
     ; VI-LABEL: name: test_load_local_v3s8_align4
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 1)
@@ -7819,15 +7867,27 @@ body: |
     ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
     ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
     ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
-    ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
-    ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
-    ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
-    ; VI: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>)
-    ; VI: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
-    ; VI: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF]](<4 x s32>)
-    ; VI: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[TRUNC1]], [[TRUNC]](<3 x s8>), 0
-    ; VI: $vgpr0 = COPY [[INSERT]](<4 x s8>)
+    ; VI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; VI: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]]
+    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+    ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]]
+    ; VI: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C4]](s16)
+    ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
+    ; VI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C3]]
+    ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
+    ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]]
+    ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C4]](s16)
+    ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32)
+    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[OR2]](s32)
+    ; VI: $vgpr0 = COPY [[COPY1]](s32)
     ; GFX9-LABEL: name: test_load_local_v3s8_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 1)
@@ -7837,20 +7897,27 @@ body: |
     ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
     ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
     ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
-    ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
-    ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
-    ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
     ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[DEF]](s32)
-    ; GFX9: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
-    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[DEF1]](<2 x s16>)
-    ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
-    ; GFX9: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[UV]](<3 x s16>)
-    ; GFX9: [[DEF2:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
-    ; GFX9: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF2]](<4 x s32>)
-    ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[TRUNC1]], [[TRUNC]](<3 x s8>), 0
-    ; GFX9: $vgpr0 = COPY [[INSERT]](<4 x s8>)
+    ; GFX9: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]]
+    ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+    ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]]
+    ; GFX9: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C4]](s16)
+    ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
+    ; GFX9: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C3]]
+    ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
+    ; GFX9: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]]
+    ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C4]](s16)
+    ; GFX9: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32)
+    ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[OR2]](s32)
+    ; GFX9: $vgpr0 = COPY [[COPY1]](s32)
     ; GFX9-UNALIGNED-LABEL: name: test_load_local_v3s8_align4
     ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 1)
@@ -7860,20 +7927,27 @@ body: |
     ; GFX9-UNALIGNED: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
     ; GFX9-UNALIGNED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
     ; GFX9-UNALIGNED: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
-    ; GFX9-UNALIGNED: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
-    ; GFX9-UNALIGNED: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
-    ; GFX9-UNALIGNED: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
-    ; GFX9-UNALIGNED: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
     ; GFX9-UNALIGNED: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; GFX9-UNALIGNED: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[DEF]](s32)
-    ; GFX9-UNALIGNED: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-UNALIGNED: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[DEF1]](<2 x s16>)
-    ; GFX9-UNALIGNED: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
-    ; GFX9-UNALIGNED: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[UV]](<3 x s16>)
-    ; GFX9-UNALIGNED: [[DEF2:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
-    ; GFX9-UNALIGNED: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF2]](<4 x s32>)
-    ; GFX9-UNALIGNED: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[TRUNC1]], [[TRUNC]](<3 x s8>), 0
-    ; GFX9-UNALIGNED: $vgpr0 = COPY [[INSERT]](<4 x s8>)
+    ; GFX9-UNALIGNED: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX9-UNALIGNED: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX9-UNALIGNED: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]]
+    ; GFX9-UNALIGNED: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+    ; GFX9-UNALIGNED: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]]
+    ; GFX9-UNALIGNED: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9-UNALIGNED: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C4]](s16)
+    ; GFX9-UNALIGNED: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX9-UNALIGNED: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
+    ; GFX9-UNALIGNED: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C3]]
+    ; GFX9-UNALIGNED: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
+    ; GFX9-UNALIGNED: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]]
+    ; GFX9-UNALIGNED: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C4]](s16)
+    ; GFX9-UNALIGNED: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; GFX9-UNALIGNED: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; GFX9-UNALIGNED: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; GFX9-UNALIGNED: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32)
+    ; GFX9-UNALIGNED: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; GFX9-UNALIGNED: [[COPY1:%[0-9]+]]:_(s32) = COPY [[OR2]](s32)
+    ; GFX9-UNALIGNED: $vgpr0 = COPY [[COPY1]](s32)
     ; GFX10-LABEL: name: test_load_local_v3s8_align4
     ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 1)
@@ -7883,20 +7957,27 @@ body: |
     ; GFX10: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
     ; GFX10: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
     ; GFX10: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
-    ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
-    ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
-    ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
-    ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
     ; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[DEF]](s32)
-    ; GFX10: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
-    ; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[DEF1]](<2 x s16>)
-    ; GFX10: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
-    ; GFX10: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[UV]](<3 x s16>)
-    ; GFX10: [[DEF2:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
-    ; GFX10: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF2]](<4 x s32>)
-    ; GFX10: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[TRUNC1]], [[TRUNC]](<3 x s8>), 0
-    ; GFX10: $vgpr0 = COPY [[INSERT]](<4 x s8>)
+    ; GFX10: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX10: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX10: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]]
+    ; GFX10: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+    ; GFX10: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]]
+    ; GFX10: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX10: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C4]](s16)
+    ; GFX10: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX10: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
+    ; GFX10: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C3]]
+    ; GFX10: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
+    ; GFX10: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]]
+    ; GFX10: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C4]](s16)
+    ; GFX10: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; GFX10: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; GFX10: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; GFX10: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32)
+    ; GFX10: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY [[OR2]](s32)
+    ; GFX10: $vgpr0 = COPY [[COPY1]](s32)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_v3s8_align4
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 1)
@@ -7906,24 +7987,31 @@ body: |
     ; GFX10-UNALIGNED: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
     ; GFX10-UNALIGNED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
     ; GFX10-UNALIGNED: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
-    ; GFX10-UNALIGNED: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
-    ; GFX10-UNALIGNED: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
-    ; GFX10-UNALIGNED: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
-    ; GFX10-UNALIGNED: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
     ; GFX10-UNALIGNED: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; GFX10-UNALIGNED: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[DEF]](s32)
-    ; GFX10-UNALIGNED: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
-    ; GFX10-UNALIGNED: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[DEF1]](<2 x s16>)
-    ; GFX10-UNALIGNED: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
-    ; GFX10-UNALIGNED: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[UV]](<3 x s16>)
-    ; GFX10-UNALIGNED: [[DEF2:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
-    ; GFX10-UNALIGNED: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF2]](<4 x s32>)
-    ; GFX10-UNALIGNED: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[TRUNC1]], [[TRUNC]](<3 x s8>), 0
-    ; GFX10-UNALIGNED: $vgpr0 = COPY [[INSERT]](<4 x s8>)
+    ; GFX10-UNALIGNED: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX10-UNALIGNED: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX10-UNALIGNED: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]]
+    ; GFX10-UNALIGNED: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+    ; GFX10-UNALIGNED: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]]
+    ; GFX10-UNALIGNED: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX10-UNALIGNED: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C4]](s16)
+    ; GFX10-UNALIGNED: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX10-UNALIGNED: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
+    ; GFX10-UNALIGNED: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C3]]
+    ; GFX10-UNALIGNED: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
+    ; GFX10-UNALIGNED: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]]
+    ; GFX10-UNALIGNED: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C4]](s16)
+    ; GFX10-UNALIGNED: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; GFX10-UNALIGNED: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; GFX10-UNALIGNED: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; GFX10-UNALIGNED: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32)
+    ; GFX10-UNALIGNED: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; GFX10-UNALIGNED: [[COPY1:%[0-9]+]]:_(s32) = COPY [[OR2]](s32)
+    ; GFX10-UNALIGNED: $vgpr0 = COPY [[COPY1]](s32)
     %0:_(p3) = COPY $vgpr0
     %1:_(<3 x s8>) = G_LOAD %0 :: (load (<3 x s8>), addrspace 1, align 4)
-    %2:_(<4 x s8>) = G_IMPLICIT_DEF
-    %3:_(<4 x s8>) = G_INSERT %2, %1, 0
+    %2:_(s24) = G_BITCAST %1
+    %3:_(s32) = G_ANYEXT %2
     $vgpr0 = COPY %3
 ...
 
@@ -7942,15 +8030,32 @@ body: |
     ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
     ; SI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
     ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; SI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; SI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; SI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
+    ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
     ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
-    ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
-    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
-    ; SI: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>)
-    ; SI: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
-    ; SI: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF]](<4 x s32>)
-    ; SI: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[TRUNC1]], [[TRUNC]](<3 x s8>), 0
-    ; SI: $vgpr0 = COPY [[INSERT]](<4 x s8>)
+    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C4]]
+    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+    ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; SI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; SI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[DEF]](s32)
+    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C4]]
+    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32)
+    ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; SI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
+    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[OR2]](s32)
+    ; SI: $vgpr0 = COPY [[COPY4]](s32)
     ; CI-LABEL: name: test_load_local_v3s8_align1
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
@@ -7960,15 +8065,32 @@ body: |
     ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
     ; CI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
     ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; CI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; CI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
+    ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
     ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
-    ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
-    ; CI: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>)
-    ; CI: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
-    ; CI: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF]](<4 x s32>)
-    ; CI: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[TRUNC1]], [[TRUNC]](<3 x s8>), 0
-    ; CI: $vgpr0 = COPY [[INSERT]](<4 x s8>)
+    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C4]]
+    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+    ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; CI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; CI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[DEF]](s32)
+    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C4]]
+    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32)
+    ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; CI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
+    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[OR2]](s32)
+    ; CI: $vgpr0 = COPY [[COPY4]](s32)
     ; CI-DS128-LABEL: name: test_load_local_v3s8_align1
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
@@ -7978,15 +8100,32 @@ body: |
     ; CI-DS128: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
     ; CI-DS128: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
     ; CI-DS128: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; CI-DS128: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; CI-DS128: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CI-DS128: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CI-DS128: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; CI-DS128: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; CI-DS128: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-DS128: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
+    ; CI-DS128: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
     ; CI-DS128: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
-    ; CI-DS128: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
-    ; CI-DS128: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
-    ; CI-DS128: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>)
-    ; CI-DS128: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
-    ; CI-DS128: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF]](<4 x s32>)
-    ; CI-DS128: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[TRUNC1]], [[TRUNC]](<3 x s8>), 0
-    ; CI-DS128: $vgpr0 = COPY [[INSERT]](<4 x s8>)
+    ; CI-DS128: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C4]]
+    ; CI-DS128: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+    ; CI-DS128: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI-DS128: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; CI-DS128: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; CI-DS128: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; CI-DS128: [[COPY3:%[0-9]+]]:_(s32) = COPY [[DEF]](s32)
+    ; CI-DS128: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C4]]
+    ; CI-DS128: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32)
+    ; CI-DS128: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; CI-DS128: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; CI-DS128: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; CI-DS128: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; CI-DS128: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-DS128: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
+    ; CI-DS128: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; CI-DS128: [[COPY4:%[0-9]+]]:_(s32) = COPY [[OR2]](s32)
+    ; CI-DS128: $vgpr0 = COPY [[COPY4]](s32)
     ; VI-LABEL: name: test_load_local_v3s8_align1
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
@@ -7996,15 +8135,28 @@ body: |
     ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
     ; VI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
     ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
-    ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
-    ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
-    ; VI: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>)
-    ; VI: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
-    ; VI: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF]](<4 x s32>)
-    ; VI: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[TRUNC1]], [[TRUNC]](<3 x s8>), 0
-    ; VI: $vgpr0 = COPY [[INSERT]](<4 x s8>)
+    ; VI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; VI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
+    ; VI: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
+    ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; VI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
+    ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]]
+    ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
+    ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
+    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[OR2]](s32)
+    ; VI: $vgpr0 = COPY [[COPY1]](s32)
     ; GFX9-LABEL: name: test_load_local_v3s8_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
@@ -8014,20 +8166,28 @@ body: |
     ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
     ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
     ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
-    ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
-    ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
     ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[DEF]](s32)
-    ; GFX9: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
-    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[DEF1]](<2 x s16>)
-    ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
-    ; GFX9: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[UV]](<3 x s16>)
-    ; GFX9: [[DEF2:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
-    ; GFX9: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF2]](<4 x s32>)
-    ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[TRUNC1]], [[TRUNC]](<3 x s8>), 0
-    ; GFX9: $vgpr0 = COPY [[INSERT]](<4 x s8>)
+    ; GFX9: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
+    ; GFX9: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
+    ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; GFX9: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
+    ; GFX9: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]]
+    ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
+    ; GFX9: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
+    ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[OR2]](s32)
+    ; GFX9: $vgpr0 = COPY [[COPY1]](s32)
     ; GFX9-UNALIGNED-LABEL: name: test_load_local_v3s8_align1
     ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; GFX9-UNALIGNED: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), align 1, addrspace 3)
@@ -8042,20 +8202,27 @@ body: |
     ; GFX9-UNALIGNED: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C1]](s32)
     ; GFX9-UNALIGNED: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
     ; GFX9-UNALIGNED: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C3]](s32)
-    ; GFX9-UNALIGNED: [[COPY1:%[0-9]+]]:_(s32) = COPY [[OR]](s32)
-    ; GFX9-UNALIGNED: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
-    ; GFX9-UNALIGNED: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
-    ; GFX9-UNALIGNED: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
     ; GFX9-UNALIGNED: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; GFX9-UNALIGNED: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[DEF]](s32)
-    ; GFX9-UNALIGNED: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-UNALIGNED: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[DEF1]](<2 x s16>)
-    ; GFX9-UNALIGNED: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
-    ; GFX9-UNALIGNED: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[UV]](<3 x s16>)
-    ; GFX9-UNALIGNED: [[DEF2:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
-    ; GFX9-UNALIGNED: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF2]](<4 x s32>)
-    ; GFX9-UNALIGNED: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[TRUNC1]], [[TRUNC]](<3 x s8>), 0
-    ; GFX9-UNALIGNED: $vgpr0 = COPY [[INSERT]](<4 x s8>)
+    ; GFX9-UNALIGNED: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX9-UNALIGNED: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR]](s32)
+    ; GFX9-UNALIGNED: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]]
+    ; GFX9-UNALIGNED: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+    ; GFX9-UNALIGNED: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C4]]
+    ; GFX9-UNALIGNED: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9-UNALIGNED: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C5]](s16)
+    ; GFX9-UNALIGNED: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL1]]
+    ; GFX9-UNALIGNED: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
+    ; GFX9-UNALIGNED: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C4]]
+    ; GFX9-UNALIGNED: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
+    ; GFX9-UNALIGNED: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C4]]
+    ; GFX9-UNALIGNED: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C5]](s16)
+    ; GFX9-UNALIGNED: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL2]]
+    ; GFX9-UNALIGNED: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; GFX9-UNALIGNED: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; GFX9-UNALIGNED: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32)
+    ; GFX9-UNALIGNED: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]]
+    ; GFX9-UNALIGNED: [[COPY1:%[0-9]+]]:_(s32) = COPY [[OR3]](s32)
+    ; GFX9-UNALIGNED: $vgpr0 = COPY [[COPY1]](s32)
     ; GFX10-LABEL: name: test_load_local_v3s8_align1
     ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
@@ -8065,20 +8232,28 @@ body: |
     ; GFX10: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
     ; GFX10: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
     ; GFX10: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
-    ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
-    ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
-    ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
     ; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[DEF]](s32)
-    ; GFX10: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
-    ; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[DEF1]](<2 x s16>)
-    ; GFX10: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
-    ; GFX10: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[UV]](<3 x s16>)
-    ; GFX10: [[DEF2:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
-    ; GFX10: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF2]](<4 x s32>)
-    ; GFX10: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[TRUNC1]], [[TRUNC]](<3 x s8>), 0
-    ; GFX10: $vgpr0 = COPY [[INSERT]](<4 x s8>)
+    ; GFX10: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX10: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX10: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; GFX10: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; GFX10: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
+    ; GFX10: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX10: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
+    ; GFX10: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX10: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; GFX10: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; GFX10: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
+    ; GFX10: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]]
+    ; GFX10: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
+    ; GFX10: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; GFX10: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; GFX10: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; GFX10: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX10: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
+    ; GFX10: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY [[OR2]](s32)
+    ; GFX10: $vgpr0 = COPY [[COPY1]](s32)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_v3s8_align1
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
@@ -8088,24 +8263,32 @@ body: |
     ; GFX10-UNALIGNED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
     ; GFX10-UNALIGNED: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
     ; GFX10-UNALIGNED: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3)
-    ; GFX10-UNALIGNED: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
-    ; GFX10-UNALIGNED: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
-    ; GFX10-UNALIGNED: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
-    ; GFX10-UNALIGNED: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
     ; GFX10-UNALIGNED: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; GFX10-UNALIGNED: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[DEF]](s32)
-    ; GFX10-UNALIGNED: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
-    ; GFX10-UNALIGNED: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[DEF1]](<2 x s16>)
-    ; GFX10-UNALIGNED: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
-    ; GFX10-UNALIGNED: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[UV]](<3 x s16>)
-    ; GFX10-UNALIGNED: [[DEF2:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
-    ; GFX10-UNALIGNED: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF2]](<4 x s32>)
-    ; GFX10-UNALIGNED: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[TRUNC1]], [[TRUNC]](<3 x s8>), 0
-    ; GFX10-UNALIGNED: $vgpr0 = COPY [[INSERT]](<4 x s8>)
+    ; GFX10-UNALIGNED: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX10-UNALIGNED: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX10-UNALIGNED: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; GFX10-UNALIGNED: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; GFX10-UNALIGNED: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
+    ; GFX10-UNALIGNED: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX10-UNALIGNED: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
+    ; GFX10-UNALIGNED: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX10-UNALIGNED: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; GFX10-UNALIGNED: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; GFX10-UNALIGNED: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
+    ; GFX10-UNALIGNED: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]]
+    ; GFX10-UNALIGNED: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
+    ; GFX10-UNALIGNED: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; GFX10-UNALIGNED: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; GFX10-UNALIGNED: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; GFX10-UNALIGNED: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX10-UNALIGNED: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
+    ; GFX10-UNALIGNED: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; GFX10-UNALIGNED: [[COPY1:%[0-9]+]]:_(s32) = COPY [[OR2]](s32)
+    ; GFX10-UNALIGNED: $vgpr0 = COPY [[COPY1]](s32)
     %0:_(p3) = COPY $vgpr0
     %1:_(<3 x s8>) = G_LOAD %0 :: (load (<3 x s8>), align 1, addrspace 3)
-    %2:_(<4 x s8>) = G_IMPLICIT_DEF
-    %3:_(<4 x s8>) = G_INSERT %2, %1, 0
+    %2:_(s24) = G_BITCAST %1
+    %3:_(s32) = G_ANYEXT %2
     $vgpr0 = COPY %3
 ...
 
@@ -8124,13 +8307,22 @@ body: |
     ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
     ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
     ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
+    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
     ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
     ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]]
+    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
     ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]]
+    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+    ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
     ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
-    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32)
-    ; SI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>)
-    ; SI: $vgpr0 = COPY [[TRUNC]](<4 x s8>)
+    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
+    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; SI: $vgpr0 = COPY [[OR2]](s32)
     ; CI-LABEL: name: test_load_local_v4s8_align4
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
@@ -8140,13 +8332,22 @@ body: |
     ; CI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
     ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
     ; CI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
+    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
     ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
     ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]]
+    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
     ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]]
+    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+    ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
     ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32)
-    ; CI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>)
-    ; CI: $vgpr0 = COPY [[TRUNC]](<4 x s8>)
+    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
+    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; CI: $vgpr0 = COPY [[OR2]](s32)
     ; CI-DS128-LABEL: name: test_load_local_v4s8_align4
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
@@ -8156,13 +8357,22 @@ body: |
     ; CI-DS128: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
     ; CI-DS128: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
     ; CI-DS128: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
+    ; CI-DS128: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
     ; CI-DS128: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; CI-DS128: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
     ; CI-DS128: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; CI-DS128: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]]
+    ; CI-DS128: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; CI-DS128: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
     ; CI-DS128: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; CI-DS128: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]]
+    ; CI-DS128: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+    ; CI-DS128: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
     ; CI-DS128: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
-    ; CI-DS128: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32)
-    ; CI-DS128: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>)
-    ; CI-DS128: $vgpr0 = COPY [[TRUNC]](<4 x s8>)
+    ; CI-DS128: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
+    ; CI-DS128: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; CI-DS128: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; CI-DS128: $vgpr0 = COPY [[OR2]](s32)
     ; VI-LABEL: name: test_load_local_v4s8_align4
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
@@ -8172,13 +8382,22 @@ body: |
     ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
     ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
     ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
+    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
     ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
     ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]]
+    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
     ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]]
+    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
     ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32)
-    ; VI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>)
-    ; VI: $vgpr0 = COPY [[TRUNC]](<4 x s8>)
+    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
+    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; VI: $vgpr0 = COPY [[OR2]](s32)
     ; GFX9-LABEL: name: test_load_local_v4s8_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
@@ -8188,15 +8407,22 @@ body: |
     ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
     ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
     ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
+    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
     ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
     ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
+    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]]
+    ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
     ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]]
+    ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+    ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
     ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32)
-    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
-    ; GFX9: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>)
-    ; GFX9: $vgpr0 = COPY [[TRUNC]](<4 x s8>)
+    ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
+    ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; GFX9: $vgpr0 = COPY [[OR2]](s32)
     ; GFX9-UNALIGNED-LABEL: name: test_load_local_v4s8_align4
     ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
@@ -8206,15 +8432,22 @@ body: |
     ; GFX9-UNALIGNED: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
     ; GFX9-UNALIGNED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
     ; GFX9-UNALIGNED: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
+    ; GFX9-UNALIGNED: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
     ; GFX9-UNALIGNED: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; GFX9-UNALIGNED: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
     ; GFX9-UNALIGNED: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
-    ; GFX9-UNALIGNED: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
+    ; GFX9-UNALIGNED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]]
+    ; GFX9-UNALIGNED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; GFX9-UNALIGNED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
     ; GFX9-UNALIGNED: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; GFX9-UNALIGNED: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]]
+    ; GFX9-UNALIGNED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+    ; GFX9-UNALIGNED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
     ; GFX9-UNALIGNED: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
-    ; GFX9-UNALIGNED: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32)
-    ; GFX9-UNALIGNED: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
-    ; GFX9-UNALIGNED: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>)
-    ; GFX9-UNALIGNED: $vgpr0 = COPY [[TRUNC]](<4 x s8>)
+    ; GFX9-UNALIGNED: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
+    ; GFX9-UNALIGNED: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; GFX9-UNALIGNED: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; GFX9-UNALIGNED: $vgpr0 = COPY [[OR2]](s32)
     ; GFX10-LABEL: name: test_load_local_v4s8_align4
     ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
@@ -8224,15 +8457,22 @@ body: |
     ; GFX10: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
     ; GFX10: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
     ; GFX10: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
+    ; GFX10: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
     ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; GFX10: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
     ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
-    ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
+    ; GFX10: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]]
+    ; GFX10: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; GFX10: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
     ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; GFX10: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]]
+    ; GFX10: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+    ; GFX10: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
     ; GFX10: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
-    ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32)
-    ; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
-    ; GFX10: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>)
-    ; GFX10: $vgpr0 = COPY [[TRUNC]](<4 x s8>)
+    ; GFX10: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
+    ; GFX10: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; GFX10: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; GFX10: $vgpr0 = COPY [[OR2]](s32)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_v4s8_align4
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
@@ -8242,18 +8482,26 @@ body: |
     ; GFX10-UNALIGNED: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
     ; GFX10-UNALIGNED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
     ; GFX10-UNALIGNED: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
+    ; GFX10-UNALIGNED: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
     ; GFX10-UNALIGNED: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; GFX10-UNALIGNED: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
     ; GFX10-UNALIGNED: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
-    ; GFX10-UNALIGNED: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
+    ; GFX10-UNALIGNED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]]
+    ; GFX10-UNALIGNED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; GFX10-UNALIGNED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
     ; GFX10-UNALIGNED: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; GFX10-UNALIGNED: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]]
+    ; GFX10-UNALIGNED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+    ; GFX10-UNALIGNED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
     ; GFX10-UNALIGNED: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
-    ; GFX10-UNALIGNED: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32)
-    ; GFX10-UNALIGNED: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
-    ; GFX10-UNALIGNED: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>)
-    ; GFX10-UNALIGNED: $vgpr0 = COPY [[TRUNC]](<4 x s8>)
+    ; GFX10-UNALIGNED: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
+    ; GFX10-UNALIGNED: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; GFX10-UNALIGNED: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; GFX10-UNALIGNED: $vgpr0 = COPY [[OR2]](s32)
     %0:_(p3) = COPY $vgpr0
     %1:_(<4 x s8>) = G_LOAD %0 :: (load (<4 x s8>), align 4, addrspace 3)
-    $vgpr0 = COPY %1
+    %2:_(s32) = G_BITCAST %1
+    $vgpr0 = COPY %2
 ...
 
 ---
@@ -8272,23 +8520,40 @@ body: |
     ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
     ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
     ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
+    ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; SI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
+    ; SI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
     ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
+    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
     ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]]
+    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
     ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]]
+    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+    ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
     ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
-    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32)
-    ; SI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>)
-    ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
-    ; SI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
-    ; SI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
+    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
     ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV1]](s32)
+    ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]]
     ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32)
+    ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]]
+    ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; SI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
     ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32)
+    ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]]
+    ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C1]](s32)
+    ; SI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
     ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32)
-    ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32)
-    ; SI: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR1]](<4 x s32>)
-    ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s8>), [[TRUNC1]](<4 x s8>)
-    ; SI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<8 x s8>)
+    ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]]
+    ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
+    ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
+    ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     ; CI-LABEL: name: test_load_local_v8s8_align8
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; CI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3)
@@ -8299,23 +8564,40 @@ body: |
     ; CI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
     ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
     ; CI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
+    ; CI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; CI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
+    ; CI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
     ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
+    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
     ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]]
+    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
     ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]]
+    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+    ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
     ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32)
-    ; CI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>)
-    ; CI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
-    ; CI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
-    ; CI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
+    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
     ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV1]](s32)
+    ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]]
     ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32)
+    ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]]
+    ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; CI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
     ; CI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32)
+    ; CI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]]
+    ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C1]](s32)
+    ; CI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
     ; CI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32)
-    ; CI: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32)
-    ; CI: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR1]](<4 x s32>)
-    ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s8>), [[TRUNC1]](<4 x s8>)
-    ; CI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<8 x s8>)
+    ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]]
+    ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
+    ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
+    ; CI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     ; CI-DS128-LABEL: name: test_load_local_v8s8_align8
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; CI-DS128: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3)
@@ -8326,23 +8608,40 @@ body: |
     ; CI-DS128: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
     ; CI-DS128: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
     ; CI-DS128: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
+    ; CI-DS128: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; CI-DS128: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
+    ; CI-DS128: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; CI-DS128: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
     ; CI-DS128: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
+    ; CI-DS128: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
     ; CI-DS128: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; CI-DS128: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]]
+    ; CI-DS128: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; CI-DS128: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
     ; CI-DS128: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; CI-DS128: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]]
+    ; CI-DS128: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+    ; CI-DS128: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
     ; CI-DS128: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
-    ; CI-DS128: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32)
-    ; CI-DS128: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>)
-    ; CI-DS128: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
-    ; CI-DS128: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
-    ; CI-DS128: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; CI-DS128: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
+    ; CI-DS128: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; CI-DS128: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
     ; CI-DS128: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV1]](s32)
+    ; CI-DS128: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]]
     ; CI-DS128: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32)
+    ; CI-DS128: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]]
+    ; CI-DS128: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; CI-DS128: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
     ; CI-DS128: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32)
+    ; CI-DS128: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]]
+    ; CI-DS128: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C1]](s32)
+    ; CI-DS128: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
     ; CI-DS128: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32)
-    ; CI-DS128: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32)
-    ; CI-DS128: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR1]](<4 x s32>)
-    ; CI-DS128: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s8>), [[TRUNC1]](<4 x s8>)
-    ; CI-DS128: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<8 x s8>)
+    ; CI-DS128: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]]
+    ; CI-DS128: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
+    ; CI-DS128: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; CI-DS128: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
+    ; CI-DS128: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     ; VI-LABEL: name: test_load_local_v8s8_align8
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; VI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3)
@@ -8353,23 +8652,40 @@ body: |
     ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
     ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
     ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
+    ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; VI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
+    ; VI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
     ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
+    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
     ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]]
+    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
     ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]]
+    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
     ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32)
-    ; VI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>)
-    ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
-    ; VI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
-    ; VI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
+    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
     ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV1]](s32)
+    ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]]
     ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32)
+    ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]]
+    ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
     ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32)
+    ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]]
+    ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C1]](s32)
+    ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
     ; VI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32)
-    ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32)
-    ; VI: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR1]](<4 x s32>)
-    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s8>), [[TRUNC1]](<4 x s8>)
-    ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<8 x s8>)
+    ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]]
+    ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
+    ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
+    ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     ; GFX9-LABEL: name: test_load_local_v8s8_align8
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3)
@@ -8380,27 +8696,40 @@ body: |
     ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
     ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
     ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
+    ; GFX9: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; GFX9: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
+    ; GFX9: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
     ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
+    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
     ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
+    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]]
+    ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
     ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]]
+    ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+    ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
     ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32)
-    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
-    ; GFX9: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>)
-    ; GFX9: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
-    ; GFX9: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
-    ; GFX9: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
+    ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
     ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV1]](s32)
+    ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]]
     ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32)
+    ; GFX9: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]]
+    ; GFX9: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; GFX9: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
     ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32)
+    ; GFX9: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]]
+    ; GFX9: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C1]](s32)
+    ; GFX9: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
     ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY7]](s32), [[COPY8]](s32)
-    ; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>)
-    ; GFX9: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS1]](<4 x s16>)
-    ; GFX9: [[CONCAT_VECTORS2:%[0-9]+]]:_(<8 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s8>), [[TRUNC1]](<4 x s8>)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS2]](<8 x s8>)
+    ; GFX9: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]]
+    ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
+    ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
+    ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     ; GFX9-UNALIGNED-LABEL: name: test_load_local_v8s8_align8
     ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3)
@@ -8411,27 +8740,40 @@ body: |
     ; GFX9-UNALIGNED: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
     ; GFX9-UNALIGNED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
     ; GFX9-UNALIGNED: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
+    ; GFX9-UNALIGNED: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; GFX9-UNALIGNED: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
+    ; GFX9-UNALIGNED: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; GFX9-UNALIGNED: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
     ; GFX9-UNALIGNED: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
+    ; GFX9-UNALIGNED: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
     ; GFX9-UNALIGNED: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
-    ; GFX9-UNALIGNED: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
+    ; GFX9-UNALIGNED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]]
+    ; GFX9-UNALIGNED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; GFX9-UNALIGNED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
     ; GFX9-UNALIGNED: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; GFX9-UNALIGNED: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]]
+    ; GFX9-UNALIGNED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+    ; GFX9-UNALIGNED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
     ; GFX9-UNALIGNED: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
-    ; GFX9-UNALIGNED: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32)
-    ; GFX9-UNALIGNED: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
-    ; GFX9-UNALIGNED: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>)
-    ; GFX9-UNALIGNED: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
-    ; GFX9-UNALIGNED: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
-    ; GFX9-UNALIGNED: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; GFX9-UNALIGNED: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
+    ; GFX9-UNALIGNED: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; GFX9-UNALIGNED: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
     ; GFX9-UNALIGNED: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV1]](s32)
+    ; GFX9-UNALIGNED: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]]
     ; GFX9-UNALIGNED: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32)
-    ; GFX9-UNALIGNED: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32)
+    ; GFX9-UNALIGNED: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]]
+    ; GFX9-UNALIGNED: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; GFX9-UNALIGNED: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
     ; GFX9-UNALIGNED: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32)
+    ; GFX9-UNALIGNED: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]]
+    ; GFX9-UNALIGNED: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C1]](s32)
+    ; GFX9-UNALIGNED: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
     ; GFX9-UNALIGNED: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32)
-    ; GFX9-UNALIGNED: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY7]](s32), [[COPY8]](s32)
-    ; GFX9-UNALIGNED: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>)
-    ; GFX9-UNALIGNED: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS1]](<4 x s16>)
-    ; GFX9-UNALIGNED: [[CONCAT_VECTORS2:%[0-9]+]]:_(<8 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s8>), [[TRUNC1]](<4 x s8>)
-    ; GFX9-UNALIGNED: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS2]](<8 x s8>)
+    ; GFX9-UNALIGNED: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]]
+    ; GFX9-UNALIGNED: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
+    ; GFX9-UNALIGNED: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; GFX9-UNALIGNED: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
+    ; GFX9-UNALIGNED: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     ; GFX10-LABEL: name: test_load_local_v8s8_align8
     ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; GFX10: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3)
@@ -8442,27 +8784,40 @@ body: |
     ; GFX10: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
     ; GFX10: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
     ; GFX10: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
+    ; GFX10: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; GFX10: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
+    ; GFX10: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; GFX10: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
     ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
+    ; GFX10: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
     ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
-    ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
+    ; GFX10: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]]
+    ; GFX10: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; GFX10: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
     ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; GFX10: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]]
+    ; GFX10: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+    ; GFX10: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
     ; GFX10: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
-    ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32)
-    ; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
-    ; GFX10: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>)
-    ; GFX10: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
-    ; GFX10: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
-    ; GFX10: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; GFX10: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
+    ; GFX10: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; GFX10: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
     ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV1]](s32)
+    ; GFX10: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]]
     ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32)
-    ; GFX10: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32)
+    ; GFX10: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]]
+    ; GFX10: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; GFX10: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
     ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32)
+    ; GFX10: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]]
+    ; GFX10: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C1]](s32)
+    ; GFX10: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
     ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32)
-    ; GFX10: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY7]](s32), [[COPY8]](s32)
-    ; GFX10: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>)
-    ; GFX10: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS1]](<4 x s16>)
-    ; GFX10: [[CONCAT_VECTORS2:%[0-9]+]]:_(<8 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s8>), [[TRUNC1]](<4 x s8>)
-    ; GFX10: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS2]](<8 x s8>)
+    ; GFX10: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]]
+    ; GFX10: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
+    ; GFX10: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
+    ; GFX10: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_v8s8_align8
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3)
@@ -8473,30 +8828,44 @@ body: |
     ; GFX10-UNALIGNED: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
     ; GFX10-UNALIGNED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
     ; GFX10-UNALIGNED: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
+    ; GFX10-UNALIGNED: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; GFX10-UNALIGNED: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
+    ; GFX10-UNALIGNED: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; GFX10-UNALIGNED: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
     ; GFX10-UNALIGNED: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
+    ; GFX10-UNALIGNED: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
     ; GFX10-UNALIGNED: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
-    ; GFX10-UNALIGNED: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
+    ; GFX10-UNALIGNED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]]
+    ; GFX10-UNALIGNED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; GFX10-UNALIGNED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
     ; GFX10-UNALIGNED: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; GFX10-UNALIGNED: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]]
+    ; GFX10-UNALIGNED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+    ; GFX10-UNALIGNED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
     ; GFX10-UNALIGNED: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
-    ; GFX10-UNALIGNED: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32)
-    ; GFX10-UNALIGNED: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
-    ; GFX10-UNALIGNED: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>)
-    ; GFX10-UNALIGNED: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
-    ; GFX10-UNALIGNED: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
-    ; GFX10-UNALIGNED: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; GFX10-UNALIGNED: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
+    ; GFX10-UNALIGNED: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; GFX10-UNALIGNED: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
     ; GFX10-UNALIGNED: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV1]](s32)
+    ; GFX10-UNALIGNED: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]]
     ; GFX10-UNALIGNED: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32)
-    ; GFX10-UNALIGNED: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32)
+    ; GFX10-UNALIGNED: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]]
+    ; GFX10-UNALIGNED: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; GFX10-UNALIGNED: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
     ; GFX10-UNALIGNED: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32)
+    ; GFX10-UNALIGNED: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]]
+    ; GFX10-UNALIGNED: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C1]](s32)
+    ; GFX10-UNALIGNED: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
     ; GFX10-UNALIGNED: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32)
-    ; GFX10-UNALIGNED: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY7]](s32), [[COPY8]](s32)
-    ; GFX10-UNALIGNED: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>)
-    ; GFX10-UNALIGNED: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS1]](<4 x s16>)
-    ; GFX10-UNALIGNED: [[CONCAT_VECTORS2:%[0-9]+]]:_(<8 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s8>), [[TRUNC1]](<4 x s8>)
-    ; GFX10-UNALIGNED: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS2]](<8 x s8>)
+    ; GFX10-UNALIGNED: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]]
+    ; GFX10-UNALIGNED: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
+    ; GFX10-UNALIGNED: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; GFX10-UNALIGNED: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
+    ; GFX10-UNALIGNED: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     %0:_(p3) = COPY $vgpr0
     %1:_(<8 x s8>) = G_LOAD %0 :: (load (<8 x s8>), align 8, addrspace 3)
-    $vgpr0_vgpr1 = COPY %1
+    %2:_(<2 x s32>) = G_BITCAST %1
+    $vgpr0_vgpr1 = COPY %2
 ...
 
 ---
@@ -8543,32 +8912,67 @@ body: |
     ; SI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3)
     ; SI: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
     ; SI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3)
+    ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
     ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C5]]
     ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C5]]
+    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
     ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+    ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C5]]
+    ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C6]](s32)
+    ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
     ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32)
-    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32)
-    ; SI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>)
+    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C5]]
+    ; SI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32)
+    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
     ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32)
+    ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C5]]
     ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32)
+    ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C5]]
+    ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; SI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
     ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32)
+    ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C5]]
+    ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C6]](s32)
+    ; SI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
     ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32)
-    ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32)
-    ; SI: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR1]](<4 x s32>)
+    ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C5]]
+    ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C7]](s32)
+    ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
     ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32)
+    ; SI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C5]]
     ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32)
+    ; SI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C5]]
+    ; SI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; SI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
     ; SI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD10]](s32)
+    ; SI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C5]]
+    ; SI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C6]](s32)
+    ; SI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
     ; SI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32)
-    ; SI: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32)
-    ; SI: [[TRUNC2:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR2]](<4 x s32>)
+    ; SI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C5]]
+    ; SI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C7]](s32)
+    ; SI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
     ; SI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32)
+    ; SI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C5]]
     ; SI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32)
+    ; SI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C5]]
+    ; SI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
+    ; SI: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
     ; SI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LOAD14]](s32)
+    ; SI: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C5]]
+    ; SI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C6]](s32)
+    ; SI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
     ; SI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32)
-    ; SI: [[BUILD_VECTOR3:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32)
-    ; SI: [[TRUNC3:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR3]](<4 x s32>)
-    ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s8>), [[TRUNC1]](<4 x s8>), [[TRUNC2]](<4 x s8>), [[TRUNC3]](<4 x s8>)
-    ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<16 x s8>)
+    ; SI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C5]]
+    ; SI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C7]](s32)
+    ; SI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
+    ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
     ; CI-LABEL: name: test_load_local_v16s8_align16
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
@@ -8607,32 +9011,67 @@ body: |
     ; CI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3)
     ; CI: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
     ; CI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3)
+    ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
     ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C5]]
     ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C5]]
+    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
     ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C5]]
+    ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C6]](s32)
+    ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
     ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32)
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32)
-    ; CI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>)
+    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C5]]
+    ; CI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32)
+    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
     ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32)
+    ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C5]]
     ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32)
+    ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C5]]
+    ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; CI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
     ; CI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32)
+    ; CI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C5]]
+    ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C6]](s32)
+    ; CI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
     ; CI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32)
-    ; CI: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32)
-    ; CI: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR1]](<4 x s32>)
+    ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C5]]
+    ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C7]](s32)
+    ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
     ; CI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32)
+    ; CI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C5]]
     ; CI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32)
+    ; CI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C5]]
+    ; CI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; CI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
     ; CI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD10]](s32)
+    ; CI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C5]]
+    ; CI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C6]](s32)
+    ; CI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
     ; CI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32)
-    ; CI: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32)
-    ; CI: [[TRUNC2:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR2]](<4 x s32>)
+    ; CI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C5]]
+    ; CI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C7]](s32)
+    ; CI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
     ; CI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32)
+    ; CI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C5]]
     ; CI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32)
+    ; CI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C5]]
+    ; CI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
+    ; CI: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
     ; CI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LOAD14]](s32)
+    ; CI: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C5]]
+    ; CI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C6]](s32)
+    ; CI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
     ; CI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32)
-    ; CI: [[BUILD_VECTOR3:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32)
-    ; CI: [[TRUNC3:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR3]](<4 x s32>)
-    ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s8>), [[TRUNC1]](<4 x s8>), [[TRUNC2]](<4 x s8>), [[TRUNC3]](<4 x s8>)
-    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<16 x s8>)
+    ; CI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C5]]
+    ; CI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C7]](s32)
+    ; CI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
+    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
     ; CI-DS128-LABEL: name: test_load_local_v16s8_align16
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
@@ -8672,32 +9111,67 @@ body: |
     ; CI-DS128: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3)
     ; CI-DS128: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
     ; CI-DS128: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3)
+    ; CI-DS128: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
     ; CI-DS128: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; CI-DS128: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C6]]
     ; CI-DS128: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+    ; CI-DS128: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C6]]
+    ; CI-DS128: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; CI-DS128: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
     ; CI-DS128: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+    ; CI-DS128: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C6]]
+    ; CI-DS128: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-DS128: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C7]](s32)
+    ; CI-DS128: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
     ; CI-DS128: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32)
-    ; CI-DS128: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32)
-    ; CI-DS128: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>)
+    ; CI-DS128: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C6]]
+    ; CI-DS128: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CI-DS128: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C8]](s32)
+    ; CI-DS128: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
     ; CI-DS128: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32)
+    ; CI-DS128: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C6]]
     ; CI-DS128: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32)
+    ; CI-DS128: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C6]]
+    ; CI-DS128: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; CI-DS128: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
     ; CI-DS128: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32)
+    ; CI-DS128: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C6]]
+    ; CI-DS128: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C7]](s32)
+    ; CI-DS128: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
     ; CI-DS128: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32)
-    ; CI-DS128: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32)
-    ; CI-DS128: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR1]](<4 x s32>)
+    ; CI-DS128: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C6]]
+    ; CI-DS128: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32)
+    ; CI-DS128: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
     ; CI-DS128: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32)
+    ; CI-DS128: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C6]]
     ; CI-DS128: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32)
+    ; CI-DS128: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C6]]
+    ; CI-DS128: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; CI-DS128: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
     ; CI-DS128: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD10]](s32)
+    ; CI-DS128: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C6]]
+    ; CI-DS128: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C7]](s32)
+    ; CI-DS128: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
     ; CI-DS128: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32)
-    ; CI-DS128: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32)
-    ; CI-DS128: [[TRUNC2:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR2]](<4 x s32>)
+    ; CI-DS128: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C6]]
+    ; CI-DS128: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C8]](s32)
+    ; CI-DS128: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
     ; CI-DS128: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32)
+    ; CI-DS128: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C6]]
     ; CI-DS128: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32)
+    ; CI-DS128: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C6]]
+    ; CI-DS128: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
+    ; CI-DS128: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
     ; CI-DS128: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LOAD14]](s32)
+    ; CI-DS128: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C6]]
+    ; CI-DS128: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C7]](s32)
+    ; CI-DS128: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
     ; CI-DS128: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32)
-    ; CI-DS128: [[BUILD_VECTOR3:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32)
-    ; CI-DS128: [[TRUNC3:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR3]](<4 x s32>)
-    ; CI-DS128: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s8>), [[TRUNC1]](<4 x s8>), [[TRUNC2]](<4 x s8>), [[TRUNC3]](<4 x s8>)
-    ; CI-DS128: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<16 x s8>)
+    ; CI-DS128: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C6]]
+    ; CI-DS128: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C8]](s32)
+    ; CI-DS128: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; CI-DS128: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
+    ; CI-DS128: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
     ; VI-LABEL: name: test_load_local_v16s8_align16
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
@@ -8737,32 +9211,67 @@ body: |
     ; VI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3)
     ; VI: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
     ; VI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3)
+    ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
     ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C6]]
     ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C6]]
+    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
     ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C6]]
+    ; VI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C7]](s32)
+    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
     ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32)
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32)
-    ; VI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>)
+    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C6]]
+    ; VI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C8]](s32)
+    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
     ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32)
+    ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C6]]
     ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32)
+    ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C6]]
+    ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
     ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32)
+    ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C6]]
+    ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C7]](s32)
+    ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
     ; VI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32)
-    ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32)
-    ; VI: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR1]](<4 x s32>)
+    ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C6]]
+    ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32)
+    ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
     ; VI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32)
+    ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C6]]
     ; VI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32)
+    ; VI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C6]]
+    ; VI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; VI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
     ; VI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD10]](s32)
+    ; VI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C6]]
+    ; VI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C7]](s32)
+    ; VI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
     ; VI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32)
-    ; VI: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32)
-    ; VI: [[TRUNC2:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR2]](<4 x s32>)
+    ; VI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C6]]
+    ; VI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C8]](s32)
+    ; VI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
     ; VI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32)
+    ; VI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C6]]
     ; VI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32)
+    ; VI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C6]]
+    ; VI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
+    ; VI: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
     ; VI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LOAD14]](s32)
+    ; VI: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C6]]
+    ; VI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C7]](s32)
+    ; VI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
     ; VI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32)
-    ; VI: [[BUILD_VECTOR3:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32)
-    ; VI: [[TRUNC3:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR3]](<4 x s32>)
-    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s8>), [[TRUNC1]](<4 x s8>), [[TRUNC2]](<4 x s8>), [[TRUNC3]](<4 x s8>)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<16 x s8>)
+    ; VI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C6]]
+    ; VI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C8]](s32)
+    ; VI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
+    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
     ; GFX9-LABEL: name: test_load_local_v16s8_align16
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
@@ -8802,40 +9311,67 @@ body: |
     ; GFX9: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3)
     ; GFX9: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
     ; GFX9: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3)
+    ; GFX9: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
     ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C6]]
     ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
+    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C6]]
+    ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
     ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+    ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C6]]
+    ; GFX9: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C7]](s32)
+    ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
     ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32)
-    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
-    ; GFX9: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>)
+    ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C6]]
+    ; GFX9: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C8]](s32)
+    ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
     ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32)
+    ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C6]]
     ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32)
+    ; GFX9: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C6]]
+    ; GFX9: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; GFX9: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
     ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32)
+    ; GFX9: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C6]]
+    ; GFX9: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C7]](s32)
+    ; GFX9: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
     ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY7]](s32), [[COPY8]](s32)
-    ; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>)
-    ; GFX9: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS1]](<4 x s16>)
+    ; GFX9: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C6]]
+    ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32)
+    ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
     ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32)
+    ; GFX9: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C6]]
     ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY9]](s32), [[COPY10]](s32)
+    ; GFX9: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C6]]
+    ; GFX9: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; GFX9: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
     ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD10]](s32)
+    ; GFX9: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C6]]
+    ; GFX9: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C7]](s32)
+    ; GFX9: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
     ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY11]](s32), [[COPY12]](s32)
-    ; GFX9: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC4]](<2 x s16>), [[BUILD_VECTOR_TRUNC5]](<2 x s16>)
-    ; GFX9: [[TRUNC2:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS2]](<4 x s16>)
+    ; GFX9: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C6]]
+    ; GFX9: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C8]](s32)
+    ; GFX9: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
     ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32)
+    ; GFX9: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C6]]
     ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY13]](s32), [[COPY14]](s32)
+    ; GFX9: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C6]]
+    ; GFX9: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
+    ; GFX9: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
     ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LOAD14]](s32)
+    ; GFX9: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C6]]
+    ; GFX9: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C7]](s32)
+    ; GFX9: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
     ; GFX9: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC7:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[COPY16]](s32)
-    ; GFX9: [[CONCAT_VECTORS3:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC6]](<2 x s16>), [[BUILD_VECTOR_TRUNC7]](<2 x s16>)
-    ; GFX9: [[TRUNC3:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS3]](<4 x s16>)
-    ; GFX9: [[CONCAT_VECTORS4:%[0-9]+]]:_(<16 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s8>), [[TRUNC1]](<4 x s8>), [[TRUNC2]](<4 x s8>), [[TRUNC3]](<4 x s8>)
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS4]](<16 x s8>)
+    ; GFX9: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C6]]
+    ; GFX9: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C8]](s32)
+    ; GFX9: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
+    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
     ; GFX9-UNALIGNED-LABEL: name: test_load_local_v16s8_align16
     ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 1, addrspace 3)
@@ -8846,49 +9382,74 @@ body: |
     ; GFX9-UNALIGNED: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
     ; GFX9-UNALIGNED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
     ; GFX9-UNALIGNED: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
+    ; GFX9-UNALIGNED: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; GFX9-UNALIGNED: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
+    ; GFX9-UNALIGNED: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; GFX9-UNALIGNED: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
+    ; GFX9-UNALIGNED: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
+    ; GFX9-UNALIGNED: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
+    ; GFX9-UNALIGNED: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
+    ; GFX9-UNALIGNED: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
+    ; GFX9-UNALIGNED: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
+    ; GFX9-UNALIGNED: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
     ; GFX9-UNALIGNED: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
+    ; GFX9-UNALIGNED: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
     ; GFX9-UNALIGNED: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
-    ; GFX9-UNALIGNED: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
+    ; GFX9-UNALIGNED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]]
+    ; GFX9-UNALIGNED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; GFX9-UNALIGNED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
     ; GFX9-UNALIGNED: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; GFX9-UNALIGNED: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]]
+    ; GFX9-UNALIGNED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+    ; GFX9-UNALIGNED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
     ; GFX9-UNALIGNED: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
-    ; GFX9-UNALIGNED: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32)
-    ; GFX9-UNALIGNED: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
-    ; GFX9-UNALIGNED: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>)
-    ; GFX9-UNALIGNED: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
-    ; GFX9-UNALIGNED: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
-    ; GFX9-UNALIGNED: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+    ; GFX9-UNALIGNED: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
+    ; GFX9-UNALIGNED: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; GFX9-UNALIGNED: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
     ; GFX9-UNALIGNED: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV1]](s32)
+    ; GFX9-UNALIGNED: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]]
     ; GFX9-UNALIGNED: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32)
-    ; GFX9-UNALIGNED: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32)
+    ; GFX9-UNALIGNED: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]]
+    ; GFX9-UNALIGNED: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; GFX9-UNALIGNED: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
     ; GFX9-UNALIGNED: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32)
+    ; GFX9-UNALIGNED: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]]
+    ; GFX9-UNALIGNED: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C1]](s32)
+    ; GFX9-UNALIGNED: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
     ; GFX9-UNALIGNED: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32)
-    ; GFX9-UNALIGNED: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY7]](s32), [[COPY8]](s32)
-    ; GFX9-UNALIGNED: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>)
-    ; GFX9-UNALIGNED: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS1]](<4 x s16>)
-    ; GFX9-UNALIGNED: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
-    ; GFX9-UNALIGNED: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
-    ; GFX9-UNALIGNED: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
+    ; GFX9-UNALIGNED: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]]
+    ; GFX9-UNALIGNED: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
+    ; GFX9-UNALIGNED: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
     ; GFX9-UNALIGNED: [[COPY9:%[0-9]+]]:_(s32) = COPY [[UV2]](s32)
+    ; GFX9-UNALIGNED: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]]
     ; GFX9-UNALIGNED: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32)
-    ; GFX9-UNALIGNED: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY9]](s32), [[COPY10]](s32)
+    ; GFX9-UNALIGNED: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C3]]
+    ; GFX9-UNALIGNED: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32)
+    ; GFX9-UNALIGNED: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
     ; GFX9-UNALIGNED: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR7]](s32)
+    ; GFX9-UNALIGNED: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C3]]
+    ; GFX9-UNALIGNED: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C1]](s32)
+    ; GFX9-UNALIGNED: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
     ; GFX9-UNALIGNED: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR8]](s32)
-    ; GFX9-UNALIGNED: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY11]](s32), [[COPY12]](s32)
-    ; GFX9-UNALIGNED: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC4]](<2 x s16>), [[BUILD_VECTOR_TRUNC5]](<2 x s16>)
-    ; GFX9-UNALIGNED: [[TRUNC2:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS2]](<4 x s16>)
-    ; GFX9-UNALIGNED: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
-    ; GFX9-UNALIGNED: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32)
-    ; GFX9-UNALIGNED: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
+    ; GFX9-UNALIGNED: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C3]]
+    ; GFX9-UNALIGNED: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32)
+    ; GFX9-UNALIGNED: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
     ; GFX9-UNALIGNED: [[COPY13:%[0-9]+]]:_(s32) = COPY [[UV3]](s32)
+    ; GFX9-UNALIGNED: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]]
     ; GFX9-UNALIGNED: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR9]](s32)
-    ; GFX9-UNALIGNED: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY13]](s32), [[COPY14]](s32)
+    ; GFX9-UNALIGNED: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C3]]
+    ; GFX9-UNALIGNED: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C]](s32)
+    ; GFX9-UNALIGNED: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
     ; GFX9-UNALIGNED: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR10]](s32)
+    ; GFX9-UNALIGNED: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C3]]
+    ; GFX9-UNALIGNED: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C1]](s32)
+    ; GFX9-UNALIGNED: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
     ; GFX9-UNALIGNED: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR11]](s32)
-    ; GFX9-UNALIGNED: [[BUILD_VECTOR_TRUNC7:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[COPY16]](s32)
-    ; GFX9-UNALIGNED: [[CONCAT_VECTORS3:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC6]](<2 x s16>), [[BUILD_VECTOR_TRUNC7]](<2 x s16>)
-    ; GFX9-UNALIGNED: [[TRUNC3:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS3]](<4 x s16>)
-    ; GFX9-UNALIGNED: [[CONCAT_VECTORS4:%[0-9]+]]:_(<16 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s8>), [[TRUNC1]](<4 x s8>), [[TRUNC2]](<4 x s8>), [[TRUNC3]](<4 x s8>)
-    ; GFX9-UNALIGNED: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS4]](<16 x s8>)
+    ; GFX9-UNALIGNED: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C3]]
+    ; GFX9-UNALIGNED: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C2]](s32)
+    ; GFX9-UNALIGNED: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; GFX9-UNALIGNED: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
+    ; GFX9-UNALIGNED: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
     ; GFX10-LABEL: name: test_load_local_v16s8_align16
     ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
@@ -8928,40 +9489,67 @@ body: |
     ; GFX10: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3)
     ; GFX10: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
     ; GFX10: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3)
+    ; GFX10: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
     ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; GFX10: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C6]]
     ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
-    ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
+    ; GFX10: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C6]]
+    ; GFX10: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; GFX10: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
     ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+    ; GFX10: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C6]]
+    ; GFX10: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX10: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C7]](s32)
+    ; GFX10: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
     ; GFX10: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32)
-    ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32)
-    ; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
-    ; GFX10: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>)
+    ; GFX10: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C6]]
+    ; GFX10: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX10: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C8]](s32)
+    ; GFX10: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
     ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32)
+    ; GFX10: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C6]]
     ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32)
-    ; GFX10: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32)
+    ; GFX10: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C6]]
+    ; GFX10: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; GFX10: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
     ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32)
+    ; GFX10: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C6]]
+    ; GFX10: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C7]](s32)
+    ; GFX10: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
     ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32)
-    ; GFX10: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY7]](s32), [[COPY8]](s32)
-    ; GFX10: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>)
-    ; GFX10: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS1]](<4 x s16>)
+    ; GFX10: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C6]]
+    ; GFX10: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32)
+    ; GFX10: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
     ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32)
+    ; GFX10: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C6]]
     ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32)
-    ; GFX10: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY9]](s32), [[COPY10]](s32)
+    ; GFX10: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C6]]
+    ; GFX10: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; GFX10: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
     ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD10]](s32)
+    ; GFX10: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C6]]
+    ; GFX10: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C7]](s32)
+    ; GFX10: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
     ; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32)
-    ; GFX10: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY11]](s32), [[COPY12]](s32)
-    ; GFX10: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC4]](<2 x s16>), [[BUILD_VECTOR_TRUNC5]](<2 x s16>)
-    ; GFX10: [[TRUNC2:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS2]](<4 x s16>)
+    ; GFX10: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C6]]
+    ; GFX10: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C8]](s32)
+    ; GFX10: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
     ; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32)
+    ; GFX10: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C6]]
     ; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32)
-    ; GFX10: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY13]](s32), [[COPY14]](s32)
+    ; GFX10: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C6]]
+    ; GFX10: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
+    ; GFX10: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
     ; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LOAD14]](s32)
+    ; GFX10: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C6]]
+    ; GFX10: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C7]](s32)
+    ; GFX10: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
     ; GFX10: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32)
-    ; GFX10: [[BUILD_VECTOR_TRUNC7:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[COPY16]](s32)
-    ; GFX10: [[CONCAT_VECTORS3:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC6]](<2 x s16>), [[BUILD_VECTOR_TRUNC7]](<2 x s16>)
-    ; GFX10: [[TRUNC3:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS3]](<4 x s16>)
-    ; GFX10: [[CONCAT_VECTORS4:%[0-9]+]]:_(<16 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s8>), [[TRUNC1]](<4 x s8>), [[TRUNC2]](<4 x s8>), [[TRUNC3]](<4 x s8>)
-    ; GFX10: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS4]](<16 x s8>)
+    ; GFX10: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C6]]
+    ; GFX10: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C8]](s32)
+    ; GFX10: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
+    ; GFX10: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_v16s8_align16
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
@@ -9001,43 +9589,71 @@ body: |
     ; GFX10-UNALIGNED: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3)
     ; GFX10-UNALIGNED: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
     ; GFX10-UNALIGNED: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3)
+    ; GFX10-UNALIGNED: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
     ; GFX10-UNALIGNED: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; GFX10-UNALIGNED: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C6]]
     ; GFX10-UNALIGNED: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
-    ; GFX10-UNALIGNED: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
+    ; GFX10-UNALIGNED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C6]]
+    ; GFX10-UNALIGNED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; GFX10-UNALIGNED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
     ; GFX10-UNALIGNED: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+    ; GFX10-UNALIGNED: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C6]]
+    ; GFX10-UNALIGNED: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX10-UNALIGNED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C7]](s32)
+    ; GFX10-UNALIGNED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
     ; GFX10-UNALIGNED: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32)
-    ; GFX10-UNALIGNED: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32)
-    ; GFX10-UNALIGNED: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
-    ; GFX10-UNALIGNED: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>)
+    ; GFX10-UNALIGNED: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C6]]
+    ; GFX10-UNALIGNED: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX10-UNALIGNED: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C8]](s32)
+    ; GFX10-UNALIGNED: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
     ; GFX10-UNALIGNED: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32)
+    ; GFX10-UNALIGNED: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C6]]
     ; GFX10-UNALIGNED: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32)
-    ; GFX10-UNALIGNED: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32)
+    ; GFX10-UNALIGNED: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C6]]
+    ; GFX10-UNALIGNED: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; GFX10-UNALIGNED: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
     ; GFX10-UNALIGNED: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32)
+    ; GFX10-UNALIGNED: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C6]]
+    ; GFX10-UNALIGNED: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C7]](s32)
+    ; GFX10-UNALIGNED: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
     ; GFX10-UNALIGNED: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32)
-    ; GFX10-UNALIGNED: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY7]](s32), [[COPY8]](s32)
-    ; GFX10-UNALIGNED: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>)
-    ; GFX10-UNALIGNED: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS1]](<4 x s16>)
+    ; GFX10-UNALIGNED: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C6]]
+    ; GFX10-UNALIGNED: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32)
+    ; GFX10-UNALIGNED: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
     ; GFX10-UNALIGNED: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32)
+    ; GFX10-UNALIGNED: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C6]]
     ; GFX10-UNALIGNED: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32)
-    ; GFX10-UNALIGNED: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY9]](s32), [[COPY10]](s32)
+    ; GFX10-UNALIGNED: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C6]]
+    ; GFX10-UNALIGNED: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; GFX10-UNALIGNED: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
     ; GFX10-UNALIGNED: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD10]](s32)
+    ; GFX10-UNALIGNED: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C6]]
+    ; GFX10-UNALIGNED: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C7]](s32)
+    ; GFX10-UNALIGNED: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
     ; GFX10-UNALIGNED: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32)
-    ; GFX10-UNALIGNED: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY11]](s32), [[COPY12]](s32)
-    ; GFX10-UNALIGNED: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC4]](<2 x s16>), [[BUILD_VECTOR_TRUNC5]](<2 x s16>)
-    ; GFX10-UNALIGNED: [[TRUNC2:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS2]](<4 x s16>)
+    ; GFX10-UNALIGNED: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C6]]
+    ; GFX10-UNALIGNED: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C8]](s32)
+    ; GFX10-UNALIGNED: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
     ; GFX10-UNALIGNED: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32)
+    ; GFX10-UNALIGNED: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C6]]
     ; GFX10-UNALIGNED: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32)
-    ; GFX10-UNALIGNED: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY13]](s32), [[COPY14]](s32)
+    ; GFX10-UNALIGNED: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C6]]
+    ; GFX10-UNALIGNED: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
+    ; GFX10-UNALIGNED: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
     ; GFX10-UNALIGNED: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LOAD14]](s32)
+    ; GFX10-UNALIGNED: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C6]]
+    ; GFX10-UNALIGNED: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C7]](s32)
+    ; GFX10-UNALIGNED: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
     ; GFX10-UNALIGNED: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32)
-    ; GFX10-UNALIGNED: [[BUILD_VECTOR_TRUNC7:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[COPY16]](s32)
-    ; GFX10-UNALIGNED: [[CONCAT_VECTORS3:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC6]](<2 x s16>), [[BUILD_VECTOR_TRUNC7]](<2 x s16>)
-    ; GFX10-UNALIGNED: [[TRUNC3:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS3]](<4 x s16>)
-    ; GFX10-UNALIGNED: [[CONCAT_VECTORS4:%[0-9]+]]:_(<16 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s8>), [[TRUNC1]](<4 x s8>), [[TRUNC2]](<4 x s8>), [[TRUNC3]](<4 x s8>)
-    ; GFX10-UNALIGNED: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS4]](<16 x s8>)
+    ; GFX10-UNALIGNED: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C6]]
+    ; GFX10-UNALIGNED: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C8]](s32)
+    ; GFX10-UNALIGNED: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; GFX10-UNALIGNED: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
+    ; GFX10-UNALIGNED: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
     %0:_(p3) = COPY $vgpr0
     %1:_(<16 x s8>) = G_LOAD %0 :: (load (<16 x s8>), align 1, addrspace 3)
-    $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
+    %2:_(<4 x s32>) = G_BITCAST %1
+    $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2
 ...
 
 ---
@@ -9444,71 +10060,275 @@ body: |
     ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; SI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3)
     ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>)
-    ; SI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
-    ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0
-    ; SI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
+    ; SI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
+    ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+    ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+    ; SI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
+    ; SI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+    ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+    ; SI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+    ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]]
+    ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]]
+    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; SI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]]
+    ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]]
+    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+    ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; SI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
+    ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]]
+    ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+    ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]]
+    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; SI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
+    ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; CI-LABEL: name: test_load_local_v3s16_align8
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; CI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3)
     ; CI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>)
-    ; CI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
-    ; CI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0
-    ; CI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
+    ; CI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
+    ; CI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+    ; CI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; CI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+    ; CI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
+    ; CI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+    ; CI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+    ; CI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+    ; CI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]]
+    ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]]
+    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]]
+    ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]]
+    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+    ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
+    ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]]
+    ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+    ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]]
+    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; CI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
+    ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; CI-DS128-LABEL: name: test_load_local_v3s16_align8
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; CI-DS128: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3)
     ; CI-DS128: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CI-DS128: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>)
-    ; CI-DS128: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
-    ; CI-DS128: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0
-    ; CI-DS128: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
+    ; CI-DS128: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CI-DS128: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
+    ; CI-DS128: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-DS128: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+    ; CI-DS128: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; CI-DS128: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+    ; CI-DS128: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
+    ; CI-DS128: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+    ; CI-DS128: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+    ; CI-DS128: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+    ; CI-DS128: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+    ; CI-DS128: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-DS128: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; CI-DS128: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]]
+    ; CI-DS128: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; CI-DS128: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]]
+    ; CI-DS128: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; CI-DS128: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CI-DS128: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CI-DS128: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; CI-DS128: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]]
+    ; CI-DS128: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+    ; CI-DS128: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]]
+    ; CI-DS128: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+    ; CI-DS128: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CI-DS128: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; CI-DS128: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
+    ; CI-DS128: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]]
+    ; CI-DS128: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+    ; CI-DS128: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]]
+    ; CI-DS128: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; CI-DS128: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; CI-DS128: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; CI-DS128: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
+    ; CI-DS128: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; VI-LABEL: name: test_load_local_v3s16_align8
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; VI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3)
     ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>)
-    ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
-    ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0
-    ; VI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
+    ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
+    ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+    ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+    ; VI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
+    ; VI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+    ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+    ; VI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+    ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]]
+    ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]]
+    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]]
+    ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]]
+    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; VI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
+    ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]]
+    ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+    ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]]
+    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; VI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
+    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; GFX9-LABEL: name: test_load_local_v3s16_align8
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3)
     ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>)
-    ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
-    ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0
-    ; GFX9: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
+    ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
+    ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+    ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+    ; GFX9: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
+    ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+    ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+    ; GFX9: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+    ; GFX9: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+    ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
+    ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+    ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32)
+    ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
+    ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+    ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32)
+    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+    ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; GFX9-UNALIGNED-LABEL: name: test_load_local_v3s16_align8
     ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3)
     ; GFX9-UNALIGNED: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-UNALIGNED: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>)
-    ; GFX9-UNALIGNED: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
-    ; GFX9-UNALIGNED: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0
-    ; GFX9-UNALIGNED: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
+    ; GFX9-UNALIGNED: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-UNALIGNED: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
+    ; GFX9-UNALIGNED: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; GFX9-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9-UNALIGNED: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+    ; GFX9-UNALIGNED: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; GFX9-UNALIGNED: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+    ; GFX9-UNALIGNED: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
+    ; GFX9-UNALIGNED: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+    ; GFX9-UNALIGNED: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+    ; GFX9-UNALIGNED: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+    ; GFX9-UNALIGNED: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+    ; GFX9-UNALIGNED: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; GFX9-UNALIGNED: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; GFX9-UNALIGNED: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
+    ; GFX9-UNALIGNED: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; GFX9-UNALIGNED: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+    ; GFX9-UNALIGNED: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32)
+    ; GFX9-UNALIGNED: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
+    ; GFX9-UNALIGNED: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+    ; GFX9-UNALIGNED: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32)
+    ; GFX9-UNALIGNED: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+    ; GFX9-UNALIGNED: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; GFX10-LABEL: name: test_load_local_v3s16_align8
     ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; GFX10: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3)
     ; GFX10: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>)
-    ; GFX10: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
-    ; GFX10: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0
-    ; GFX10: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
+    ; GFX10: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX10: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
+    ; GFX10: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX10: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+    ; GFX10: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; GFX10: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+    ; GFX10: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
+    ; GFX10: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+    ; GFX10: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+    ; GFX10: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+    ; GFX10: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+    ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
+    ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; GFX10: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+    ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32)
+    ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
+    ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+    ; GFX10: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32)
+    ; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+    ; GFX10: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_v3s16_align8
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3)
     ; GFX10-UNALIGNED: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX10-UNALIGNED: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>)
-    ; GFX10-UNALIGNED: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
-    ; GFX10-UNALIGNED: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0
-    ; GFX10-UNALIGNED: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
+    ; GFX10-UNALIGNED: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX10-UNALIGNED: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
+    ; GFX10-UNALIGNED: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; GFX10-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX10-UNALIGNED: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+    ; GFX10-UNALIGNED: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; GFX10-UNALIGNED: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+    ; GFX10-UNALIGNED: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
+    ; GFX10-UNALIGNED: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+    ; GFX10-UNALIGNED: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+    ; GFX10-UNALIGNED: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+    ; GFX10-UNALIGNED: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+    ; GFX10-UNALIGNED: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; GFX10-UNALIGNED: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; GFX10-UNALIGNED: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
+    ; GFX10-UNALIGNED: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; GFX10-UNALIGNED: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+    ; GFX10-UNALIGNED: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32)
+    ; GFX10-UNALIGNED: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
+    ; GFX10-UNALIGNED: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+    ; GFX10-UNALIGNED: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32)
+    ; GFX10-UNALIGNED: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+    ; GFX10-UNALIGNED: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     %0:_(p3) = COPY $vgpr0
     %1:_(<3 x s16>) = G_LOAD %0 :: (load (<3 x s16>), align 8, addrspace 3)
-    %2:_(<4 x s16>) = G_IMPLICIT_DEF
-    %3:_(<4 x s16>) = G_INSERT %2, %1, 0
-    $vgpr0_vgpr1 = COPY %3
+    %2:_(<3 x s16>) = G_IMPLICIT_DEF
+    %3:_(<6 x s16>) = G_CONCAT_VECTORS %1, %2
+    $vgpr0_vgpr1_vgpr2 = COPY %3
 ...
 
 ---
@@ -9526,27 +10346,40 @@ body: |
     ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
     ; SI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
     ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; SI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+    ; SI: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; SI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; SI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
+    ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
+    ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C2]](s32)
+    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
     ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]]
+    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
     ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]]
-    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32)
+    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]]
+    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
     ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; SI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; SI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
     ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
-    ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]]
-    ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32)
+    ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]]
+    ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
+    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
     ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; SI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; SI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
-    ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[DEF]](<2 x s16>)
-    ; SI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
-    ; SI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
-    ; SI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
+    ; SI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]]
+    ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]]
+    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
+    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; SI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+    ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; CI-LABEL: name: test_load_local_v3s16_align2
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
@@ -9556,27 +10389,40 @@ body: |
     ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
     ; CI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
     ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+    ; CI: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; CI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
+    ; CI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
+    ; CI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; CI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C2]](s32)
+    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
     ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]]
+    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
     ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]]
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32)
+    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]]
+    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
     ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
     ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
-    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]]
-    ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32)
+    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]]
+    ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
+    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
     ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; CI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; CI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
-    ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[DEF]](<2 x s16>)
-    ; CI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
-    ; CI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
-    ; CI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
+    ; CI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]]
+    ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]]
+    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
+    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; CI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+    ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; CI-DS128-LABEL: name: test_load_local_v3s16_align2
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
@@ -9586,27 +10432,40 @@ body: |
     ; CI-DS128: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
     ; CI-DS128: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
     ; CI-DS128: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
-    ; CI-DS128: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-DS128: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+    ; CI-DS128: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; CI-DS128: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CI-DS128: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CI-DS128: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
+    ; CI-DS128: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; CI-DS128: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-DS128: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
+    ; CI-DS128: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; CI-DS128: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C2]](s32)
+    ; CI-DS128: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
     ; CI-DS128: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
-    ; CI-DS128: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]]
+    ; CI-DS128: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
     ; CI-DS128: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
-    ; CI-DS128: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]]
-    ; CI-DS128: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-DS128: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32)
+    ; CI-DS128: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]]
+    ; CI-DS128: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
     ; CI-DS128: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI-DS128: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CI-DS128: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
     ; CI-DS128: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
-    ; CI-DS128: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]]
-    ; CI-DS128: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; CI-DS128: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32)
+    ; CI-DS128: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]]
+    ; CI-DS128: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; CI-DS128: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
+    ; CI-DS128: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
     ; CI-DS128: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; CI-DS128: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; CI-DS128: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
-    ; CI-DS128: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[DEF]](<2 x s16>)
-    ; CI-DS128: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
-    ; CI-DS128: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CI-DS128: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
-    ; CI-DS128: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
+    ; CI-DS128: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; CI-DS128: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; CI-DS128: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]]
+    ; CI-DS128: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; CI-DS128: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]]
+    ; CI-DS128: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
+    ; CI-DS128: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; CI-DS128: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; CI-DS128: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+    ; CI-DS128: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; VI-LABEL: name: test_load_local_v3s16_align2
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
@@ -9616,27 +10475,40 @@ body: |
     ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
     ; VI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
     ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; VI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+    ; VI: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; VI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; VI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
+    ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
+    ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C2]](s32)
+    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
     ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]]
+    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
     ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]]
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32)
+    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]]
+    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
     ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
     ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]]
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32)
+    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]]
+    ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
+    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
     ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; VI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[DEF]](<2 x s16>)
-    ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
-    ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
-    ; VI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
+    ; VI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]]
+    ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]]
+    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
+    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; VI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; GFX9-LABEL: name: test_load_local_v3s16_align2
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
@@ -9646,24 +10518,36 @@ body: |
     ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
     ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
     ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
+    ; GFX9: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+    ; GFX9: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; GFX9: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
+    ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
+    ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C2]](s32)
     ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
     ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
     ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
     ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
-    ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[DEF]](s32)
-    ; GFX9: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
-    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[DEF1]](<2 x s16>)
-    ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
-    ; GFX9: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[UV]](<3 x s16>), 0
-    ; GFX9: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
+    ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32)
+    ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32)
+    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+    ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; GFX9-UNALIGNED-LABEL: name: test_load_local_v3s16_align2
     ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[COPY]](p3) :: (load (<3 x s16>), align 2, addrspace 3)
     ; GFX9-UNALIGNED: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-UNALIGNED: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[LOAD]](<3 x s16>), 0
-    ; GFX9-UNALIGNED: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
+    ; GFX9-UNALIGNED: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-UNALIGNED: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
+    ; GFX9-UNALIGNED: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
+    ; GFX9-UNALIGNED: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[LOAD]](<3 x s16>), [[UV]](<3 x s16>)
+    ; GFX9-UNALIGNED: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS1]](<6 x s16>)
     ; GFX10-LABEL: name: test_load_local_v3s16_align2
     ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
@@ -9673,18 +10557,27 @@ body: |
     ; GFX10: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
     ; GFX10: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
     ; GFX10: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
+    ; GFX10: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+    ; GFX10: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; GFX10: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX10: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX10: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
+    ; GFX10: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; GFX10: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX10: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
+    ; GFX10: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; GFX10: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C2]](s32)
     ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
     ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
     ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
     ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
-    ; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[DEF]](s32)
-    ; GFX10: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
-    ; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[DEF1]](<2 x s16>)
-    ; GFX10: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
-    ; GFX10: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX10: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[UV]](<3 x s16>), 0
-    ; GFX10: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
+    ; GFX10: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32)
+    ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; GFX10: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32)
+    ; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+    ; GFX10: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_v3s16_align2
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
@@ -9694,23 +10587,32 @@ body: |
     ; GFX10-UNALIGNED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
     ; GFX10-UNALIGNED: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
     ; GFX10-UNALIGNED: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
+    ; GFX10-UNALIGNED: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+    ; GFX10-UNALIGNED: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; GFX10-UNALIGNED: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX10-UNALIGNED: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX10-UNALIGNED: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
+    ; GFX10-UNALIGNED: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; GFX10-UNALIGNED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX10-UNALIGNED: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
+    ; GFX10-UNALIGNED: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; GFX10-UNALIGNED: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C2]](s32)
     ; GFX10-UNALIGNED: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
     ; GFX10-UNALIGNED: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
     ; GFX10-UNALIGNED: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
     ; GFX10-UNALIGNED: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
-    ; GFX10-UNALIGNED: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; GFX10-UNALIGNED: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[DEF]](s32)
-    ; GFX10-UNALIGNED: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
-    ; GFX10-UNALIGNED: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[DEF1]](<2 x s16>)
-    ; GFX10-UNALIGNED: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
-    ; GFX10-UNALIGNED: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX10-UNALIGNED: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[UV]](<3 x s16>), 0
-    ; GFX10-UNALIGNED: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
+    ; GFX10-UNALIGNED: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; GFX10-UNALIGNED: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32)
+    ; GFX10-UNALIGNED: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; GFX10-UNALIGNED: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; GFX10-UNALIGNED: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32)
+    ; GFX10-UNALIGNED: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+    ; GFX10-UNALIGNED: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     %0:_(p3) = COPY $vgpr0
     %1:_(<3 x s16>) = G_LOAD %0 :: (load (<3 x s16>), align 2, addrspace 3)
-    %2:_(<4 x s16>) = G_IMPLICIT_DEF
-    %3:_(<4 x s16>) = G_INSERT %2, %1, 0
-    $vgpr0_vgpr1 = COPY %3
+    %2:_(<3 x s16>) = G_IMPLICIT_DEF
+    %3:_(<6 x s16>) = G_CONCAT_VECTORS %1, %2
+    $vgpr0_vgpr1_vgpr2 = COPY %3
 ...
 
 ---
@@ -9761,23 +10663,37 @@ body: |
     ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32)
     ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
     ; SI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
+    ; SI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+    ; SI: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; SI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; SI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
+    ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C6]](s32)
+    ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C6]](s32)
     ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
     ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
     ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32)
     ; SI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]]
-    ; SI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+    ; SI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
     ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; SI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C7]], [[C6]](s32)
+    ; SI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C7]]
+    ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C6]](s32)
     ; SI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]]
-    ; SI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
-    ; SI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
-    ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[DEF]](<2 x s16>)
-    ; SI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
-    ; SI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
-    ; SI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
+    ; SI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+    ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C7]]
+    ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; SI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C7]]
+    ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C6]](s32)
+    ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL5]]
+    ; SI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+    ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+    ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; CI-LABEL: name: test_load_local_v3s16_align1
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
@@ -9820,23 +10736,37 @@ body: |
     ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32)
     ; CI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
     ; CI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
+    ; CI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+    ; CI: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; CI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
+    ; CI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C6]](s32)
+    ; CI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; CI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C6]](s32)
     ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
     ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
     ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32)
     ; CI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]]
-    ; CI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+    ; CI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
     ; CI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; CI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C7]], [[C6]](s32)
+    ; CI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; CI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C7]]
+    ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C6]](s32)
     ; CI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]]
-    ; CI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
-    ; CI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
-    ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[DEF]](<2 x s16>)
-    ; CI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
-    ; CI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
-    ; CI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
+    ; CI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+    ; CI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C7]]
+    ; CI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; CI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C7]]
+    ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C6]](s32)
+    ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL5]]
+    ; CI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+    ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+    ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; CI-DS128-LABEL: name: test_load_local_v3s16_align1
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
@@ -9879,23 +10809,37 @@ body: |
     ; CI-DS128: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32)
     ; CI-DS128: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
     ; CI-DS128: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
+    ; CI-DS128: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+    ; CI-DS128: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; CI-DS128: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CI-DS128: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CI-DS128: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
+    ; CI-DS128: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; CI-DS128: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-DS128: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C6]](s32)
+    ; CI-DS128: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; CI-DS128: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C6]](s32)
     ; CI-DS128: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
     ; CI-DS128: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; CI-DS128: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
     ; CI-DS128: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32)
     ; CI-DS128: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]]
-    ; CI-DS128: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+    ; CI-DS128: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
     ; CI-DS128: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; CI-DS128: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; CI-DS128: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C7]], [[C6]](s32)
+    ; CI-DS128: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI-DS128: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; CI-DS128: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C7]]
+    ; CI-DS128: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C6]](s32)
     ; CI-DS128: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]]
-    ; CI-DS128: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
-    ; CI-DS128: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
-    ; CI-DS128: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[DEF]](<2 x s16>)
-    ; CI-DS128: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
-    ; CI-DS128: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CI-DS128: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
-    ; CI-DS128: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
+    ; CI-DS128: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+    ; CI-DS128: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; CI-DS128: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C7]]
+    ; CI-DS128: [[COPY8:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; CI-DS128: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C7]]
+    ; CI-DS128: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C6]](s32)
+    ; CI-DS128: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL5]]
+    ; CI-DS128: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+    ; CI-DS128: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+    ; CI-DS128: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; VI-LABEL: name: test_load_local_v3s16_align1
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
@@ -9932,23 +10876,37 @@ body: |
     ; VI: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]]
     ; VI: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16)
     ; VI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
+    ; VI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+    ; VI: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; VI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; VI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
+    ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C5]](s32)
+    ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C5]](s32)
     ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
     ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
     ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
     ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]]
-    ; VI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+    ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
     ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[C5]](s32)
+    ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C6]]
+    ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
     ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]]
-    ; VI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
-    ; VI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[DEF]](<2 x s16>)
-    ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
-    ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
-    ; VI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
+    ; VI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+    ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C6]]
+    ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C6]]
+    ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C5]](s32)
+    ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL5]]
+    ; VI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; GFX9-LABEL: name: test_load_local_v3s16_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
@@ -9985,24 +10943,36 @@ body: |
     ; GFX9: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]]
     ; GFX9: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16)
     ; GFX9: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
+    ; GFX9: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+    ; GFX9: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; GFX9: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
+    ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C5]](s32)
+    ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C5]](s32)
     ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
     ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16)
     ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32)
     ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[OR2]](s16)
-    ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[DEF]](s32)
-    ; GFX9: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
-    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[DEF1]](<2 x s16>)
-    ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
-    ; GFX9: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[UV]](<3 x s16>), 0
-    ; GFX9: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
+    ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[COPY1]](s32)
+    ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY2]](s32), [[COPY3]](s32)
+    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+    ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; GFX9-UNALIGNED-LABEL: name: test_load_local_v3s16_align1
     ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[COPY]](p3) :: (load (<3 x s16>), align 1, addrspace 3)
     ; GFX9-UNALIGNED: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-UNALIGNED: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[LOAD]](<3 x s16>), 0
-    ; GFX9-UNALIGNED: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
+    ; GFX9-UNALIGNED: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-UNALIGNED: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
+    ; GFX9-UNALIGNED: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
+    ; GFX9-UNALIGNED: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[LOAD]](<3 x s16>), [[UV]](<3 x s16>)
+    ; GFX9-UNALIGNED: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS1]](<6 x s16>)
     ; GFX10-LABEL: name: test_load_local_v3s16_align1
     ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
@@ -10039,18 +11009,27 @@ body: |
     ; GFX10: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]]
     ; GFX10: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16)
     ; GFX10: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
+    ; GFX10: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+    ; GFX10: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; GFX10: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX10: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX10: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
+    ; GFX10: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; GFX10: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX10: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C5]](s32)
+    ; GFX10: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; GFX10: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C5]](s32)
     ; GFX10: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
     ; GFX10: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16)
     ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32)
     ; GFX10: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[OR2]](s16)
-    ; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[DEF]](s32)
-    ; GFX10: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
-    ; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[DEF1]](<2 x s16>)
-    ; GFX10: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
-    ; GFX10: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX10: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[UV]](<3 x s16>), 0
-    ; GFX10: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
+    ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[COPY1]](s32)
+    ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; GFX10: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY2]](s32), [[COPY3]](s32)
+    ; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+    ; GFX10: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_v3s16_align1
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
@@ -10087,23 +11066,32 @@ body: |
     ; GFX10-UNALIGNED: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]]
     ; GFX10-UNALIGNED: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16)
     ; GFX10-UNALIGNED: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
+    ; GFX10-UNALIGNED: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+    ; GFX10-UNALIGNED: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; GFX10-UNALIGNED: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX10-UNALIGNED: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX10-UNALIGNED: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
+    ; GFX10-UNALIGNED: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; GFX10-UNALIGNED: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX10-UNALIGNED: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C5]](s32)
+    ; GFX10-UNALIGNED: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; GFX10-UNALIGNED: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C5]](s32)
     ; GFX10-UNALIGNED: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
     ; GFX10-UNALIGNED: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16)
     ; GFX10-UNALIGNED: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32)
     ; GFX10-UNALIGNED: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[OR2]](s16)
-    ; GFX10-UNALIGNED: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; GFX10-UNALIGNED: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[DEF]](s32)
-    ; GFX10-UNALIGNED: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
-    ; GFX10-UNALIGNED: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[DEF1]](<2 x s16>)
-    ; GFX10-UNALIGNED: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
-    ; GFX10-UNALIGNED: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX10-UNALIGNED: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[UV]](<3 x s16>), 0
-    ; GFX10-UNALIGNED: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
+    ; GFX10-UNALIGNED: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; GFX10-UNALIGNED: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[COPY1]](s32)
+    ; GFX10-UNALIGNED: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; GFX10-UNALIGNED: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; GFX10-UNALIGNED: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY2]](s32), [[COPY3]](s32)
+    ; GFX10-UNALIGNED: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+    ; GFX10-UNALIGNED: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     %0:_(p3) = COPY $vgpr0
     %1:_(<3 x s16>) = G_LOAD %0 :: (load (<3 x s16>), align 1, addrspace 3)
-    %2:_(<4 x s16>) = G_IMPLICIT_DEF
-    %3:_(<4 x s16>) = G_INSERT %2, %1, 0
-    $vgpr0_vgpr1 = COPY %3
+    %2:_(<3 x s16>) = G_IMPLICIT_DEF
+    %3:_(<6 x s16>) = G_CONCAT_VECTORS %1, %2
+    $vgpr0_vgpr1_vgpr2 = COPY %3
 ...
 
 ---

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir
index ebcf2ce7049d..226aa1bd3893 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir
@@ -4684,15 +4684,31 @@ body: |
     ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
     ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
     ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
-    ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; SI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; SI: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; SI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]]
+    ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
     ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
-    ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
-    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
-    ; SI: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>)
-    ; SI: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
-    ; SI: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF]](<4 x s32>)
-    ; SI: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[TRUNC1]], [[TRUNC]](<3 x s8>), 0
-    ; SI: $vgpr0 = COPY [[INSERT]](<4 x s8>)
+    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C4]]
+    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+    ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; SI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
+    ; SI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C3]]
+    ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[DEF]](s32)
+    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C4]]
+    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY3]](s32)
+    ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; SI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32)
+    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[OR2]](s32)
+    ; SI: $vgpr0 = COPY [[COPY5]](s32)
     ; CI-LABEL: name: test_load_private_v3s8_align4
     ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
     ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
@@ -4702,15 +4718,31 @@ body: |
     ; CI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
     ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
     ; CI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
-    ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; CI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CI: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; CI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]]
+    ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
     ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
-    ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
-    ; CI: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>)
-    ; CI: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
-    ; CI: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF]](<4 x s32>)
-    ; CI: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[TRUNC1]], [[TRUNC]](<3 x s8>), 0
-    ; CI: $vgpr0 = COPY [[INSERT]](<4 x s8>)
+    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C4]]
+    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+    ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; CI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
+    ; CI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C3]]
+    ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[DEF]](s32)
+    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C4]]
+    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY3]](s32)
+    ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; CI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32)
+    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[OR2]](s32)
+    ; CI: $vgpr0 = COPY [[COPY5]](s32)
     ; VI-LABEL: name: test_load_private_v3s8_align4
     ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
     ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
@@ -4720,15 +4752,27 @@ body: |
     ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
     ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
     ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
-    ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
-    ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
-    ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
-    ; VI: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>)
-    ; VI: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
-    ; VI: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF]](<4 x s32>)
-    ; VI: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[TRUNC1]], [[TRUNC]](<3 x s8>), 0
-    ; VI: $vgpr0 = COPY [[INSERT]](<4 x s8>)
+    ; VI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; VI: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]]
+    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+    ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]]
+    ; VI: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C4]](s16)
+    ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
+    ; VI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C3]]
+    ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
+    ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]]
+    ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C4]](s16)
+    ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32)
+    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[OR2]](s32)
+    ; VI: $vgpr0 = COPY [[COPY1]](s32)
     ; GFX9-LABEL: name: test_load_private_v3s8_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
     ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
@@ -4738,24 +4782,31 @@ body: |
     ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
     ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
     ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
-    ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
-    ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
-    ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
     ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[DEF]](s32)
-    ; GFX9: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
-    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[DEF1]](<2 x s16>)
-    ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
-    ; GFX9: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[UV]](<3 x s16>)
-    ; GFX9: [[DEF2:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
-    ; GFX9: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF2]](<4 x s32>)
-    ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[TRUNC1]], [[TRUNC]](<3 x s8>), 0
-    ; GFX9: $vgpr0 = COPY [[INSERT]](<4 x s8>)
+    ; GFX9: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]]
+    ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+    ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]]
+    ; GFX9: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C4]](s16)
+    ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
+    ; GFX9: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C3]]
+    ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
+    ; GFX9: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]]
+    ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C4]](s16)
+    ; GFX9: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32)
+    ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[OR2]](s32)
+    ; GFX9: $vgpr0 = COPY [[COPY1]](s32)
     %0:_(p5) = COPY $vgpr0
     %1:_(<3 x s8>) = G_LOAD %0 :: (load (<3 x s8>), addrspace 5, align 4)
-    %2:_(<4 x s8>) = G_IMPLICIT_DEF
-    %3:_(<4 x s8>) = G_INSERT %2, %1, 0
+    %2:_(s24) = G_BITCAST %1
+    %3:_(s32) = G_ANYEXT %2
     $vgpr0 = COPY %3
 ...
 
@@ -4774,15 +4825,32 @@ body: |
     ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
     ; SI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
     ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
-    ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; SI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; SI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; SI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
+    ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
     ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
-    ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
-    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
-    ; SI: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>)
-    ; SI: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
-    ; SI: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF]](<4 x s32>)
-    ; SI: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[TRUNC1]], [[TRUNC]](<3 x s8>), 0
-    ; SI: $vgpr0 = COPY [[INSERT]](<4 x s8>)
+    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C4]]
+    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+    ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; SI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; SI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[DEF]](s32)
+    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C4]]
+    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32)
+    ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; SI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
+    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[OR2]](s32)
+    ; SI: $vgpr0 = COPY [[COPY4]](s32)
     ; CI-LABEL: name: test_load_private_v3s8_align1
     ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
     ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
@@ -4792,15 +4860,32 @@ body: |
     ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
     ; CI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
     ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
-    ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; CI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; CI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
+    ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
     ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
-    ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
-    ; CI: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>)
-    ; CI: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
-    ; CI: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF]](<4 x s32>)
-    ; CI: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[TRUNC1]], [[TRUNC]](<3 x s8>), 0
-    ; CI: $vgpr0 = COPY [[INSERT]](<4 x s8>)
+    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C4]]
+    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+    ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; CI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; CI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[DEF]](s32)
+    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C4]]
+    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32)
+    ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; CI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
+    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[OR2]](s32)
+    ; CI: $vgpr0 = COPY [[COPY4]](s32)
     ; VI-LABEL: name: test_load_private_v3s8_align1
     ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
     ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
@@ -4810,15 +4895,28 @@ body: |
     ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
     ; VI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
     ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
-    ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
-    ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
-    ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
-    ; VI: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>)
-    ; VI: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
-    ; VI: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF]](<4 x s32>)
-    ; VI: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[TRUNC1]], [[TRUNC]](<3 x s8>), 0
-    ; VI: $vgpr0 = COPY [[INSERT]](<4 x s8>)
+    ; VI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; VI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
+    ; VI: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
+    ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; VI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
+    ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]]
+    ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
+    ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
+    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[OR2]](s32)
+    ; VI: $vgpr0 = COPY [[COPY1]](s32)
     ; GFX9-LABEL: name: test_load_private_v3s8_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
     ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
@@ -4828,24 +4926,32 @@ body: |
     ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
     ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
     ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5)
-    ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
-    ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
-    ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
     ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[DEF]](s32)
-    ; GFX9: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
-    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[DEF1]](<2 x s16>)
-    ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
-    ; GFX9: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[UV]](<3 x s16>)
-    ; GFX9: [[DEF2:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
-    ; GFX9: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF2]](<4 x s32>)
-    ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[TRUNC1]], [[TRUNC]](<3 x s8>), 0
-    ; GFX9: $vgpr0 = COPY [[INSERT]](<4 x s8>)
+    ; GFX9: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+    ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
+    ; GFX9: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
+    ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; GFX9: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+    ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
+    ; GFX9: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]]
+    ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
+    ; GFX9: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
+    ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[OR2]](s32)
+    ; GFX9: $vgpr0 = COPY [[COPY1]](s32)
     %0:_(p5) = COPY $vgpr0
     %1:_(<3 x s8>) = G_LOAD %0 :: (load (<3 x s8>), align 1, addrspace 5)
-    %2:_(<4 x s8>) = G_IMPLICIT_DEF
-    %3:_(<4 x s8>) = G_INSERT %2, %1, 0
+    %2:_(s24) = G_BITCAST %1
+    %3:_(s32) = G_ANYEXT %2
     $vgpr0 = COPY %3
 ...
 
@@ -4864,13 +4970,22 @@ body: |
     ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
     ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
     ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
+    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
     ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
     ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]]
+    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
     ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]]
+    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+    ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
     ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
-    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32)
-    ; SI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>)
-    ; SI: $vgpr0 = COPY [[TRUNC]](<4 x s8>)
+    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
+    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; SI: $vgpr0 = COPY [[OR2]](s32)
     ; CI-LABEL: name: test_load_private_v4s8_align4
     ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
     ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
@@ -4880,13 +4995,22 @@ body: |
     ; CI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
     ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
     ; CI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
+    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
     ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
     ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]]
+    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
     ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]]
+    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+    ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
     ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32)
-    ; CI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>)
-    ; CI: $vgpr0 = COPY [[TRUNC]](<4 x s8>)
+    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
+    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; CI: $vgpr0 = COPY [[OR2]](s32)
     ; VI-LABEL: name: test_load_private_v4s8_align4
     ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
     ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
@@ -4896,13 +5020,22 @@ body: |
     ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
     ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
     ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
+    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
     ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
     ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]]
+    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
     ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]]
+    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
     ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32)
-    ; VI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>)
-    ; VI: $vgpr0 = COPY [[TRUNC]](<4 x s8>)
+    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
+    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; VI: $vgpr0 = COPY [[OR2]](s32)
     ; GFX9-LABEL: name: test_load_private_v4s8_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
     ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5)
@@ -4912,18 +5045,26 @@ body: |
     ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
     ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
     ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
+    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
     ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
     ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
+    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]]
+    ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
     ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]]
+    ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+    ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
     ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32)
-    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
-    ; GFX9: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>)
-    ; GFX9: $vgpr0 = COPY [[TRUNC]](<4 x s8>)
+    ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
+    ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; GFX9: $vgpr0 = COPY [[OR2]](s32)
     %0:_(p5) = COPY $vgpr0
     %1:_(<4 x s8>) = G_LOAD %0 :: (load (<4 x s8>), align 4, addrspace 5)
-    $vgpr0 = COPY %1
+    %2:_(s32) = G_BITCAST %1
+    $vgpr0 = COPY %2
 ...
 
 ---
@@ -4944,23 +5085,40 @@ body: |
     ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
     ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
     ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C3]](s32)
+    ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C1]](s32)
+    ; SI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C2]](s32)
+    ; SI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C3]](s32)
+    ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
     ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C4]]
     ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C4]]
+    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
+    ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
     ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C4]]
+    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C2]](s32)
+    ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
     ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
-    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32)
-    ; SI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>)
-    ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C1]](s32)
-    ; SI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C2]](s32)
-    ; SI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C3]](s32)
+    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C4]]
+    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32)
+    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
     ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+    ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C4]]
     ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32)
+    ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C4]]
+    ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32)
+    ; SI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
     ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32)
+    ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C4]]
+    ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C2]](s32)
+    ; SI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
     ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32)
-    ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32)
-    ; SI: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR1]](<4 x s32>)
-    ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s8>), [[TRUNC1]](<4 x s8>)
-    ; SI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<8 x s8>)
+    ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C4]]
+    ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C3]](s32)
+    ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
+    ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     ; CI-LABEL: name: test_load_private_v8s8_align8
     ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
     ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5)
@@ -4973,23 +5131,40 @@ body: |
     ; CI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
     ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
     ; CI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C3]](s32)
+    ; CI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C1]](s32)
+    ; CI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C2]](s32)
+    ; CI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C3]](s32)
+    ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
     ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C4]]
     ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C4]]
+    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
+    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
     ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C4]]
+    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C2]](s32)
+    ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
     ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32)
-    ; CI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>)
-    ; CI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C1]](s32)
-    ; CI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C2]](s32)
-    ; CI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C3]](s32)
+    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C4]]
+    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32)
+    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
     ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+    ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C4]]
     ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32)
+    ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C4]]
+    ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32)
+    ; CI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
     ; CI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32)
+    ; CI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C4]]
+    ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C2]](s32)
+    ; CI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
     ; CI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32)
-    ; CI: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32)
-    ; CI: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR1]](<4 x s32>)
-    ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s8>), [[TRUNC1]](<4 x s8>)
-    ; CI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<8 x s8>)
+    ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C4]]
+    ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C3]](s32)
+    ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
+    ; CI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     ; VI-LABEL: name: test_load_private_v8s8_align8
     ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
     ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5)
@@ -5002,23 +5177,40 @@ body: |
     ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
     ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
     ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C3]](s32)
+    ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C1]](s32)
+    ; VI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C2]](s32)
+    ; VI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C3]](s32)
+    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
     ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C4]]
     ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C4]]
+    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
+    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
     ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C4]]
+    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C2]](s32)
+    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
     ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32)
-    ; VI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>)
-    ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C1]](s32)
-    ; VI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C2]](s32)
-    ; VI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C3]](s32)
+    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C4]]
+    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32)
+    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
     ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+    ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C4]]
     ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32)
+    ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C4]]
+    ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32)
+    ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
     ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32)
+    ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C4]]
+    ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C2]](s32)
+    ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
     ; VI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32)
-    ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32)
-    ; VI: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR1]](<4 x s32>)
-    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s8>), [[TRUNC1]](<4 x s8>)
-    ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<8 x s8>)
+    ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C4]]
+    ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C3]](s32)
+    ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
+    ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     ; GFX9-LABEL: name: test_load_private_v8s8_align8
     ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
     ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5)
@@ -5031,30 +5223,44 @@ body: |
     ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
     ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
     ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C3]](s32)
+    ; GFX9: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C1]](s32)
+    ; GFX9: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C2]](s32)
+    ; GFX9: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C3]](s32)
+    ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
     ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C4]]
     ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
+    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C4]]
+    ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
+    ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
     ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C4]]
+    ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C2]](s32)
+    ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
     ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32)
-    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
-    ; GFX9: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>)
-    ; GFX9: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C1]](s32)
-    ; GFX9: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C2]](s32)
-    ; GFX9: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C3]](s32)
+    ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C4]]
+    ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32)
+    ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
     ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+    ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C4]]
     ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32)
+    ; GFX9: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C4]]
+    ; GFX9: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32)
+    ; GFX9: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
     ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32)
+    ; GFX9: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C4]]
+    ; GFX9: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C2]](s32)
+    ; GFX9: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
     ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY7]](s32), [[COPY8]](s32)
-    ; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>)
-    ; GFX9: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS1]](<4 x s16>)
-    ; GFX9: [[CONCAT_VECTORS2:%[0-9]+]]:_(<8 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s8>), [[TRUNC1]](<4 x s8>)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS2]](<8 x s8>)
+    ; GFX9: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C4]]
+    ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C3]](s32)
+    ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
+    ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     %0:_(p5) = COPY $vgpr0
     %1:_(<8 x s8>) = G_LOAD %0 :: (load (<8 x s8>), align 8, addrspace 5)
-    $vgpr0_vgpr1 = COPY %1
+    %2:_(<2 x s32>) = G_BITCAST %1
+    $vgpr0_vgpr1 = COPY %2
 ...
 
 ---
@@ -5102,32 +5308,67 @@ body: |
     ; SI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5)
     ; SI: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
     ; SI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5)
+    ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
     ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C6]]
     ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C6]]
+    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
     ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+    ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C6]]
+    ; SI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C7]](s32)
+    ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
     ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32)
-    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32)
-    ; SI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>)
+    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C6]]
+    ; SI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C8]](s32)
+    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
     ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32)
+    ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C6]]
     ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32)
+    ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C6]]
+    ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; SI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
     ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32)
+    ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C6]]
+    ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C7]](s32)
+    ; SI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
     ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32)
-    ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32)
-    ; SI: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR1]](<4 x s32>)
+    ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C6]]
+    ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32)
+    ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
     ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32)
+    ; SI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C6]]
     ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32)
+    ; SI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C6]]
+    ; SI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; SI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
     ; SI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD10]](s32)
+    ; SI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C6]]
+    ; SI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C7]](s32)
+    ; SI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
     ; SI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32)
-    ; SI: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32)
-    ; SI: [[TRUNC2:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR2]](<4 x s32>)
+    ; SI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C6]]
+    ; SI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C8]](s32)
+    ; SI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
     ; SI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32)
+    ; SI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C6]]
     ; SI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32)
+    ; SI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C6]]
+    ; SI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
+    ; SI: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
     ; SI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LOAD14]](s32)
+    ; SI: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C6]]
+    ; SI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C7]](s32)
+    ; SI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
     ; SI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32)
-    ; SI: [[BUILD_VECTOR3:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32)
-    ; SI: [[TRUNC3:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR3]](<4 x s32>)
-    ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s8>), [[TRUNC1]](<4 x s8>), [[TRUNC2]](<4 x s8>), [[TRUNC3]](<4 x s8>)
-    ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<16 x s8>)
+    ; SI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C6]]
+    ; SI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C8]](s32)
+    ; SI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
+    ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
     ; CI-LABEL: name: test_load_private_v16s8_align16
     ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
     ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
@@ -5167,32 +5408,67 @@ body: |
     ; CI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5)
     ; CI: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
     ; CI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5)
+    ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
     ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C6]]
     ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C6]]
+    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
     ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C6]]
+    ; CI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C7]](s32)
+    ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
     ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32)
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32)
-    ; CI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>)
+    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C6]]
+    ; CI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C8]](s32)
+    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
     ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32)
+    ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C6]]
     ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32)
+    ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C6]]
+    ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; CI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
     ; CI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32)
+    ; CI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C6]]
+    ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C7]](s32)
+    ; CI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
     ; CI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32)
-    ; CI: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32)
-    ; CI: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR1]](<4 x s32>)
+    ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C6]]
+    ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32)
+    ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
     ; CI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32)
+    ; CI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C6]]
     ; CI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32)
+    ; CI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C6]]
+    ; CI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; CI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
     ; CI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD10]](s32)
+    ; CI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C6]]
+    ; CI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C7]](s32)
+    ; CI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
     ; CI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32)
-    ; CI: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32)
-    ; CI: [[TRUNC2:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR2]](<4 x s32>)
+    ; CI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C6]]
+    ; CI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C8]](s32)
+    ; CI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
     ; CI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32)
+    ; CI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C6]]
     ; CI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32)
+    ; CI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C6]]
+    ; CI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
+    ; CI: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
     ; CI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LOAD14]](s32)
+    ; CI: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C6]]
+    ; CI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C7]](s32)
+    ; CI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
     ; CI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32)
-    ; CI: [[BUILD_VECTOR3:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32)
-    ; CI: [[TRUNC3:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR3]](<4 x s32>)
-    ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s8>), [[TRUNC1]](<4 x s8>), [[TRUNC2]](<4 x s8>), [[TRUNC3]](<4 x s8>)
-    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<16 x s8>)
+    ; CI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C6]]
+    ; CI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C8]](s32)
+    ; CI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
+    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
     ; VI-LABEL: name: test_load_private_v16s8_align16
     ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
     ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
@@ -5232,32 +5508,67 @@ body: |
     ; VI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5)
     ; VI: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
     ; VI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5)
+    ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
     ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C6]]
     ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C6]]
+    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
     ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C6]]
+    ; VI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C7]](s32)
+    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
     ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32)
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32)
-    ; VI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>)
+    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C6]]
+    ; VI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C8]](s32)
+    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
     ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32)
+    ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C6]]
     ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32)
+    ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C6]]
+    ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
     ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32)
+    ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C6]]
+    ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C7]](s32)
+    ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
     ; VI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32)
-    ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32)
-    ; VI: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR1]](<4 x s32>)
+    ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C6]]
+    ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32)
+    ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
     ; VI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32)
+    ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C6]]
     ; VI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32)
+    ; VI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C6]]
+    ; VI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; VI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
     ; VI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD10]](s32)
+    ; VI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C6]]
+    ; VI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C7]](s32)
+    ; VI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
     ; VI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32)
-    ; VI: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32)
-    ; VI: [[TRUNC2:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR2]](<4 x s32>)
+    ; VI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C6]]
+    ; VI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C8]](s32)
+    ; VI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
     ; VI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32)
+    ; VI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C6]]
     ; VI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32)
+    ; VI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C6]]
+    ; VI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
+    ; VI: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
     ; VI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LOAD14]](s32)
+    ; VI: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C6]]
+    ; VI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C7]](s32)
+    ; VI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
     ; VI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32)
-    ; VI: [[BUILD_VECTOR3:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32)
-    ; VI: [[TRUNC3:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR3]](<4 x s32>)
-    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s8>), [[TRUNC1]](<4 x s8>), [[TRUNC2]](<4 x s8>), [[TRUNC3]](<4 x s8>)
-    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<16 x s8>)
+    ; VI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C6]]
+    ; VI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C8]](s32)
+    ; VI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
+    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
     ; GFX9-LABEL: name: test_load_private_v16s8_align16
     ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
     ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
@@ -5297,43 +5608,71 @@ body: |
     ; GFX9: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5)
     ; GFX9: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32)
     ; GFX9: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5)
+    ; GFX9: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
     ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C6]]
     ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
+    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C6]]
+    ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
     ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+    ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C6]]
+    ; GFX9: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C7]](s32)
+    ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
     ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32)
-    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
-    ; GFX9: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>)
+    ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C6]]
+    ; GFX9: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C8]](s32)
+    ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
     ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32)
+    ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C6]]
     ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32)
+    ; GFX9: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C6]]
+    ; GFX9: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+    ; GFX9: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
     ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32)
+    ; GFX9: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C6]]
+    ; GFX9: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C7]](s32)
+    ; GFX9: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
     ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY7]](s32), [[COPY8]](s32)
-    ; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>)
-    ; GFX9: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS1]](<4 x s16>)
+    ; GFX9: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C6]]
+    ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32)
+    ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
     ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32)
+    ; GFX9: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C6]]
     ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY9]](s32), [[COPY10]](s32)
+    ; GFX9: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C6]]
+    ; GFX9: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+    ; GFX9: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
     ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD10]](s32)
+    ; GFX9: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C6]]
+    ; GFX9: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C7]](s32)
+    ; GFX9: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
     ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY11]](s32), [[COPY12]](s32)
-    ; GFX9: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC4]](<2 x s16>), [[BUILD_VECTOR_TRUNC5]](<2 x s16>)
-    ; GFX9: [[TRUNC2:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS2]](<4 x s16>)
+    ; GFX9: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C6]]
+    ; GFX9: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C8]](s32)
+    ; GFX9: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
     ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32)
+    ; GFX9: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C6]]
     ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY13]](s32), [[COPY14]](s32)
+    ; GFX9: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C6]]
+    ; GFX9: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
+    ; GFX9: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
     ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LOAD14]](s32)
+    ; GFX9: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C6]]
+    ; GFX9: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C7]](s32)
+    ; GFX9: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
     ; GFX9: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC7:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[COPY16]](s32)
-    ; GFX9: [[CONCAT_VECTORS3:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC6]](<2 x s16>), [[BUILD_VECTOR_TRUNC7]](<2 x s16>)
-    ; GFX9: [[TRUNC3:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS3]](<4 x s16>)
-    ; GFX9: [[CONCAT_VECTORS4:%[0-9]+]]:_(<16 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s8>), [[TRUNC1]](<4 x s8>), [[TRUNC2]](<4 x s8>), [[TRUNC3]](<4 x s8>)
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS4]](<16 x s8>)
+    ; GFX9: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C6]]
+    ; GFX9: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C8]](s32)
+    ; GFX9: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
+    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
     %0:_(p5) = COPY $vgpr0
     %1:_(<16 x s8>) = G_LOAD %0 :: (load (<16 x s8>), align 1, addrspace 5)
-    $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
+    %2:_(<4 x s32>) = G_BITCAST %1
+    $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2
 ...
 
 ---
@@ -5591,6 +5930,15 @@ body: |
     ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[LOAD]](<2 x s16>)
     ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
     ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32)
+    ; SI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+    ; SI: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; SI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; SI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
+    ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
+    ; SI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C1]](s32)
     ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
     ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
     ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]]
@@ -5598,19 +5946,23 @@ body: |
     ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]]
     ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
     ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; SI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; SI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
     ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
     ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]]
-    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[C1]](s32)
+    ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C2]]
+    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32)
     ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; SI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; SI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
-    ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[DEF]](<2 x s16>)
-    ; SI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
-    ; SI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
-    ; SI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
+    ; SI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C2]]
+    ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+    ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C2]]
+    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32)
+    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; SI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>)
+    ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; CI-LABEL: name: test_load_private_v3s16_align8
     ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
     ; CI: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 8, addrspace 5)
@@ -5620,6 +5972,15 @@ body: |
     ; CI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[LOAD]](<2 x s16>)
     ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
     ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32)
+    ; CI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+    ; CI: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; CI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
+    ; CI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; CI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
+    ; CI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; CI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C1]](s32)
     ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
     ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
     ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]]
@@ -5627,19 +5988,23 @@ body: |
     ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]]
     ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
     ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
     ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
     ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]]
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[C1]](s32)
+    ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C2]]
+    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32)
     ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; CI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; CI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
-    ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[DEF]](<2 x s16>)
-    ; CI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
-    ; CI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
-    ; CI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
+    ; CI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C2]]
+    ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+    ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C2]]
+    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32)
+    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; CI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>)
+    ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; VI-LABEL: name: test_load_private_v3s16_align8
     ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
     ; VI: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 8, addrspace 5)
@@ -5649,6 +6014,15 @@ body: |
     ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[LOAD]](<2 x s16>)
     ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
     ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32)
+    ; VI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+    ; VI: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; VI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; VI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
+    ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
+    ; VI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C1]](s32)
     ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
     ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
     ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]]
@@ -5656,19 +6030,23 @@ body: |
     ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]]
     ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
     ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; VI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
     ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
     ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]]
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[C1]](s32)
+    ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C2]]
+    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32)
     ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; VI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[DEF]](<2 x s16>)
-    ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
-    ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
-    ; VI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
+    ; VI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C2]]
+    ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+    ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C2]]
+    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32)
+    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; VI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>)
+    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; GFX9-LABEL: name: test_load_private_v3s16_align8
     ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
     ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 8, addrspace 5)
@@ -5678,23 +6056,31 @@ body: |
     ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[LOAD]](<2 x s16>)
     ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
     ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32)
+    ; GFX9: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+    ; GFX9: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; GFX9: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
+    ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
+    ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C1]](s32)
     ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
     ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
     ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
     ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
-    ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[DEF]](s32)
-    ; GFX9: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
-    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[DEF1]](<2 x s16>)
-    ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
-    ; GFX9: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[UV]](<3 x s16>), 0
-    ; GFX9: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
+    ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32)
+    ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+    ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32)
+    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+    ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     %0:_(p5) = COPY $vgpr0
     %1:_(<3 x s16>) = G_LOAD %0 :: (load (<3 x s16>), align 8, addrspace 5)
-    %2:_(<4 x s16>) = G_IMPLICIT_DEF
-    %3:_(<4 x s16>) = G_INSERT %2, %1, 0
-    $vgpr0_vgpr1 = COPY %3
+    %2:_(<3 x s16>) = G_IMPLICIT_DEF
+    %3:_(<6 x s16>) = G_CONCAT_VECTORS %1, %2
+    $vgpr0_vgpr1_vgpr2 = COPY %3
 ...
 
 ---
@@ -5712,27 +6098,40 @@ body: |
     ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
     ; SI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
     ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5)
-    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; SI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+    ; SI: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; SI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; SI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
+    ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
+    ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C2]](s32)
+    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
     ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]]
+    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
     ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]]
-    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32)
+    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]]
+    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
     ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; SI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; SI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
     ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
-    ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]]
-    ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32)
+    ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]]
+    ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
+    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
     ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; SI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; SI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
-    ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[DEF]](<2 x s16>)
-    ; SI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
-    ; SI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
-    ; SI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
+    ; SI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]]
+    ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]]
+    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
+    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; SI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+    ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; CI-LABEL: name: test_load_private_v3s16_align2
     ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
     ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
@@ -5742,27 +6141,40 @@ body: |
     ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
     ; CI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
     ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5)
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+    ; CI: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; CI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
+    ; CI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
+    ; CI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; CI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C2]](s32)
+    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
     ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
-    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]]
+    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
     ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]]
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32)
+    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]]
+    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
     ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; CI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
     ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
-    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]]
-    ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32)
+    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]]
+    ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
+    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
     ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; CI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; CI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
-    ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[DEF]](<2 x s16>)
-    ; CI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
-    ; CI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
-    ; CI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
+    ; CI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]]
+    ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]]
+    ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
+    ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; CI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+    ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; VI-LABEL: name: test_load_private_v3s16_align2
     ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
     ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
@@ -5772,27 +6184,40 @@ body: |
     ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
     ; VI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
     ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5)
-    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; VI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+    ; VI: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; VI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; VI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
+    ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
+    ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C2]](s32)
+    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
     ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]]
+    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
     ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]]
-    ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32)
+    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]]
+    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
     ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
     ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
-    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]]
-    ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32)
+    ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]]
+    ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
+    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
     ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-    ; VI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; VI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[DEF]](<2 x s16>)
-    ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
-    ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
-    ; VI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
+    ; VI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]]
+    ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]]
+    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
+    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; VI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; GFX9-LABEL: name: test_load_private_v3s16_align2
     ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
     ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5)
@@ -5802,23 +6227,32 @@ body: |
     ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
     ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
     ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5)
+    ; GFX9: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+    ; GFX9: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; GFX9: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
+    ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
+    ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C2]](s32)
     ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
     ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
     ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
     ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
-    ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[DEF]](s32)
-    ; GFX9: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
-    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[DEF1]](<2 x s16>)
-    ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
-    ; GFX9: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[UV]](<3 x s16>), 0
-    ; GFX9: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
+    ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32)
+    ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32)
+    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+    ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     %0:_(p5) = COPY $vgpr0
     %1:_(<3 x s16>) = G_LOAD %0 :: (load (<3 x s16>), align 2, addrspace 5)
-    %2:_(<4 x s16>) = G_IMPLICIT_DEF
-    %3:_(<4 x s16>) = G_INSERT %2, %1, 0
-    $vgpr0_vgpr1 = COPY %3
+    %2:_(<3 x s16>) = G_IMPLICIT_DEF
+    %3:_(<6 x s16>) = G_CONCAT_VECTORS %1, %2
+    $vgpr0_vgpr1_vgpr2 = COPY %3
 ...
 
 ---
@@ -5869,23 +6303,37 @@ body: |
     ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32)
     ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
     ; SI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
+    ; SI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+    ; SI: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; SI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; SI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
+    ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C6]](s32)
+    ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C6]](s32)
     ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
     ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
     ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32)
     ; SI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]]
-    ; SI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+    ; SI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
     ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; SI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C7]], [[C6]](s32)
+    ; SI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C7]]
+    ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C6]](s32)
     ; SI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]]
-    ; SI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
-    ; SI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
-    ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[DEF]](<2 x s16>)
-    ; SI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
-    ; SI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
-    ; SI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
+    ; SI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+    ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C7]]
+    ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; SI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C7]]
+    ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C6]](s32)
+    ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL5]]
+    ; SI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+    ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+    ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; CI-LABEL: name: test_load_private_v3s16_align1
     ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
     ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
@@ -5928,23 +6376,37 @@ body: |
     ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32)
     ; CI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
     ; CI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
+    ; CI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+    ; CI: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; CI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
+    ; CI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C6]](s32)
+    ; CI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; CI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C6]](s32)
     ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
     ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
     ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32)
     ; CI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]]
-    ; CI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+    ; CI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
     ; CI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; CI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C7]], [[C6]](s32)
+    ; CI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; CI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C7]]
+    ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C6]](s32)
     ; CI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]]
-    ; CI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
-    ; CI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
-    ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[DEF]](<2 x s16>)
-    ; CI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
-    ; CI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
-    ; CI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
+    ; CI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+    ; CI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C7]]
+    ; CI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; CI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C7]]
+    ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C6]](s32)
+    ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL5]]
+    ; CI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+    ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+    ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; VI-LABEL: name: test_load_private_v3s16_align1
     ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
     ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
@@ -5981,23 +6443,37 @@ body: |
     ; VI: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]]
     ; VI: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16)
     ; VI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
+    ; VI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+    ; VI: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; VI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; VI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
+    ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C5]](s32)
+    ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C5]](s32)
     ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
     ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
-    ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
     ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
     ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]]
-    ; VI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+    ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
     ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-    ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[C5]](s32)
+    ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C6]]
+    ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
     ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]]
-    ; VI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
-    ; VI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[DEF]](<2 x s16>)
-    ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
-    ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
-    ; VI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
+    ; VI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+    ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C6]]
+    ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C6]]
+    ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C5]](s32)
+    ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL5]]
+    ; VI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; GFX9-LABEL: name: test_load_private_v3s16_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
     ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
@@ -6034,23 +6510,32 @@ body: |
     ; GFX9: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]]
     ; GFX9: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16)
     ; GFX9: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
+    ; GFX9: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+    ; GFX9: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; GFX9: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
+    ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C5]](s32)
+    ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C5]](s32)
     ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
     ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16)
     ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32)
     ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[OR2]](s16)
-    ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[DEF]](s32)
-    ; GFX9: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
-    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[DEF1]](<2 x s16>)
-    ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
-    ; GFX9: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[UV]](<3 x s16>), 0
-    ; GFX9: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
+    ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[COPY1]](s32)
+    ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY2]](s32), [[COPY3]](s32)
+    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+    ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     %0:_(p5) = COPY $vgpr0
     %1:_(<3 x s16>) = G_LOAD %0 :: (load (<3 x s16>), align 1, addrspace 5)
-    %2:_(<4 x s16>) = G_IMPLICIT_DEF
-    %3:_(<4 x s16>) = G_INSERT %2, %1, 0
-    $vgpr0_vgpr1 = COPY %3
+    %2:_(<3 x s16>) = G_IMPLICIT_DEF
+    %3:_(<6 x s16>) = G_CONCAT_VECTORS %1, %2
+    $vgpr0_vgpr1_vgpr2 = COPY %3
 ...
 
 ---


        


More information about the llvm-commits mailing list