[llvm] baafe82 - AMDGPU/GlobalISel: Remove bitcast legality hack

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Wed Feb 5 13:24:33 PST 2020


Author: Matt Arsenault
Date: 2020-02-05T16:24:24-05:00
New Revision: baafe82b07ade3fff4f2685199870b67083a17d5

URL: https://github.com/llvm/llvm-project/commit/baafe82b07ade3fff4f2685199870b67083a17d5
DIFF: https://github.com/llvm/llvm-project/commit/baafe82b07ade3fff4f2685199870b67083a17d5.diff

LOG: AMDGPU/GlobalISel: Remove bitcast legality hack

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bitcast.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 0424a3c3644f..149e871d2083 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -323,8 +323,6 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
   getActionDefinitionsBuilder(G_BITCAST)
     // Don't worry about the size constraint.
     .legalIf(all(isRegisterType(0), isRegisterType(1)))
-    // FIXME: Testing hack
-    .legalForCartesianProduct({S16, LLT::vector(2, 8), })
     .lower();
 
 

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bitcast.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bitcast.mir
index 23fc72a91958..12521e08d874 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bitcast.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bitcast.mir
@@ -445,3 +445,51 @@ body: |
     %3:_(s64) = G_ANYEXT %2
     $vgpr0_vgpr1 = COPY %3
 ...
+
+---
+name: test_bitcast_s16_to_v2s8
+body: |
+  bb.0:
+    liveins: $vgpr0
+
+    ; CHECK-LABEL: name: test_bitcast_s16_to_v2s8
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+    ; CHECK: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; CHECK: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C]](s16)
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16)
+    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+    ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[ANYEXT]](s32)
+    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32)
+    ; CHECK: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s16) = G_TRUNC %0
+    %2:_(<2 x s8>) = G_BITCAST %1
+    %3:_(<2 x s32>) = G_ANYEXT %2
+    $vgpr0_vgpr1 = COPY %3
+...
+
+---
+name: test_bitcast_v2s8_to_s16
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1
+
+    ; CHECK-LABEL: name: test_bitcast_v2s8_to_s16
+    ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
+    ; CHECK: [[TRUNC:%[0-9]+]]:_(<2 x s8>) = G_TRUNC [[COPY]](<2 x s32>)
+    ; CHECK: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[TRUNC]](<2 x s8>)
+    ; CHECK: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8)
+    ; CHECK: [[ZEXT1:%[0-9]+]]:_(s16) = G_ZEXT [[UV1]](s8)
+    ; CHECK: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; CHECK: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ZEXT1]], [[C]](s16)
+    ; CHECK: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[SHL]]
+    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32)
+    %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
+    %1:_(<2 x s8>) = G_TRUNC %0
+    %2:_(s16) = G_BITCAST %1
+    %3:_(s32) = G_ANYEXT %2
+    $vgpr0 = COPY %3
+...

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir
index b9c867407bdf..c47f05b1e4e3 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir
@@ -4107,36 +4107,63 @@ body: |
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
     ; CI: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p4) :: (load 2, align 4, addrspace 4)
     ; CI: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[LOAD]](<4 x s8>), 0
-    ; CI: [[BITCAST:%[0-9]+]]:_(s16) = G_BITCAST [[EXTRACT]](<2 x s8>)
-    ; CI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[BITCAST]](s16)
+    ; CI: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>)
+    ; CI: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8)
+    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UV1]](s8)
+    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32)
+    ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[TRUNC]]
+    ; CI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
     ; CI: $vgpr0 = COPY [[ANYEXT]](s32)
     ; VI-LABEL: name: test_load_constant_v2s8_align4
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
     ; VI: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p4) :: (load 2, align 4, addrspace 4)
     ; VI: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[LOAD]](<4 x s8>), 0
-    ; VI: [[BITCAST:%[0-9]+]]:_(s16) = G_BITCAST [[EXTRACT]](<2 x s8>)
-    ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[BITCAST]](s16)
+    ; VI: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>)
+    ; VI: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8)
+    ; VI: [[ZEXT1:%[0-9]+]]:_(s16) = G_ZEXT [[UV1]](s8)
+    ; VI: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ZEXT1]], [[C]](s16)
+    ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[SHL]]
+    ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
     ; VI: $vgpr0 = COPY [[ANYEXT]](s32)
     ; GFX9-LABEL: name: test_load_constant_v2s8_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
     ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p4) :: (load 2, align 4, addrspace 4)
     ; GFX9: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[LOAD]](<4 x s8>), 0
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(s16) = G_BITCAST [[EXTRACT]](<2 x s8>)
-    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[BITCAST]](s16)
+    ; GFX9: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>)
+    ; GFX9: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8)
+    ; GFX9: [[ZEXT1:%[0-9]+]]:_(s16) = G_ZEXT [[UV1]](s8)
+    ; GFX9: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ZEXT1]], [[C]](s16)
+    ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[SHL]]
+    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
     ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32)
     ; CI-MESA-LABEL: name: test_load_constant_v2s8_align4
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
     ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p4) :: (load 2, align 4, addrspace 4)
     ; CI-MESA: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[LOAD]](<4 x s8>), 0
-    ; CI-MESA: [[BITCAST:%[0-9]+]]:_(s16) = G_BITCAST [[EXTRACT]](<2 x s8>)
-    ; CI-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[BITCAST]](s16)
+    ; CI-MESA: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>)
+    ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8)
+    ; CI-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UV1]](s8)
+    ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32)
+    ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[TRUNC]]
+    ; CI-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
     ; CI-MESA: $vgpr0 = COPY [[ANYEXT]](s32)
     ; GFX9-MESA-LABEL: name: test_load_constant_v2s8_align4
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
     ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p4) :: (load 2, align 4, addrspace 4)
     ; GFX9-MESA: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[LOAD]](<4 x s8>), 0
-    ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s16) = G_BITCAST [[EXTRACT]](<2 x s8>)
-    ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[BITCAST]](s16)
+    ; GFX9-MESA: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>)
+    ; GFX9-MESA: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8)
+    ; GFX9-MESA: [[ZEXT1:%[0-9]+]]:_(s16) = G_ZEXT [[UV1]](s8)
+    ; GFX9-MESA: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ZEXT1]], [[C]](s16)
+    ; GFX9-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[SHL]]
+    ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
     ; GFX9-MESA: $vgpr0 = COPY [[ANYEXT]](s32)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(<2 x s8>) = G_LOAD %0 :: (load 2, align 4, addrspace 4)
@@ -4170,8 +4197,14 @@ body: |
     ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32)
     ; CI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>)
     ; CI: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0
-    ; CI: [[BITCAST:%[0-9]+]]:_(s16) = G_BITCAST [[EXTRACT]](<2 x s8>)
-    ; CI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[BITCAST]](s16)
+    ; CI: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>)
+    ; CI: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8)
+    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UV1]](s8)
+    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32)
+    ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[TRUNC1]]
+    ; CI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
     ; CI: $vgpr0 = COPY [[ANYEXT]](s32)
     ; VI-LABEL: name: test_load_constant_v2s8_align2
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
@@ -4192,8 +4225,13 @@ body: |
     ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32)
     ; VI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>)
     ; VI: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0
-    ; VI: [[BITCAST:%[0-9]+]]:_(s16) = G_BITCAST [[EXTRACT]](<2 x s8>)
-    ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[BITCAST]](s16)
+    ; VI: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>)
+    ; VI: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8)
+    ; VI: [[ZEXT1:%[0-9]+]]:_(s16) = G_ZEXT [[UV1]](s8)
+    ; VI: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ZEXT1]], [[C3]](s16)
+    ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[SHL]]
+    ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
     ; VI: $vgpr0 = COPY [[ANYEXT]](s32)
     ; GFX9-LABEL: name: test_load_constant_v2s8_align2
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
@@ -4216,8 +4254,13 @@ body: |
     ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
     ; GFX9: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>)
     ; GFX9: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(s16) = G_BITCAST [[EXTRACT]](<2 x s8>)
-    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[BITCAST]](s16)
+    ; GFX9: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>)
+    ; GFX9: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8)
+    ; GFX9: [[ZEXT1:%[0-9]+]]:_(s16) = G_ZEXT [[UV1]](s8)
+    ; GFX9: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ZEXT1]], [[C3]](s16)
+    ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[SHL]]
+    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
     ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32)
     ; CI-MESA-LABEL: name: test_load_constant_v2s8_align2
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
@@ -4238,8 +4281,14 @@ body: |
     ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32)
     ; CI-MESA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>)
     ; CI-MESA: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0
-    ; CI-MESA: [[BITCAST:%[0-9]+]]:_(s16) = G_BITCAST [[EXTRACT]](<2 x s8>)
-    ; CI-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[BITCAST]](s16)
+    ; CI-MESA: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>)
+    ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8)
+    ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UV1]](s8)
+    ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32)
+    ; CI-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[TRUNC1]]
+    ; CI-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
     ; CI-MESA: $vgpr0 = COPY [[ANYEXT]](s32)
     ; GFX9-MESA-LABEL: name: test_load_constant_v2s8_align2
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
@@ -4262,8 +4311,13 @@ body: |
     ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
     ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>)
     ; GFX9-MESA: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0
-    ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s16) = G_BITCAST [[EXTRACT]](<2 x s8>)
-    ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[BITCAST]](s16)
+    ; GFX9-MESA: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>)
+    ; GFX9-MESA: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8)
+    ; GFX9-MESA: [[ZEXT1:%[0-9]+]]:_(s16) = G_ZEXT [[UV1]](s8)
+    ; GFX9-MESA: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ZEXT1]], [[C3]](s16)
+    ; GFX9-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[SHL]]
+    ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
     ; GFX9-MESA: $vgpr0 = COPY [[ANYEXT]](s32)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(<2 x s8>) = G_LOAD %0 :: (load 2, align 2, addrspace 4)
@@ -4284,12 +4338,17 @@ body: |
     ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
     ; CI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
     ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1, addrspace 4)
-    ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
-    ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32)
-    ; CI: [[TRUNC:%[0-9]+]]:_(<2 x s8>) = G_TRUNC [[BUILD_VECTOR]](<2 x s32>)
-    ; CI: [[BITCAST:%[0-9]+]]:_(s16) = G_BITCAST [[TRUNC]](<2 x s8>)
-    ; CI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[BITCAST]](s16)
+    ; CI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; CI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
+    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; CI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
     ; CI: $vgpr0 = COPY [[ANYEXT]](s32)
     ; VI-LABEL: name: test_load_constant_v2s8_align1
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
@@ -4297,12 +4356,15 @@ body: |
     ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
     ; VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
     ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1, addrspace 4)
-    ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
-    ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32)
-    ; VI: [[TRUNC:%[0-9]+]]:_(<2 x s8>) = G_TRUNC [[BUILD_VECTOR]](<2 x s32>)
-    ; VI: [[BITCAST:%[0-9]+]]:_(s16) = G_BITCAST [[TRUNC]](<2 x s8>)
-    ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[BITCAST]](s16)
+    ; VI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
+    ; VI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
+    ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
     ; VI: $vgpr0 = COPY [[ANYEXT]](s32)
     ; GFX9-LABEL: name: test_load_constant_v2s8_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
@@ -4310,12 +4372,15 @@ body: |
     ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
     ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
     ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1, addrspace 4)
-    ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
-    ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
-    ; GFX9: [[TRUNC:%[0-9]+]]:_(<2 x s8>) = G_TRUNC [[BUILD_VECTOR_TRUNC]](<2 x s16>)
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(s16) = G_BITCAST [[TRUNC]](<2 x s8>)
-    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[BITCAST]](s16)
+    ; GFX9: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
+    ; GFX9: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
+    ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
     ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32)
     ; CI-MESA-LABEL: name: test_load_constant_v2s8_align1
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
@@ -4323,12 +4388,17 @@ body: |
     ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
     ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
     ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1, addrspace 4)
-    ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
-    ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
-    ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32)
-    ; CI-MESA: [[TRUNC:%[0-9]+]]:_(<2 x s8>) = G_TRUNC [[BUILD_VECTOR]](<2 x s32>)
-    ; CI-MESA: [[BITCAST:%[0-9]+]]:_(s16) = G_BITCAST [[TRUNC]](<2 x s8>)
-    ; CI-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[BITCAST]](s16)
+    ; CI-MESA: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; CI-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; CI-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+    ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
+    ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; CI-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; CI-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
     ; CI-MESA: $vgpr0 = COPY [[ANYEXT]](s32)
     ; GFX9-MESA-LABEL: name: test_load_constant_v2s8_align1
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
@@ -4336,12 +4406,15 @@ body: |
     ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
     ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
     ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1, addrspace 4)
-    ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
-    ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
-    ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(<2 x s8>) = G_TRUNC [[BUILD_VECTOR_TRUNC]](<2 x s16>)
-    ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s16) = G_BITCAST [[TRUNC]](<2 x s8>)
-    ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[BITCAST]](s16)
+    ; GFX9-MESA: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX9-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; GFX9-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
+    ; GFX9-MESA: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
+    ; GFX9-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
     ; GFX9-MESA: $vgpr0 = COPY [[ANYEXT]](s32)
     %0:_(p4) = COPY $vgpr0_vgpr1
     %1:_(<2 x s8>) = G_LOAD %0 :: (load 2, align 1, addrspace 4)

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir
index 0bf5ae054ea1..e14c5f194b88 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir
@@ -4177,36 +4177,63 @@ body: |
     ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
     ; CI: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p0) :: (load 2, align 4)
     ; CI: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[LOAD]](<4 x s8>), 0
-    ; CI: [[BITCAST:%[0-9]+]]:_(s16) = G_BITCAST [[EXTRACT]](<2 x s8>)
-    ; CI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[BITCAST]](s16)
+    ; CI: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>)
+    ; CI: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8)
+    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UV1]](s8)
+    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32)
+    ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[TRUNC]]
+    ; CI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
     ; CI: $vgpr0 = COPY [[ANYEXT]](s32)
     ; VI-LABEL: name: test_load_flat_v2s8_align4
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
     ; VI: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p0) :: (load 2, align 4)
     ; VI: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[LOAD]](<4 x s8>), 0
-    ; VI: [[BITCAST:%[0-9]+]]:_(s16) = G_BITCAST [[EXTRACT]](<2 x s8>)
-    ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[BITCAST]](s16)
+    ; VI: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>)
+    ; VI: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8)
+    ; VI: [[ZEXT1:%[0-9]+]]:_(s16) = G_ZEXT [[UV1]](s8)
+    ; VI: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ZEXT1]], [[C]](s16)
+    ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[SHL]]
+    ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
     ; VI: $vgpr0 = COPY [[ANYEXT]](s32)
     ; GFX9-LABEL: name: test_load_flat_v2s8_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
     ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p0) :: (load 2, align 4)
     ; GFX9: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[LOAD]](<4 x s8>), 0
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(s16) = G_BITCAST [[EXTRACT]](<2 x s8>)
-    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[BITCAST]](s16)
+    ; GFX9: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>)
+    ; GFX9: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8)
+    ; GFX9: [[ZEXT1:%[0-9]+]]:_(s16) = G_ZEXT [[UV1]](s8)
+    ; GFX9: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ZEXT1]], [[C]](s16)
+    ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[SHL]]
+    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
     ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32)
     ; CI-MESA-LABEL: name: test_load_flat_v2s8_align4
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
     ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p0) :: (load 2, align 4)
     ; CI-MESA: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[LOAD]](<4 x s8>), 0
-    ; CI-MESA: [[BITCAST:%[0-9]+]]:_(s16) = G_BITCAST [[EXTRACT]](<2 x s8>)
-    ; CI-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[BITCAST]](s16)
+    ; CI-MESA: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>)
+    ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8)
+    ; CI-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UV1]](s8)
+    ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32)
+    ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[TRUNC]]
+    ; CI-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
     ; CI-MESA: $vgpr0 = COPY [[ANYEXT]](s32)
     ; GFX9-MESA-LABEL: name: test_load_flat_v2s8_align4
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
     ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p0) :: (load 2, align 4)
     ; GFX9-MESA: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[LOAD]](<4 x s8>), 0
-    ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s16) = G_BITCAST [[EXTRACT]](<2 x s8>)
-    ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[BITCAST]](s16)
+    ; GFX9-MESA: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>)
+    ; GFX9-MESA: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8)
+    ; GFX9-MESA: [[ZEXT1:%[0-9]+]]:_(s16) = G_ZEXT [[UV1]](s8)
+    ; GFX9-MESA: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ZEXT1]], [[C]](s16)
+    ; GFX9-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[SHL]]
+    ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
     ; GFX9-MESA: $vgpr0 = COPY [[ANYEXT]](s32)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(<2 x s8>) = G_LOAD %0 :: (load 2, align 4, addrspace 0)
@@ -4240,8 +4267,14 @@ body: |
     ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32)
     ; CI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>)
     ; CI: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0
-    ; CI: [[BITCAST:%[0-9]+]]:_(s16) = G_BITCAST [[EXTRACT]](<2 x s8>)
-    ; CI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[BITCAST]](s16)
+    ; CI: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>)
+    ; CI: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8)
+    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UV1]](s8)
+    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32)
+    ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[TRUNC1]]
+    ; CI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
     ; CI: $vgpr0 = COPY [[ANYEXT]](s32)
     ; VI-LABEL: name: test_load_flat_v2s8_align2
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
@@ -4262,8 +4295,13 @@ body: |
     ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32)
     ; VI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>)
     ; VI: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0
-    ; VI: [[BITCAST:%[0-9]+]]:_(s16) = G_BITCAST [[EXTRACT]](<2 x s8>)
-    ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[BITCAST]](s16)
+    ; VI: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>)
+    ; VI: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8)
+    ; VI: [[ZEXT1:%[0-9]+]]:_(s16) = G_ZEXT [[UV1]](s8)
+    ; VI: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ZEXT1]], [[C3]](s16)
+    ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[SHL]]
+    ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
     ; VI: $vgpr0 = COPY [[ANYEXT]](s32)
     ; GFX9-LABEL: name: test_load_flat_v2s8_align2
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
@@ -4286,8 +4324,13 @@ body: |
     ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
     ; GFX9: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>)
     ; GFX9: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(s16) = G_BITCAST [[EXTRACT]](<2 x s8>)
-    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[BITCAST]](s16)
+    ; GFX9: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>)
+    ; GFX9: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8)
+    ; GFX9: [[ZEXT1:%[0-9]+]]:_(s16) = G_ZEXT [[UV1]](s8)
+    ; GFX9: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ZEXT1]], [[C3]](s16)
+    ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[SHL]]
+    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
     ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32)
     ; CI-MESA-LABEL: name: test_load_flat_v2s8_align2
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
@@ -4308,8 +4351,14 @@ body: |
     ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32)
     ; CI-MESA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>)
     ; CI-MESA: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0
-    ; CI-MESA: [[BITCAST:%[0-9]+]]:_(s16) = G_BITCAST [[EXTRACT]](<2 x s8>)
-    ; CI-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[BITCAST]](s16)
+    ; CI-MESA: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>)
+    ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8)
+    ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UV1]](s8)
+    ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32)
+    ; CI-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[TRUNC1]]
+    ; CI-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
     ; CI-MESA: $vgpr0 = COPY [[ANYEXT]](s32)
     ; GFX9-MESA-LABEL: name: test_load_flat_v2s8_align2
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
@@ -4332,8 +4381,13 @@ body: |
     ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
     ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>)
     ; GFX9-MESA: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0
-    ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s16) = G_BITCAST [[EXTRACT]](<2 x s8>)
-    ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[BITCAST]](s16)
+    ; GFX9-MESA: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>)
+    ; GFX9-MESA: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8)
+    ; GFX9-MESA: [[ZEXT1:%[0-9]+]]:_(s16) = G_ZEXT [[UV1]](s8)
+    ; GFX9-MESA: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ZEXT1]], [[C3]](s16)
+    ; GFX9-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[SHL]]
+    ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
     ; GFX9-MESA: $vgpr0 = COPY [[ANYEXT]](s32)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(<2 x s8>) = G_LOAD %0 :: (load 2, align 2, addrspace 0)
@@ -4354,12 +4408,17 @@ body: |
     ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
     ; CI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
     ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 1)
-    ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
-    ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32)
-    ; CI: [[TRUNC:%[0-9]+]]:_(<2 x s8>) = G_TRUNC [[BUILD_VECTOR]](<2 x s32>)
-    ; CI: [[BITCAST:%[0-9]+]]:_(s16) = G_BITCAST [[TRUNC]](<2 x s8>)
-    ; CI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[BITCAST]](s16)
+    ; CI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; CI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
+    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; CI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
     ; CI: $vgpr0 = COPY [[ANYEXT]](s32)
     ; VI-LABEL: name: test_load_flat_v2s8_align1
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
@@ -4367,12 +4426,15 @@ body: |
     ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
     ; VI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
     ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 1)
-    ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
-    ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32)
-    ; VI: [[TRUNC:%[0-9]+]]:_(<2 x s8>) = G_TRUNC [[BUILD_VECTOR]](<2 x s32>)
-    ; VI: [[BITCAST:%[0-9]+]]:_(s16) = G_BITCAST [[TRUNC]](<2 x s8>)
-    ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[BITCAST]](s16)
+    ; VI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
+    ; VI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
+    ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
     ; VI: $vgpr0 = COPY [[ANYEXT]](s32)
     ; GFX9-LABEL: name: test_load_flat_v2s8_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
@@ -4380,12 +4442,15 @@ body: |
     ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
     ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
     ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 1)
-    ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
-    ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
-    ; GFX9: [[TRUNC:%[0-9]+]]:_(<2 x s8>) = G_TRUNC [[BUILD_VECTOR_TRUNC]](<2 x s16>)
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(s16) = G_BITCAST [[TRUNC]](<2 x s8>)
-    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[BITCAST]](s16)
+    ; GFX9: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
+    ; GFX9: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
+    ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
     ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32)
     ; CI-MESA-LABEL: name: test_load_flat_v2s8_align1
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
@@ -4393,12 +4458,17 @@ body: |
     ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
     ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
     ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 1)
-    ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
-    ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
-    ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32)
-    ; CI-MESA: [[TRUNC:%[0-9]+]]:_(<2 x s8>) = G_TRUNC [[BUILD_VECTOR]](<2 x s32>)
-    ; CI-MESA: [[BITCAST:%[0-9]+]]:_(s16) = G_BITCAST [[TRUNC]](<2 x s8>)
-    ; CI-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[BITCAST]](s16)
+    ; CI-MESA: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; CI-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; CI-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+    ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
+    ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; CI-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; CI-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
     ; CI-MESA: $vgpr0 = COPY [[ANYEXT]](s32)
     ; GFX9-MESA-LABEL: name: test_load_flat_v2s8_align1
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
@@ -4406,12 +4476,15 @@ body: |
     ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
     ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
     ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 1)
-    ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
-    ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
-    ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(<2 x s8>) = G_TRUNC [[BUILD_VECTOR_TRUNC]](<2 x s16>)
-    ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s16) = G_BITCAST [[TRUNC]](<2 x s8>)
-    ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[BITCAST]](s16)
+    ; GFX9-MESA: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX9-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; GFX9-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
+    ; GFX9-MESA: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
+    ; GFX9-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
     ; GFX9-MESA: $vgpr0 = COPY [[ANYEXT]](s32)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(<2 x s8>) = G_LOAD %0 :: (load 2, align 1, addrspace 0)

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir
index 6d9a4497206f..077d1796be8c 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir
@@ -3780,43 +3780,76 @@ body: |
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; SI: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1)
     ; SI: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[LOAD]](<4 x s8>), 0
-    ; SI: [[BITCAST:%[0-9]+]]:_(s16) = G_BITCAST [[EXTRACT]](<2 x s8>)
-    ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[BITCAST]](s16)
+    ; SI: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>)
+    ; SI: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8)
+    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UV1]](s8)
+    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32)
+    ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; SI: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[TRUNC]]
+    ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
     ; SI: $vgpr0 = COPY [[ANYEXT]](s32)
     ; CI-HSA-LABEL: name: test_load_global_v2s8_align4
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; CI-HSA: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1)
     ; CI-HSA: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[LOAD]](<4 x s8>), 0
-    ; CI-HSA: [[BITCAST:%[0-9]+]]:_(s16) = G_BITCAST [[EXTRACT]](<2 x s8>)
-    ; CI-HSA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[BITCAST]](s16)
+    ; CI-HSA: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>)
+    ; CI-HSA: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8)
+    ; CI-HSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-HSA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UV1]](s8)
+    ; CI-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32)
+    ; CI-HSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI-HSA: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[TRUNC]]
+    ; CI-HSA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
     ; CI-HSA: $vgpr0 = COPY [[ANYEXT]](s32)
     ; CI-MESA-LABEL: name: test_load_global_v2s8_align4
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1)
     ; CI-MESA: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[LOAD]](<4 x s8>), 0
-    ; CI-MESA: [[BITCAST:%[0-9]+]]:_(s16) = G_BITCAST [[EXTRACT]](<2 x s8>)
-    ; CI-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[BITCAST]](s16)
+    ; CI-MESA: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>)
+    ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8)
+    ; CI-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UV1]](s8)
+    ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32)
+    ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[TRUNC]]
+    ; CI-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
     ; CI-MESA: $vgpr0 = COPY [[ANYEXT]](s32)
     ; VI-LABEL: name: test_load_global_v2s8_align4
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; VI: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1)
     ; VI: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[LOAD]](<4 x s8>), 0
-    ; VI: [[BITCAST:%[0-9]+]]:_(s16) = G_BITCAST [[EXTRACT]](<2 x s8>)
-    ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[BITCAST]](s16)
+    ; VI: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>)
+    ; VI: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8)
+    ; VI: [[ZEXT1:%[0-9]+]]:_(s16) = G_ZEXT [[UV1]](s8)
+    ; VI: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ZEXT1]], [[C]](s16)
+    ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[SHL]]
+    ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
     ; VI: $vgpr0 = COPY [[ANYEXT]](s32)
     ; GFX9-HSA-LABEL: name: test_load_global_v2s8_align4
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1)
     ; GFX9-HSA: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[LOAD]](<4 x s8>), 0
-    ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(s16) = G_BITCAST [[EXTRACT]](<2 x s8>)
-    ; GFX9-HSA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[BITCAST]](s16)
+    ; GFX9-HSA: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>)
+    ; GFX9-HSA: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8)
+    ; GFX9-HSA: [[ZEXT1:%[0-9]+]]:_(s16) = G_ZEXT [[UV1]](s8)
+    ; GFX9-HSA: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9-HSA: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ZEXT1]], [[C]](s16)
+    ; GFX9-HSA: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[SHL]]
+    ; GFX9-HSA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
     ; GFX9-HSA: $vgpr0 = COPY [[ANYEXT]](s32)
     ; GFX9-MESA-LABEL: name: test_load_global_v2s8_align4
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1)
     ; GFX9-MESA: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[LOAD]](<4 x s8>), 0
-    ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s16) = G_BITCAST [[EXTRACT]](<2 x s8>)
-    ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[BITCAST]](s16)
+    ; GFX9-MESA: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>)
+    ; GFX9-MESA: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8)
+    ; GFX9-MESA: [[ZEXT1:%[0-9]+]]:_(s16) = G_ZEXT [[UV1]](s8)
+    ; GFX9-MESA: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ZEXT1]], [[C]](s16)
+    ; GFX9-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[SHL]]
+    ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
     ; GFX9-MESA: $vgpr0 = COPY [[ANYEXT]](s32)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<2 x s8>) = G_LOAD %0 :: (load 2, align 4, addrspace 1)
@@ -3850,8 +3883,14 @@ body: |
     ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32)
     ; SI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>)
     ; SI: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0
-    ; SI: [[BITCAST:%[0-9]+]]:_(s16) = G_BITCAST [[EXTRACT]](<2 x s8>)
-    ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[BITCAST]](s16)
+    ; SI: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>)
+    ; SI: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8)
+    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UV1]](s8)
+    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32)
+    ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; SI: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[TRUNC1]]
+    ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
     ; SI: $vgpr0 = COPY [[ANYEXT]](s32)
     ; CI-HSA-LABEL: name: test_load_global_v2s8_align2
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
@@ -3872,8 +3911,14 @@ body: |
     ; CI-HSA: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32)
     ; CI-HSA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>)
     ; CI-HSA: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0
-    ; CI-HSA: [[BITCAST:%[0-9]+]]:_(s16) = G_BITCAST [[EXTRACT]](<2 x s8>)
-    ; CI-HSA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[BITCAST]](s16)
+    ; CI-HSA: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>)
+    ; CI-HSA: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8)
+    ; CI-HSA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-HSA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UV1]](s8)
+    ; CI-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32)
+    ; CI-HSA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI-HSA: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[TRUNC1]]
+    ; CI-HSA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
     ; CI-HSA: $vgpr0 = COPY [[ANYEXT]](s32)
     ; CI-MESA-LABEL: name: test_load_global_v2s8_align2
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
@@ -3894,8 +3939,14 @@ body: |
     ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32)
     ; CI-MESA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>)
     ; CI-MESA: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0
-    ; CI-MESA: [[BITCAST:%[0-9]+]]:_(s16) = G_BITCAST [[EXTRACT]](<2 x s8>)
-    ; CI-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[BITCAST]](s16)
+    ; CI-MESA: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>)
+    ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8)
+    ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UV1]](s8)
+    ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32)
+    ; CI-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[TRUNC1]]
+    ; CI-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
     ; CI-MESA: $vgpr0 = COPY [[ANYEXT]](s32)
     ; VI-LABEL: name: test_load_global_v2s8_align2
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
@@ -3916,8 +3967,13 @@ body: |
     ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32)
     ; VI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>)
     ; VI: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0
-    ; VI: [[BITCAST:%[0-9]+]]:_(s16) = G_BITCAST [[EXTRACT]](<2 x s8>)
-    ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[BITCAST]](s16)
+    ; VI: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>)
+    ; VI: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8)
+    ; VI: [[ZEXT1:%[0-9]+]]:_(s16) = G_ZEXT [[UV1]](s8)
+    ; VI: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ZEXT1]], [[C3]](s16)
+    ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[SHL]]
+    ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
     ; VI: $vgpr0 = COPY [[ANYEXT]](s32)
     ; GFX9-HSA-LABEL: name: test_load_global_v2s8_align2
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
@@ -3940,8 +3996,13 @@ body: |
     ; GFX9-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
     ; GFX9-HSA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>)
     ; GFX9-HSA: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0
-    ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(s16) = G_BITCAST [[EXTRACT]](<2 x s8>)
-    ; GFX9-HSA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[BITCAST]](s16)
+    ; GFX9-HSA: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>)
+    ; GFX9-HSA: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8)
+    ; GFX9-HSA: [[ZEXT1:%[0-9]+]]:_(s16) = G_ZEXT [[UV1]](s8)
+    ; GFX9-HSA: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9-HSA: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ZEXT1]], [[C3]](s16)
+    ; GFX9-HSA: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[SHL]]
+    ; GFX9-HSA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
     ; GFX9-HSA: $vgpr0 = COPY [[ANYEXT]](s32)
     ; GFX9-MESA-LABEL: name: test_load_global_v2s8_align2
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
@@ -3964,8 +4025,13 @@ body: |
     ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
     ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>)
     ; GFX9-MESA: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0
-    ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s16) = G_BITCAST [[EXTRACT]](<2 x s8>)
-    ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[BITCAST]](s16)
+    ; GFX9-MESA: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>)
+    ; GFX9-MESA: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8)
+    ; GFX9-MESA: [[ZEXT1:%[0-9]+]]:_(s16) = G_ZEXT [[UV1]](s8)
+    ; GFX9-MESA: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ZEXT1]], [[C3]](s16)
+    ; GFX9-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[SHL]]
+    ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
     ; GFX9-MESA: $vgpr0 = COPY [[ANYEXT]](s32)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<2 x s8>) = G_LOAD %0 :: (load 2, align 2, addrspace 1)
@@ -3986,12 +4052,17 @@ body: |
     ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
     ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
     ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1, addrspace 1)
-    ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
-    ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
-    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32)
-    ; SI: [[TRUNC:%[0-9]+]]:_(<2 x s8>) = G_TRUNC [[BUILD_VECTOR]](<2 x s32>)
-    ; SI: [[BITCAST:%[0-9]+]]:_(s16) = G_BITCAST [[TRUNC]](<2 x s8>)
-    ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[BITCAST]](s16)
+    ; SI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; SI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
+    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; SI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
     ; SI: $vgpr0 = COPY [[ANYEXT]](s32)
     ; CI-HSA-LABEL: name: test_load_global_v2s8_align1
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
@@ -4012,8 +4083,14 @@ body: |
     ; CI-HSA: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32)
     ; CI-HSA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>)
     ; CI-HSA: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0
-    ; CI-HSA: [[BITCAST:%[0-9]+]]:_(s16) = G_BITCAST [[EXTRACT]](<2 x s8>)
-    ; CI-HSA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[BITCAST]](s16)
+    ; CI-HSA: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>)
+    ; CI-HSA: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8)
+    ; CI-HSA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-HSA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UV1]](s8)
+    ; CI-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32)
+    ; CI-HSA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI-HSA: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[TRUNC1]]
+    ; CI-HSA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
     ; CI-HSA: $vgpr0 = COPY [[ANYEXT]](s32)
     ; CI-MESA-LABEL: name: test_load_global_v2s8_align1
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
@@ -4021,12 +4098,17 @@ body: |
     ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
     ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
     ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1, addrspace 1)
-    ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
-    ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
-    ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32)
-    ; CI-MESA: [[TRUNC:%[0-9]+]]:_(<2 x s8>) = G_TRUNC [[BUILD_VECTOR]](<2 x s32>)
-    ; CI-MESA: [[BITCAST:%[0-9]+]]:_(s16) = G_BITCAST [[TRUNC]](<2 x s8>)
-    ; CI-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[BITCAST]](s16)
+    ; CI-MESA: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; CI-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; CI-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+    ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
+    ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+    ; CI-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; CI-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
     ; CI-MESA: $vgpr0 = COPY [[ANYEXT]](s32)
     ; VI-LABEL: name: test_load_global_v2s8_align1
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
@@ -4034,12 +4116,15 @@ body: |
     ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
     ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
     ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1, addrspace 1)
-    ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
-    ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
-    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32)
-    ; VI: [[TRUNC:%[0-9]+]]:_(<2 x s8>) = G_TRUNC [[BUILD_VECTOR]](<2 x s32>)
-    ; VI: [[BITCAST:%[0-9]+]]:_(s16) = G_BITCAST [[TRUNC]](<2 x s8>)
-    ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[BITCAST]](s16)
+    ; VI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
+    ; VI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
+    ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
     ; VI: $vgpr0 = COPY [[ANYEXT]](s32)
     ; GFX9-HSA-LABEL: name: test_load_global_v2s8_align1
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
@@ -4062,8 +4147,13 @@ body: |
     ; GFX9-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
     ; GFX9-HSA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>)
     ; GFX9-HSA: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0
-    ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(s16) = G_BITCAST [[EXTRACT]](<2 x s8>)
-    ; GFX9-HSA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[BITCAST]](s16)
+    ; GFX9-HSA: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>)
+    ; GFX9-HSA: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8)
+    ; GFX9-HSA: [[ZEXT1:%[0-9]+]]:_(s16) = G_ZEXT [[UV1]](s8)
+    ; GFX9-HSA: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9-HSA: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ZEXT1]], [[C3]](s16)
+    ; GFX9-HSA: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[SHL]]
+    ; GFX9-HSA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
     ; GFX9-HSA: $vgpr0 = COPY [[ANYEXT]](s32)
     ; GFX9-MESA-LABEL: name: test_load_global_v2s8_align1
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
@@ -4071,12 +4161,15 @@ body: |
     ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
     ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
     ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1, addrspace 1)
-    ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
-    ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
-    ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
-    ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(<2 x s8>) = G_TRUNC [[BUILD_VECTOR_TRUNC]](<2 x s16>)
-    ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s16) = G_BITCAST [[TRUNC]](<2 x s8>)
-    ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[BITCAST]](s16)
+    ; GFX9-MESA: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; GFX9-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; GFX9-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+    ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
+    ; GFX9-MESA: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
+    ; GFX9-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
     ; GFX9-MESA: $vgpr0 = COPY [[ANYEXT]](s32)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<2 x s8>) = G_LOAD %0 :: (load 2, align 1, addrspace 1)

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir
index 145e1dd028c5..344e4b42cdc5 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir
@@ -5355,8 +5355,14 @@ body: |
     ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32)
     ; SI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>)
     ; SI: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0
-    ; SI: [[BITCAST:%[0-9]+]]:_(s16) = G_BITCAST [[EXTRACT]](<2 x s8>)
-    ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[BITCAST]](s16)
+    ; SI: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>)
+    ; SI: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8)
+    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UV1]](s8)
+    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32)
+    ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; SI: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[TRUNC1]]
+    ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
     ; SI: $vgpr0 = COPY [[ANYEXT]](s32)
     ; CI-LABEL: name: test_load_local_v2s8_align2
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
@@ -5377,8 +5383,14 @@ body: |
     ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32)
     ; CI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>)
     ; CI: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0
-    ; CI: [[BITCAST:%[0-9]+]]:_(s16) = G_BITCAST [[EXTRACT]](<2 x s8>)
-    ; CI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[BITCAST]](s16)
+    ; CI: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>)
+    ; CI: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8)
+    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UV1]](s8)
+    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32)
+    ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[TRUNC1]]
+    ; CI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
     ; CI: $vgpr0 = COPY [[ANYEXT]](s32)
     ; CI-DS128-LABEL: name: test_load_local_v2s8_align2
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
@@ -5399,8 +5411,14 @@ body: |
     ; CI-DS128: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32)
     ; CI-DS128: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>)
     ; CI-DS128: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0
-    ; CI-DS128: [[BITCAST:%[0-9]+]]:_(s16) = G_BITCAST [[EXTRACT]](<2 x s8>)
-    ; CI-DS128: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[BITCAST]](s16)
+    ; CI-DS128: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>)
+    ; CI-DS128: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8)
+    ; CI-DS128: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI-DS128: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UV1]](s8)
+    ; CI-DS128: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32)
+    ; CI-DS128: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI-DS128: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[TRUNC1]]
+    ; CI-DS128: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
     ; CI-DS128: $vgpr0 = COPY [[ANYEXT]](s32)
     ; VI-LABEL: name: test_load_local_v2s8_align2
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
@@ -5421,8 +5439,13 @@ body: |
     ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32)
     ; VI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>)
     ; VI: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0
-    ; VI: [[BITCAST:%[0-9]+]]:_(s16) = G_BITCAST [[EXTRACT]](<2 x s8>)
-    ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[BITCAST]](s16)
+    ; VI: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>)
+    ; VI: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8)
+    ; VI: [[ZEXT1:%[0-9]+]]:_(s16) = G_ZEXT [[UV1]](s8)
+    ; VI: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ZEXT1]], [[C3]](s16)
+    ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[SHL]]
+    ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
     ; VI: $vgpr0 = COPY [[ANYEXT]](s32)
     ; GFX9-LABEL: name: test_load_local_v2s8_align2
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
@@ -5445,8 +5468,13 @@ body: |
     ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
     ; GFX9: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>)
     ; GFX9: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(s16) = G_BITCAST [[EXTRACT]](<2 x s8>)
-    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[BITCAST]](s16)
+    ; GFX9: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>)
+    ; GFX9: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8)
+    ; GFX9: [[ZEXT1:%[0-9]+]]:_(s16) = G_ZEXT [[UV1]](s8)
+    ; GFX9: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ZEXT1]], [[C3]](s16)
+    ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[SHL]]
+    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
     ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32)
     %0:_(p3) = COPY $vgpr0
     %1:_(<2 x s8>) = G_LOAD %0 :: (load 2, align 2, addrspace 3)

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir
index e5ac91bafea8..ecd8345dad12 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir
@@ -4362,8 +4362,14 @@ body: |
     ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32)
     ; SI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>)
     ; SI: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0
-    ; SI: [[BITCAST:%[0-9]+]]:_(s16) = G_BITCAST [[EXTRACT]](<2 x s8>)
-    ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[BITCAST]](s16)
+    ; SI: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>)
+    ; SI: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8)
+    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UV1]](s8)
+    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32)
+    ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; SI: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[TRUNC1]]
+    ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
     ; SI: $vgpr0 = COPY [[ANYEXT]](s32)
     ; CI-LABEL: name: test_load_private_v2s8_align2
     ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
@@ -4384,8 +4390,14 @@ body: |
     ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32)
     ; CI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>)
     ; CI: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0
-    ; CI: [[BITCAST:%[0-9]+]]:_(s16) = G_BITCAST [[EXTRACT]](<2 x s8>)
-    ; CI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[BITCAST]](s16)
+    ; CI: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>)
+    ; CI: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8)
+    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UV1]](s8)
+    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32)
+    ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[TRUNC1]]
+    ; CI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
     ; CI: $vgpr0 = COPY [[ANYEXT]](s32)
     ; VI-LABEL: name: test_load_private_v2s8_align2
     ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
@@ -4406,8 +4418,13 @@ body: |
     ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32)
     ; VI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>)
     ; VI: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0
-    ; VI: [[BITCAST:%[0-9]+]]:_(s16) = G_BITCAST [[EXTRACT]](<2 x s8>)
-    ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[BITCAST]](s16)
+    ; VI: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>)
+    ; VI: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8)
+    ; VI: [[ZEXT1:%[0-9]+]]:_(s16) = G_ZEXT [[UV1]](s8)
+    ; VI: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ZEXT1]], [[C3]](s16)
+    ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[SHL]]
+    ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
     ; VI: $vgpr0 = COPY [[ANYEXT]](s32)
     ; GFX9-LABEL: name: test_load_private_v2s8_align2
     ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
@@ -4430,8 +4447,13 @@ body: |
     ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
     ; GFX9: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>)
     ; GFX9: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0
-    ; GFX9: [[BITCAST:%[0-9]+]]:_(s16) = G_BITCAST [[EXTRACT]](<2 x s8>)
-    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[BITCAST]](s16)
+    ; GFX9: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>)
+    ; GFX9: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8)
+    ; GFX9: [[ZEXT1:%[0-9]+]]:_(s16) = G_ZEXT [[UV1]](s8)
+    ; GFX9: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ZEXT1]], [[C3]](s16)
+    ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[SHL]]
+    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
     ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32)
     %0:_(p5) = COPY $vgpr0
     %1:_(<2 x s8>) = G_LOAD %0 :: (load 2, align 2, addrspace 5)


        


More information about the llvm-commits mailing list