[llvm] 984a499 - AMDGPU/GlobalISel: Fix using unlegalizable values in tests

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Tue Aug 25 06:39:38 PDT 2020


Author: Matt Arsenault
Date: 2020-08-25T09:39:32-04:00
New Revision: 984a499f9dff0fd16a6d4591970a58fabf966c4c

URL: https://github.com/llvm/llvm-project/commit/984a499f9dff0fd16a6d4591970a58fabf966c4c
DIFF: https://github.com/llvm/llvm-project/commit/984a499f9dff0fd16a6d4591970a58fabf966c4c.diff

LOG: AMDGPU/GlobalISel: Fix using unlegalizable values in tests

Implicit uses of non-register value types places impossible to satisfy
constraints on the legalizer / artifact combiner. These prevent
writing sensible legalize rules for the artifacts without triggering
infinite loops in the legalizer.

The verifier really needs to enforce this, but I'm not sure what the
exact conditions would look like yet.

Added: 
    

Modified: 
    llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-unmerge-values.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bitcast.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-build-vector.s16.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fabs.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcanonicalize.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcmp.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcos.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fdiv.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ffloor.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fneg.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsin.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsqrt.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smax.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smin.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-trunc.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umax.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umin.mir

Removed: 
    


################################################################################
diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-unmerge-values.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-unmerge-values.mir
index 4476fec23ac41..70fe319659ca8 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-unmerge-values.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-unmerge-values.mir
@@ -583,13 +583,59 @@ body:             |
     ; CHECK: [[COPY2:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr4_vgpr5
     ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[COPY]](<4 x s16>), [[COPY1]](<4 x s16>), [[COPY2]](<4 x s16>)
     ; CHECK: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
-    ; CHECK: S_ENDPGM 0, implicit [[UV]](<3 x s16>), implicit [[UV1]](<3 x s16>), implicit [[UV2]](<3 x s16>), implicit [[UV3]](<3 x s16>)
+    ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0
+    ; CHECK: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>)
+    ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+    ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>)
+    ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+    ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32)
+    ; CHECK: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV1]](<3 x s16>), 0
+    ; CHECK: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>)
+    ; CHECK: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
+    ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+    ; CHECK: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
+    ; CHECK: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+    ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+    ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
+    ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+    ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32)
+    ; CHECK: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV2]](<3 x s16>), 0
+    ; CHECK: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT2]](<4 x s16>)
+    ; CHECK: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>)
+    ; CHECK: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
+    ; CHECK: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>)
+    ; CHECK: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32)
+    ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32)
+    ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32)
+    ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32)
+    ; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+    ; CHECK: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV3]](<3 x s16>), 0
+    ; CHECK: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT3]](<4 x s16>)
+    ; CHECK: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>)
+    ; CHECK: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32)
+    ; CHECK: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV11]](<2 x s16>)
+    ; CHECK: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32)
+    ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST6]](s32)
+    ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32)
+    ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY [[BITCAST7]](s32)
+    ; CHECK: [[BUILD_VECTOR3:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32)
+    ; CHECK: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<3 x s32>), implicit [[BUILD_VECTOR1]](<3 x s32>), implicit [[BUILD_VECTOR2]](<3 x s32>), implicit [[BUILD_VECTOR3]](<3 x s32>)
     %0:_(<4 x s16>) = COPY $vgpr0_vgpr1
     %1:_(<4 x s16>) = COPY $vgpr2_vgpr3
     %2:_(<4 x s16>) = COPY $vgpr4_vgpr5
     %3:_(<12 x s16>) = G_CONCAT_VECTORS %0, %1, %2
     %4:_(<3 x s16>), %5:_(<3 x s16>), %6:_(<3 x s16>), %7:_(<3 x s16>) = G_UNMERGE_VALUES %3
-    S_ENDPGM 0, implicit %4, implicit %5, implicit %6, implicit %7
+    %8:_(<3 x s32>) = G_ANYEXT %4
+    %9:_(<3 x s32>) = G_ANYEXT %5
+    %10:_(<3 x s32>) = G_ANYEXT %6
+    %11:_(<3 x s32>) = G_ANYEXT %7
+    S_ENDPGM 0, implicit %8, implicit %9, implicit %10, implicit %11
 ...
 
 ---
@@ -1080,13 +1126,15 @@ body:             |
     ; CHECK-LABEL: name: test_unmerge_values_v2s8_v4s8_trunc_v4s32
     ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     ; CHECK: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(<2 x s8>) = G_TRUNC [[UV]](<2 x s32>)
-    ; CHECK: [[TRUNC1:%[0-9]+]]:_(<2 x s8>) = G_TRUNC [[UV1]](<2 x s32>)
-    ; CHECK: S_ENDPGM 0, implicit [[TRUNC]](<2 x s8>), implicit [[TRUNC1]](<2 x s8>)
+    ; CHECK: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV]](<2 x s32>)
+    ; CHECK: [[TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV1]](<2 x s32>)
+    ; CHECK: S_ENDPGM 0, implicit [[TRUNC]](<2 x s16>), implicit [[TRUNC1]](<2 x s16>)
     %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     %1:_(<4 x s8>) = G_TRUNC %0
     %2:_(<2 x s8>), %3:_(<2 x s8>) = G_UNMERGE_VALUES %1
-    S_ENDPGM 0, implicit %2, implicit %3
+    %4:_(<2 x s16>) = G_ANYEXT %2
+    %5:_(<2 x s16>) = G_ANYEXT %3
+    S_ENDPGM 0, implicit %4, implicit %5
 
 ...
 
@@ -1231,13 +1279,15 @@ body:             |
     ; CHECK-LABEL: name: test_unmerge_values_v2s8_v4s8_trunc_v4s16
     ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1
     ; CHECK: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>)
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(<2 x s8>) = G_TRUNC [[UV]](<2 x s16>)
-    ; CHECK: [[TRUNC1:%[0-9]+]]:_(<2 x s8>) = G_TRUNC [[UV1]](<2 x s16>)
-    ; CHECK: S_ENDPGM 0, implicit [[TRUNC]](<2 x s8>), implicit [[TRUNC1]](<2 x s8>)
+    ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY [[UV]](<2 x s16>)
+    ; CHECK: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY [[UV1]](<2 x s16>)
+    ; CHECK: S_ENDPGM 0, implicit [[COPY1]](<2 x s16>), implicit [[COPY2]](<2 x s16>)
     %0:_(<4 x s16>) = COPY $vgpr0_vgpr1
     %1:_(<4 x s8>) = G_TRUNC %0
     %2:_(<2 x s8>), %3:_(<2 x s8>) = G_UNMERGE_VALUES %1
-    S_ENDPGM 0, implicit %2, implicit %3
+    %4:_(<2 x s16>) = G_ANYEXT %2
+    %5:_(<2 x s16>) = G_ANYEXT %3
+    S_ENDPGM 0, implicit %4, implicit %5
 
 ...
 

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bitcast.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bitcast.mir
index de65d83f11597..79c78d29e6729 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bitcast.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bitcast.mir
@@ -1557,12 +1557,49 @@ body: |
     ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
     ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[DEF]](<2 x s16>)
     ; CHECK: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<6 x s16>), 0
-    ; CHECK: S_ENDPGM 0, implicit [[EXTRACT]](<3 x s16>)
+    ; CHECK: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0
+    ; CHECK: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>)
+    ; CHECK: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>)
+    ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C2]](s32)
+    ; CHECK: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>)
+    ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C2]](s32)
+    ; CHECK: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0
+    ; CHECK: [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>)
+    ; CHECK: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV14]](<2 x s16>)
+    ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C2]](s32)
+    ; CHECK: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV15]](<2 x s16>)
+    ; CHECK: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C2]](s32)
+    ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+    ; CHECK: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C4]]
+    ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; CHECK: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C4]]
+    ; CHECK: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
+    ; CHECK: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL5]]
+    ; CHECK: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+    ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+    ; CHECK: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C4]]
+    ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32)
+    ; CHECK: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C4]]
+    ; CHECK: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C2]](s32)
+    ; CHECK: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+    ; CHECK: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32)
+    ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
+    ; CHECK: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C4]]
+    ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32)
+    ; CHECK: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C4]]
+    ; CHECK: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32)
+    ; CHECK: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL7]]
+    ; CHECK: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32)
+    ; CHECK: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>)
+    ; CHECK: S_ENDPGM 0, implicit [[CONCAT_VECTORS1]](<6 x s16>)
     %0:_(<6 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5
     %1:_(<6 x s8>) = G_TRUNC %0
     %2:_(<6 x s8>) = G_ADD %1, %1
     %3:_(<3 x s16>) = G_BITCAST %2
-    S_ENDPGM 0, implicit %3
+    %4:_(<6 x s16>) = G_CONCAT_VECTORS %3, %3
+    S_ENDPGM 0, implicit %4
 ...
 
 ---

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-build-vector.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-build-vector.s16.mir
index 57bd59f6233a4..bfc4df09b275c 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-build-vector.s16.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-build-vector.s16.mir
@@ -65,7 +65,42 @@ body: |
     ; GFX78: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
     ; GFX78: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[DEF]](<2 x s16>)
     ; GFX78: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<6 x s16>), 0
-    ; GFX78: S_NOP 0, implicit [[EXTRACT]](<3 x s16>)
+    ; GFX78: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX78: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0
+    ; GFX78: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>)
+    ; GFX78: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; GFX78: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C1]](s32)
+    ; GFX78: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; GFX78: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32)
+    ; GFX78: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0
+    ; GFX78: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>)
+    ; GFX78: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+    ; GFX78: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C1]](s32)
+    ; GFX78: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+    ; GFX78: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C1]](s32)
+    ; GFX78: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+    ; GFX78: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C]]
+    ; GFX78: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; GFX78: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C]]
+    ; GFX78: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C1]](s32)
+    ; GFX78: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL2]]
+    ; GFX78: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; GFX78: [[COPY8:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+    ; GFX78: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C]]
+    ; GFX78: [[COPY9:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32)
+    ; GFX78: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C]]
+    ; GFX78: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C1]](s32)
+    ; GFX78: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL3]]
+    ; GFX78: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+    ; GFX78: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
+    ; GFX78: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C]]
+    ; GFX78: [[COPY11:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32)
+    ; GFX78: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C]]
+    ; GFX78: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C1]](s32)
+    ; GFX78: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL4]]
+    ; GFX78: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+    ; GFX78: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>)
+    ; GFX78: S_NOP 0, implicit [[CONCAT_VECTORS1]](<6 x s16>)
     ; GFX9-LABEL: name: build_vector_v3s16
     ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
     ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
@@ -79,7 +114,31 @@ body: |
     ; GFX9: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
     ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[DEF1]](<2 x s16>)
     ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<6 x s16>), 0
-    ; GFX9: S_NOP 0, implicit [[EXTRACT]](<3 x s16>)
+    ; GFX9: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT]](<3 x s16>), 0
+    ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>)
+    ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+    ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+    ; GFX9: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT]](<3 x s16>), 0
+    ; GFX9: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>)
+    ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+    ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+    ; GFX9: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+    ; GFX9: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+    ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY6]](s32), [[COPY7]](s32)
+    ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+    ; GFX9: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY8]](s32), [[COPY9]](s32)
+    ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
+    ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+    ; GFX9: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[COPY11]](s32)
+    ; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>), [[BUILD_VECTOR_TRUNC4]](<2 x s16>)
+    ; GFX9: S_NOP 0, implicit [[CONCAT_VECTORS1]](<6 x s16>)
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
     %2:_(s32) = COPY $vgpr2
@@ -87,7 +146,8 @@ body: |
     %4:_(s16) = G_TRUNC %1
     %5:_(s16) = G_TRUNC %2
     %6:_(<3 x s16>) = G_BUILD_VECTOR %3, %4, %5
-    S_NOP 0, implicit %6
+    %7:_(<6 x s16>) = G_CONCAT_VECTORS %6, %6
+    S_NOP 0, implicit %7
 ...
 
 ---
@@ -181,7 +241,60 @@ body: |
     ; GFX78: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
     ; GFX78: [[CONCAT_VECTORS:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>)
     ; GFX78: [[EXTRACT:%[0-9]+]]:_(<5 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<10 x s16>), 0
-    ; GFX78: S_NOP 0, implicit [[EXTRACT]](<5 x s16>)
+    ; GFX78: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
+    ; GFX78: [[INSERT:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<5 x s16>), 0
+    ; GFX78: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<6 x s16>)
+    ; GFX78: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; GFX78: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32)
+    ; GFX78: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; GFX78: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C1]](s32)
+    ; GFX78: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+    ; GFX78: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C1]](s32)
+    ; GFX78: [[INSERT1:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<5 x s16>), 0
+    ; GFX78: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<6 x s16>)
+    ; GFX78: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+    ; GFX78: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C1]](s32)
+    ; GFX78: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
+    ; GFX78: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C1]](s32)
+    ; GFX78: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>)
+    ; GFX78: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C1]](s32)
+    ; GFX78: [[COPY10:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+    ; GFX78: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C]]
+    ; GFX78: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; GFX78: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C]]
+    ; GFX78: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C1]](s32)
+    ; GFX78: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL3]]
+    ; GFX78: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+    ; GFX78: [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32)
+    ; GFX78: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C]]
+    ; GFX78: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; GFX78: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C]]
+    ; GFX78: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C1]](s32)
+    ; GFX78: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL4]]
+    ; GFX78: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+    ; GFX78: [[COPY14:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32)
+    ; GFX78: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C]]
+    ; GFX78: [[COPY15:%[0-9]+]]:_(s32) = COPY [[BITCAST6]](s32)
+    ; GFX78: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C]]
+    ; GFX78: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C1]](s32)
+    ; GFX78: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND9]], [[SHL5]]
+    ; GFX78: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+    ; GFX78: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32)
+    ; GFX78: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C]]
+    ; GFX78: [[COPY17:%[0-9]+]]:_(s32) = COPY [[BITCAST7]](s32)
+    ; GFX78: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY17]], [[C]]
+    ; GFX78: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND12]], [[C1]](s32)
+    ; GFX78: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND11]], [[SHL6]]
+    ; GFX78: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32)
+    ; GFX78: [[COPY18:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32)
+    ; GFX78: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY18]], [[C]]
+    ; GFX78: [[COPY19:%[0-9]+]]:_(s32) = COPY [[BITCAST8]](s32)
+    ; GFX78: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY19]], [[C]]
+    ; GFX78: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C1]](s32)
+    ; GFX78: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND13]], [[SHL7]]
+    ; GFX78: [[BITCAST13:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32)
+    ; GFX78: [[CONCAT_VECTORS1:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[BITCAST9]](<2 x s16>), [[BITCAST10]](<2 x s16>), [[BITCAST11]](<2 x s16>), [[BITCAST12]](<2 x s16>), [[BITCAST13]](<2 x s16>)
+    ; GFX78: S_NOP 0, implicit [[CONCAT_VECTORS1]](<10 x s16>)
     ; GFX9-LABEL: name: build_vector_v5s16
     ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
     ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
@@ -200,7 +313,41 @@ body: |
     ; GFX9: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
     ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[DEF1]](<2 x s16>), [[DEF1]](<2 x s16>)
     ; GFX9: [[EXTRACT:%[0-9]+]]:_(<5 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<10 x s16>), 0
-    ; GFX9: S_NOP 0, implicit [[EXTRACT]](<5 x s16>)
+    ; GFX9: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
+    ; GFX9: [[INSERT:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF2]], [[EXTRACT]](<5 x s16>), 0
+    ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<6 x s16>)
+    ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+    ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+    ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+    ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+    ; GFX9: [[INSERT1:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF2]], [[EXTRACT]](<5 x s16>), 0
+    ; GFX9: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<6 x s16>)
+    ; GFX9: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+    ; GFX9: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+    ; GFX9: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
+    ; GFX9: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
+    ; GFX9: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>)
+    ; GFX9: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32)
+    ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; GFX9: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[COPY11]](s32)
+    ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; GFX9: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[COPY13]](s32)
+    ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+    ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+    ; GFX9: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32)
+    ; GFX9: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32)
+    ; GFX9: [[COPY17:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32)
+    ; GFX9: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[COPY17]](s32)
+    ; GFX9: [[COPY18:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32)
+    ; GFX9: [[COPY19:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32)
+    ; GFX9: [[BUILD_VECTOR_TRUNC7:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY18]](s32), [[COPY19]](s32)
+    ; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC3]](<2 x s16>), [[BUILD_VECTOR_TRUNC4]](<2 x s16>), [[BUILD_VECTOR_TRUNC5]](<2 x s16>), [[BUILD_VECTOR_TRUNC6]](<2 x s16>), [[BUILD_VECTOR_TRUNC7]](<2 x s16>)
+    ; GFX9: S_NOP 0, implicit [[CONCAT_VECTORS1]](<10 x s16>)
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
     %2:_(s32) = COPY $vgpr2
@@ -212,7 +359,8 @@ body: |
     %8:_(s16) = G_TRUNC %3
     %9:_(s16) = G_TRUNC %4
     %10:_(<5 x s16>) = G_BUILD_VECTOR %5, %6, %7, %8, %9
-    S_NOP 0, implicit %10
+    %11:_(<10 x s16>) = G_CONCAT_VECTORS %10, %10
+    S_NOP 0, implicit %11
 ...
 
 ---
@@ -261,7 +409,78 @@ body: |
     ; GFX78: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
     ; GFX78: [[CONCAT_VECTORS:%[0-9]+]]:_(<14 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>)
     ; GFX78: [[EXTRACT:%[0-9]+]]:_(<7 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<14 x s16>), 0
-    ; GFX78: S_NOP 0, implicit [[EXTRACT]](<7 x s16>)
+    ; GFX78: [[DEF1:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
+    ; GFX78: [[INSERT:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<7 x s16>), 0
+    ; GFX78: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<8 x s16>)
+    ; GFX78: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; GFX78: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C1]](s32)
+    ; GFX78: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; GFX78: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C1]](s32)
+    ; GFX78: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+    ; GFX78: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C1]](s32)
+    ; GFX78: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+    ; GFX78: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C1]](s32)
+    ; GFX78: [[INSERT1:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<7 x s16>), 0
+    ; GFX78: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<8 x s16>)
+    ; GFX78: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
+    ; GFX78: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C1]](s32)
+    ; GFX78: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>)
+    ; GFX78: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST9]], [[C1]](s32)
+    ; GFX78: [[BITCAST10:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
+    ; GFX78: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST10]], [[C1]](s32)
+    ; GFX78: [[BITCAST11:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
+    ; GFX78: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST11]], [[C1]](s32)
+    ; GFX78: [[COPY14:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32)
+    ; GFX78: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C]]
+    ; GFX78: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; GFX78: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C]]
+    ; GFX78: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C1]](s32)
+    ; GFX78: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL4]]
+    ; GFX78: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+    ; GFX78: [[COPY16:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32)
+    ; GFX78: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C]]
+    ; GFX78: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; GFX78: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY17]], [[C]]
+    ; GFX78: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C1]](s32)
+    ; GFX78: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND9]], [[SHL5]]
+    ; GFX78: [[BITCAST13:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+    ; GFX78: [[COPY18:%[0-9]+]]:_(s32) = COPY [[BITCAST6]](s32)
+    ; GFX78: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY18]], [[C]]
+    ; GFX78: [[COPY19:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
+    ; GFX78: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY19]], [[C]]
+    ; GFX78: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND12]], [[C1]](s32)
+    ; GFX78: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND11]], [[SHL6]]
+    ; GFX78: [[BITCAST14:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32)
+    ; GFX78: [[COPY20:%[0-9]+]]:_(s32) = COPY [[BITCAST7]](s32)
+    ; GFX78: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY20]], [[C]]
+    ; GFX78: [[COPY21:%[0-9]+]]:_(s32) = COPY [[BITCAST8]](s32)
+    ; GFX78: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY21]], [[C]]
+    ; GFX78: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C1]](s32)
+    ; GFX78: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND13]], [[SHL7]]
+    ; GFX78: [[BITCAST15:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32)
+    ; GFX78: [[COPY22:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32)
+    ; GFX78: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY22]], [[C]]
+    ; GFX78: [[COPY23:%[0-9]+]]:_(s32) = COPY [[BITCAST9]](s32)
+    ; GFX78: [[AND16:%[0-9]+]]:_(s32) = G_AND [[COPY23]], [[C]]
+    ; GFX78: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND16]], [[C1]](s32)
+    ; GFX78: [[OR8:%[0-9]+]]:_(s32) = G_OR [[AND15]], [[SHL8]]
+    ; GFX78: [[BITCAST16:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32)
+    ; GFX78: [[COPY24:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32)
+    ; GFX78: [[AND17:%[0-9]+]]:_(s32) = G_AND [[COPY24]], [[C]]
+    ; GFX78: [[COPY25:%[0-9]+]]:_(s32) = COPY [[BITCAST10]](s32)
+    ; GFX78: [[AND18:%[0-9]+]]:_(s32) = G_AND [[COPY25]], [[C]]
+    ; GFX78: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND18]], [[C1]](s32)
+    ; GFX78: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND17]], [[SHL9]]
+    ; GFX78: [[BITCAST17:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR9]](s32)
+    ; GFX78: [[COPY26:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32)
+    ; GFX78: [[AND19:%[0-9]+]]:_(s32) = G_AND [[COPY26]], [[C]]
+    ; GFX78: [[COPY27:%[0-9]+]]:_(s32) = COPY [[BITCAST11]](s32)
+    ; GFX78: [[AND20:%[0-9]+]]:_(s32) = G_AND [[COPY27]], [[C]]
+    ; GFX78: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND20]], [[C1]](s32)
+    ; GFX78: [[OR10:%[0-9]+]]:_(s32) = G_OR [[AND19]], [[SHL10]]
+    ; GFX78: [[BITCAST18:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR10]](s32)
+    ; GFX78: [[CONCAT_VECTORS1:%[0-9]+]]:_(<14 x s16>) = G_CONCAT_VECTORS [[BITCAST12]](<2 x s16>), [[BITCAST13]](<2 x s16>), [[BITCAST14]](<2 x s16>), [[BITCAST15]](<2 x s16>), [[BITCAST16]](<2 x s16>), [[BITCAST17]](<2 x s16>), [[BITCAST18]](<2 x s16>)
+    ; GFX78: S_NOP 0, implicit [[CONCAT_VECTORS1]](<14 x s16>)
     ; GFX9-LABEL: name: build_vector_v7s16
     ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
     ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
@@ -285,7 +504,51 @@ body: |
     ; GFX9: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
     ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<14 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>), [[DEF1]](<2 x s16>), [[DEF1]](<2 x s16>), [[DEF1]](<2 x s16>)
     ; GFX9: [[EXTRACT:%[0-9]+]]:_(<7 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<14 x s16>), 0
-    ; GFX9: S_NOP 0, implicit [[EXTRACT]](<7 x s16>)
+    ; GFX9: [[DEF2:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
+    ; GFX9: [[INSERT:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[DEF2]], [[EXTRACT]](<7 x s16>), 0
+    ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<8 x s16>)
+    ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+    ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+    ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+    ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+    ; GFX9: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+    ; GFX9: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+    ; GFX9: [[INSERT1:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[DEF2]], [[EXTRACT]](<7 x s16>), 0
+    ; GFX9: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<8 x s16>)
+    ; GFX9: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
+    ; GFX9: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
+    ; GFX9: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>)
+    ; GFX9: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32)
+    ; GFX9: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
+    ; GFX9: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32)
+    ; GFX9: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
+    ; GFX9: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32)
+    ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; GFX9: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32)
+    ; GFX9: [[COPY16:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; GFX9: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; GFX9: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[COPY17]](s32)
+    ; GFX9: [[COPY18:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+    ; GFX9: [[COPY19:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
+    ; GFX9: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY18]](s32), [[COPY19]](s32)
+    ; GFX9: [[COPY20:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+    ; GFX9: [[COPY21:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32)
+    ; GFX9: [[BUILD_VECTOR_TRUNC7:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY20]](s32), [[COPY21]](s32)
+    ; GFX9: [[COPY22:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32)
+    ; GFX9: [[COPY23:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32)
+    ; GFX9: [[BUILD_VECTOR_TRUNC8:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY22]](s32), [[COPY23]](s32)
+    ; GFX9: [[COPY24:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32)
+    ; GFX9: [[COPY25:%[0-9]+]]:_(s32) = COPY [[BITCAST6]](s32)
+    ; GFX9: [[BUILD_VECTOR_TRUNC9:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY24]](s32), [[COPY25]](s32)
+    ; GFX9: [[COPY26:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32)
+    ; GFX9: [[COPY27:%[0-9]+]]:_(s32) = COPY [[BITCAST7]](s32)
+    ; GFX9: [[BUILD_VECTOR_TRUNC10:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY26]](s32), [[COPY27]](s32)
+    ; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<14 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC4]](<2 x s16>), [[BUILD_VECTOR_TRUNC5]](<2 x s16>), [[BUILD_VECTOR_TRUNC6]](<2 x s16>), [[BUILD_VECTOR_TRUNC7]](<2 x s16>), [[BUILD_VECTOR_TRUNC8]](<2 x s16>), [[BUILD_VECTOR_TRUNC9]](<2 x s16>), [[BUILD_VECTOR_TRUNC10]](<2 x s16>)
+    ; GFX9: S_NOP 0, implicit [[CONCAT_VECTORS1]](<14 x s16>)
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
     %2:_(s32) = COPY $vgpr2
@@ -301,7 +564,8 @@ body: |
     %12:_(s16) = G_TRUNC %5
     %13:_(s16) = G_TRUNC %6
     %14:_(<7 x s16>) = G_BUILD_VECTOR %7, %8, %9, %10, %11, %12, %13
-    S_NOP 0, implicit %14
+    %15:_(<14 x s16>) = G_CONCAT_VECTORS %14, %14
+    S_NOP 0, implicit %15
 ...
 
 ---

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fabs.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fabs.mir
index 0857d286ff5c9..cb2592a8ab8db 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fabs.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fabs.mir
@@ -238,7 +238,41 @@ body: |
     ; SI: [[FABS1:%[0-9]+]]:_(<2 x s16>) = G_FABS [[BITCAST3]]
     ; SI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FABS]](<2 x s16>), [[FABS1]](<2 x s16>), [[DEF2]](<2 x s16>)
     ; SI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS1]](<6 x s16>), 0
-    ; SI: S_NOP 0, implicit [[EXTRACT]](<3 x s16>)
+    ; SI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0
+    ; SI: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>)
+    ; SI: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
+    ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
+    ; SI: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
+    ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32)
+    ; SI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0
+    ; SI: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT2]](<4 x s16>)
+    ; SI: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>)
+    ; SI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32)
+    ; SI: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>)
+    ; SI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32)
+    ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32)
+    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]]
+    ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
+    ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]]
+    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32)
+    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL2]]
+    ; SI: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32)
+    ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]]
+    ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST6]](s32)
+    ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]]
+    ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C]](s32)
+    ; SI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL3]]
+    ; SI: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+    ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32)
+    ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]]
+    ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[BITCAST7]](s32)
+    ; SI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C1]]
+    ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C]](s32)
+    ; SI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL4]]
+    ; SI: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+    ; SI: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>), [[BITCAST10]](<2 x s16>)
+    ; SI: S_NOP 0, implicit [[CONCAT_VECTORS2]](<6 x s16>)
     ; VI-LABEL: name: test_fabs_v3s16
     ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
     ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
@@ -270,7 +304,41 @@ body: |
     ; VI: [[FABS1:%[0-9]+]]:_(<2 x s16>) = G_FABS [[BITCAST3]]
     ; VI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FABS]](<2 x s16>), [[FABS1]](<2 x s16>), [[DEF2]](<2 x s16>)
     ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS1]](<6 x s16>), 0
-    ; VI: S_NOP 0, implicit [[EXTRACT]](<3 x s16>)
+    ; VI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0
+    ; VI: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>)
+    ; VI: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
+    ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
+    ; VI: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
+    ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32)
+    ; VI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0
+    ; VI: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT2]](<4 x s16>)
+    ; VI: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>)
+    ; VI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32)
+    ; VI: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>)
+    ; VI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32)
+    ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32)
+    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]]
+    ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
+    ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]]
+    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32)
+    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL2]]
+    ; VI: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32)
+    ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]]
+    ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST6]](s32)
+    ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]]
+    ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C]](s32)
+    ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL3]]
+    ; VI: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+    ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32)
+    ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]]
+    ; VI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[BITCAST7]](s32)
+    ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C1]]
+    ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C]](s32)
+    ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL4]]
+    ; VI: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+    ; VI: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>), [[BITCAST10]](<2 x s16>)
+    ; VI: S_NOP 0, implicit [[CONCAT_VECTORS2]](<6 x s16>)
     ; GFX9-LABEL: name: test_fabs_v3s16
     ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
     ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
@@ -294,10 +362,33 @@ body: |
     ; GFX9: [[FABS1:%[0-9]+]]:_(<2 x s16>) = G_FABS [[BUILD_VECTOR_TRUNC1]]
     ; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FABS]](<2 x s16>), [[FABS1]](<2 x s16>), [[DEF3]](<2 x s16>)
     ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS1]](<6 x s16>), 0
-    ; GFX9: S_NOP 0, implicit [[EXTRACT]](<3 x s16>)
+    ; GFX9: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0
+    ; GFX9: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>)
+    ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
+    ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+    ; GFX9: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
+    ; GFX9: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+    ; GFX9: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0
+    ; GFX9: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT2]](<4 x s16>)
+    ; GFX9: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>)
+    ; GFX9: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
+    ; GFX9: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>)
+    ; GFX9: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32)
+    ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+    ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
+    ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32)
+    ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+    ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32)
+    ; GFX9: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32)
+    ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32)
+    ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32)
+    ; GFX9: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY7]](s32), [[COPY8]](s32)
+    ; GFX9: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>), [[BUILD_VECTOR_TRUNC4]](<2 x s16>)
+    ; GFX9: S_NOP 0, implicit [[CONCAT_VECTORS2]](<6 x s16>)
     %0:_(<3 x s16>) = G_IMPLICIT_DEF
     %1:_(<3 x s16>) = G_FABS %0
-    S_NOP 0, implicit %1
+    %2:_(<6 x s16>) = G_CONCAT_VECTORS %1, %1
+    S_NOP 0, implicit %2
 ...
 
 ---

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcanonicalize.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcanonicalize.mir
index 03e92158259ef..6823d5ba2f611 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcanonicalize.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcanonicalize.mir
@@ -257,7 +257,17 @@ body: |
     ; SI: [[DEF2:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
     ; SI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[DEF2]](<2 x s16>)
     ; SI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS1]](<6 x s16>), 0
-    ; SI: S_NOP 0, implicit [[EXTRACT]](<3 x s16>)
+    ; SI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0
+    ; SI: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>)
+    ; SI: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
+    ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
+    ; SI: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
+    ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32)
+    ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32)
+    ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
+    ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32)
+    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
+    ; SI: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>)
     ; VI-LABEL: name: test_fcanonicalize_v3s16
     ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
     ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
@@ -289,7 +299,17 @@ body: |
     ; VI: [[DEF2:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
     ; VI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[DEF2]](<2 x s16>)
     ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS1]](<6 x s16>), 0
-    ; VI: S_NOP 0, implicit [[EXTRACT]](<3 x s16>)
+    ; VI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0
+    ; VI: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>)
+    ; VI: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
+    ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
+    ; VI: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
+    ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32)
+    ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32)
+    ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
+    ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32)
+    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
+    ; VI: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>)
     ; GFX9-LABEL: name: test_fcanonicalize_v3s16
     ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
     ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
@@ -313,10 +333,21 @@ body: |
     ; GFX9: [[FCANONICALIZE1:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[BUILD_VECTOR_TRUNC1]]
     ; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FCANONICALIZE]](<2 x s16>), [[FCANONICALIZE1]](<2 x s16>), [[DEF3]](<2 x s16>)
     ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS1]](<6 x s16>), 0
-    ; GFX9: S_NOP 0, implicit [[EXTRACT]](<3 x s16>)
+    ; GFX9: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0
+    ; GFX9: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>)
+    ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
+    ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+    ; GFX9: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
+    ; GFX9: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+    ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+    ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
+    ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32)
+    ; GFX9: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>)
     %0:_(<3 x s16>) = G_IMPLICIT_DEF
     %1:_(<3 x s16>) = G_FCANONICALIZE %0
-    S_NOP 0, implicit %1
+    %2:_(<3 x s32>) = G_ANYEXT %1
+    S_NOP 0, implicit %2
 ...
 
 ---

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcmp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcmp.mir
index cba2aef2e8d51..16e0c52988b0a 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcmp.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcmp.mir
@@ -116,9 +116,10 @@ body: |
     ; GFX7: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[C]](s32), [[UV1]]
     ; GFX7: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FCMP]](s1)
     ; GFX7: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FCMP1]](s1)
-    ; GFX7: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32)
-    ; GFX7: [[TRUNC:%[0-9]+]]:_(<2 x s1>) = G_TRUNC [[BUILD_VECTOR]](<2 x s32>)
-    ; GFX7: S_NOP 0, implicit [[TRUNC]](<2 x s1>)
+    ; GFX7: [[COPY1:%[0-9]+]]:_(s32) = COPY [[ANYEXT]](s32)
+    ; GFX7: [[COPY2:%[0-9]+]]:_(s32) = COPY [[ANYEXT1]](s32)
+    ; GFX7: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32)
+    ; GFX7: S_NOP 0, implicit [[BUILD_VECTOR]](<2 x s32>)
     ; GFX8-LABEL: name: test_fcmp_v2s32
     ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
     ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
@@ -127,9 +128,10 @@ body: |
     ; GFX8: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[C]](s32), [[UV1]]
     ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FCMP]](s1)
     ; GFX8: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FCMP1]](s1)
-    ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32)
-    ; GFX8: [[TRUNC:%[0-9]+]]:_(<2 x s1>) = G_TRUNC [[BUILD_VECTOR]](<2 x s32>)
-    ; GFX8: S_NOP 0, implicit [[TRUNC]](<2 x s1>)
+    ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY [[ANYEXT]](s32)
+    ; GFX8: [[COPY2:%[0-9]+]]:_(s32) = COPY [[ANYEXT1]](s32)
+    ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32)
+    ; GFX8: S_NOP 0, implicit [[BUILD_VECTOR]](<2 x s32>)
     ; GFX9-LABEL: name: test_fcmp_v2s32
     ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
     ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
@@ -138,14 +140,14 @@ body: |
     ; GFX9: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[C]](s32), [[UV1]]
     ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FCMP]](s1)
     ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FCMP1]](s1)
-    ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32)
-    ; GFX9: [[TRUNC:%[0-9]+]]:_(<2 x s1>) = G_TRUNC [[BUILD_VECTOR_TRUNC]](<2 x s16>)
-    ; GFX9: S_NOP 0, implicit [[TRUNC]](<2 x s1>)
+    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32)
+    ; GFX9: S_NOP 0, implicit [[BUILD_VECTOR]](<2 x s32>)
     %0:_(s32) = G_CONSTANT i32 0
     %1:_(<2 x s32>) = G_BUILD_VECTOR %0, %0
     %2:_(<2 x s32>) = COPY $vgpr0_vgpr1
     %3:_(<2 x s1>) = G_FCMP floatpred(oeq), %1, %2
-    S_NOP 0, implicit %3
+    %4:_(<2 x s32>) = G_ANYEXT %3
+    S_NOP 0, implicit %4
 ...
 
 ---
@@ -161,9 +163,10 @@ body: |
     ; GFX7: [[FCMP1:%[0-9]+]]:_(s1) = nnan G_FCMP floatpred(oeq), [[C]](s32), [[UV1]]
     ; GFX7: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FCMP]](s1)
     ; GFX7: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FCMP1]](s1)
-    ; GFX7: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32)
-    ; GFX7: [[TRUNC:%[0-9]+]]:_(<2 x s1>) = G_TRUNC [[BUILD_VECTOR]](<2 x s32>)
-    ; GFX7: S_NOP 0, implicit [[TRUNC]](<2 x s1>)
+    ; GFX7: [[COPY1:%[0-9]+]]:_(s32) = COPY [[ANYEXT]](s32)
+    ; GFX7: [[COPY2:%[0-9]+]]:_(s32) = COPY [[ANYEXT1]](s32)
+    ; GFX7: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32)
+    ; GFX7: S_NOP 0, implicit [[BUILD_VECTOR]](<2 x s32>)
     ; GFX8-LABEL: name: test_fcmp_v2s32_flags
     ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
     ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
@@ -172,9 +175,10 @@ body: |
     ; GFX8: [[FCMP1:%[0-9]+]]:_(s1) = nnan G_FCMP floatpred(oeq), [[C]](s32), [[UV1]]
     ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FCMP]](s1)
     ; GFX8: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FCMP1]](s1)
-    ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32)
-    ; GFX8: [[TRUNC:%[0-9]+]]:_(<2 x s1>) = G_TRUNC [[BUILD_VECTOR]](<2 x s32>)
-    ; GFX8: S_NOP 0, implicit [[TRUNC]](<2 x s1>)
+    ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY [[ANYEXT]](s32)
+    ; GFX8: [[COPY2:%[0-9]+]]:_(s32) = COPY [[ANYEXT1]](s32)
+    ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32)
+    ; GFX8: S_NOP 0, implicit [[BUILD_VECTOR]](<2 x s32>)
     ; GFX9-LABEL: name: test_fcmp_v2s32_flags
     ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
     ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
@@ -183,14 +187,14 @@ body: |
     ; GFX9: [[FCMP1:%[0-9]+]]:_(s1) = nnan G_FCMP floatpred(oeq), [[C]](s32), [[UV1]]
     ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FCMP]](s1)
     ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FCMP1]](s1)
-    ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32)
-    ; GFX9: [[TRUNC:%[0-9]+]]:_(<2 x s1>) = G_TRUNC [[BUILD_VECTOR_TRUNC]](<2 x s16>)
-    ; GFX9: S_NOP 0, implicit [[TRUNC]](<2 x s1>)
+    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32)
+    ; GFX9: S_NOP 0, implicit [[BUILD_VECTOR]](<2 x s32>)
     %0:_(s32) = G_CONSTANT i32 0
     %1:_(<2 x s32>) = G_BUILD_VECTOR %0, %0
     %2:_(<2 x s32>) = COPY $vgpr0_vgpr1
     %3:_(<2 x s1>) = nnan G_FCMP floatpred(oeq), %1, %2
-    S_NOP 0, implicit %3
+    %4:_(<2 x s32>) = G_ANYEXT %3
+    S_NOP 0, implicit %4
 ...
 
 ---
@@ -210,9 +214,11 @@ body: |
     ; GFX7: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FCMP]](s1)
     ; GFX7: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FCMP1]](s1)
     ; GFX7: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FCMP2]](s1)
-    ; GFX7: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32)
-    ; GFX7: [[TRUNC:%[0-9]+]]:_(<3 x s1>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>)
-    ; GFX7: S_NOP 0, implicit [[TRUNC]](<3 x s1>)
+    ; GFX7: [[COPY1:%[0-9]+]]:_(s32) = COPY [[ANYEXT]](s32)
+    ; GFX7: [[COPY2:%[0-9]+]]:_(s32) = COPY [[ANYEXT1]](s32)
+    ; GFX7: [[COPY3:%[0-9]+]]:_(s32) = COPY [[ANYEXT2]](s32)
+    ; GFX7: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
+    ; GFX7: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>)
     ; GFX8-LABEL: name: test_fcmp_v3s32
     ; GFX8: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
     ; GFX8: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
@@ -224,9 +230,11 @@ body: |
     ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FCMP]](s1)
     ; GFX8: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FCMP1]](s1)
     ; GFX8: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FCMP2]](s1)
-    ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32)
-    ; GFX8: [[TRUNC:%[0-9]+]]:_(<3 x s1>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>)
-    ; GFX8: S_NOP 0, implicit [[TRUNC]](<3 x s1>)
+    ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY [[ANYEXT]](s32)
+    ; GFX8: [[COPY2:%[0-9]+]]:_(s32) = COPY [[ANYEXT1]](s32)
+    ; GFX8: [[COPY3:%[0-9]+]]:_(s32) = COPY [[ANYEXT2]](s32)
+    ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
+    ; GFX8: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>)
     ; GFX9-LABEL: name: test_fcmp_v3s32
     ; GFX9: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
     ; GFX9: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
@@ -237,19 +245,15 @@ body: |
     ; GFX9: [[FCMP2:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[UV2]](s32), [[UV5]]
     ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FCMP]](s1)
     ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FCMP1]](s1)
-    ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32)
     ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FCMP2]](s1)
-    ; GFX9: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[DEF1]](s32)
-    ; GFX9: [[DEF2:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
-    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[DEF2]](<2 x s16>)
-    ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<6 x s16>), 0
-    ; GFX9: [[TRUNC:%[0-9]+]]:_(<3 x s1>) = G_TRUNC [[EXTRACT]](<3 x s16>)
-    ; GFX9: S_NOP 0, implicit [[TRUNC]](<3 x s1>)
+    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32)
+    ; GFX9: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>)
     %0:_(<3 x s32>) = G_IMPLICIT_DEF
     %1:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
     %2:_(<3 x s1>) = G_FCMP floatpred(oeq), %0, %1
-    S_NOP 0, implicit %2
+    %3:_(<3 x s32>) = G_ANYEXT %2
+    S_NOP 0, implicit %3
+
 ...
 
 ---
@@ -272,9 +276,12 @@ body: |
     ; GFX7: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FCMP1]](s1)
     ; GFX7: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FCMP2]](s1)
     ; GFX7: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[FCMP3]](s1)
-    ; GFX7: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32), [[ANYEXT3]](s32)
-    ; GFX7: [[TRUNC:%[0-9]+]]:_(<4 x s1>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>)
-    ; GFX7: S_NOP 0, implicit [[TRUNC]](<4 x s1>)
+    ; GFX7: [[COPY1:%[0-9]+]]:_(s32) = COPY [[ANYEXT]](s32)
+    ; GFX7: [[COPY2:%[0-9]+]]:_(s32) = COPY [[ANYEXT1]](s32)
+    ; GFX7: [[COPY3:%[0-9]+]]:_(s32) = COPY [[ANYEXT2]](s32)
+    ; GFX7: [[COPY4:%[0-9]+]]:_(s32) = COPY [[ANYEXT3]](s32)
+    ; GFX7: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32)
+    ; GFX7: S_NOP 0, implicit [[BUILD_VECTOR]](<4 x s32>)
     ; GFX8-LABEL: name: test_fcmp_v4s32
     ; GFX8: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
     ; GFX8: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[DEF]](p1) :: (volatile load 16)
@@ -289,9 +296,12 @@ body: |
     ; GFX8: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FCMP1]](s1)
     ; GFX8: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FCMP2]](s1)
     ; GFX8: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[FCMP3]](s1)
-    ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32), [[ANYEXT3]](s32)
-    ; GFX8: [[TRUNC:%[0-9]+]]:_(<4 x s1>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>)
-    ; GFX8: S_NOP 0, implicit [[TRUNC]](<4 x s1>)
+    ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY [[ANYEXT]](s32)
+    ; GFX8: [[COPY2:%[0-9]+]]:_(s32) = COPY [[ANYEXT1]](s32)
+    ; GFX8: [[COPY3:%[0-9]+]]:_(s32) = COPY [[ANYEXT2]](s32)
+    ; GFX8: [[COPY4:%[0-9]+]]:_(s32) = COPY [[ANYEXT3]](s32)
+    ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32)
+    ; GFX8: S_NOP 0, implicit [[BUILD_VECTOR]](<4 x s32>)
     ; GFX9-LABEL: name: test_fcmp_v4s32
     ; GFX9: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
     ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[DEF]](p1) :: (volatile load 16)
@@ -304,18 +314,16 @@ body: |
     ; GFX9: [[FCMP3:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[UV3]](s32), [[UV7]]
     ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FCMP]](s1)
     ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FCMP1]](s1)
-    ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32)
     ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FCMP2]](s1)
     ; GFX9: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[FCMP3]](s1)
-    ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[ANYEXT3]](s32)
-    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
-    ; GFX9: [[TRUNC:%[0-9]+]]:_(<4 x s1>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>)
-    ; GFX9: S_NOP 0, implicit [[TRUNC]](<4 x s1>)
+    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32), [[ANYEXT3]](s32)
+    ; GFX9: S_NOP 0, implicit [[BUILD_VECTOR]](<4 x s32>)
     %0:_(p1) = G_IMPLICIT_DEF
     %1:_(<4 x s32>) = G_LOAD %0 :: (volatile load 16)
     %2:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     %3:_(<4 x s1>) = G_FCMP floatpred(oeq) , %1, %2
-    S_NOP 0, implicit %3
+    %4:_(<4 x s32>) = G_ANYEXT %3
+    S_NOP 0, implicit %4
 
 ...
 

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcos.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcos.mir
index 54f6bd787a820..6efc4cf6c8f37 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcos.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcos.mir
@@ -368,7 +368,17 @@ body: |
     ; SI: [[DEF2:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
     ; SI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[DEF2]](<2 x s16>)
     ; SI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS1]](<6 x s16>), 0
-    ; SI: S_NOP 0, implicit [[EXTRACT]](<3 x s16>)
+    ; SI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0
+    ; SI: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>)
+    ; SI: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
+    ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
+    ; SI: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
+    ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32)
+    ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32)
+    ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
+    ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32)
+    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
+    ; SI: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>)
     ; VI-LABEL: name: test_fcos_v3s16
     ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
     ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
@@ -407,7 +417,17 @@ body: |
     ; VI: [[DEF2:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
     ; VI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[DEF2]](<2 x s16>)
     ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS1]](<6 x s16>), 0
-    ; VI: S_NOP 0, implicit [[EXTRACT]](<3 x s16>)
+    ; VI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0
+    ; VI: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>)
+    ; VI: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
+    ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
+    ; VI: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
+    ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32)
+    ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32)
+    ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
+    ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32)
+    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
+    ; VI: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>)
     ; GFX9-LABEL: name: test_fcos_v3s16
     ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
     ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
@@ -439,10 +459,21 @@ body: |
     ; GFX9: [[DEF3:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
     ; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[DEF3]](<2 x s16>)
     ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS1]](<6 x s16>), 0
-    ; GFX9: S_NOP 0, implicit [[EXTRACT]](<3 x s16>)
+    ; GFX9: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0
+    ; GFX9: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>)
+    ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
+    ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+    ; GFX9: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
+    ; GFX9: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+    ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+    ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
+    ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
+    ; GFX9: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>)
     %0:_(<3 x s16>) = G_IMPLICIT_DEF
     %1:_(<3 x s16>) = G_FCOS %0
-    S_NOP 0, implicit %1
+    %2:_(<3 x s32>) = G_ANYEXT %1
+    S_NOP 0, implicit %2
 ...
 
 ---

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fdiv.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fdiv.mir
index 8d85c78ef5ac8..c6fca8d0cd326 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fdiv.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fdiv.mir
@@ -1391,7 +1391,17 @@ body: |
     ; SI: [[DEF3:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
     ; SI: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[DEF3]](<2 x s16>)
     ; SI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS2]](<6 x s16>), 0
-    ; SI: S_NOP 0, implicit [[EXTRACT]](<3 x s16>)
+    ; SI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0
+    ; SI: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT2]](<4 x s16>)
+    ; SI: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>)
+    ; SI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32)
+    ; SI: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>)
+    ; SI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32)
+    ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY [[BITCAST6]](s32)
+    ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32)
+    ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST7]](s32)
+    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
+    ; SI: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>)
     ; VI-LABEL: name: test_fdiv_v3s16
     ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
     ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
@@ -1450,7 +1460,17 @@ body: |
     ; VI: [[DEF3:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
     ; VI: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[DEF3]](<2 x s16>)
     ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS2]](<6 x s16>), 0
-    ; VI: S_NOP 0, implicit [[EXTRACT]](<3 x s16>)
+    ; VI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0
+    ; VI: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT2]](<4 x s16>)
+    ; VI: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>)
+    ; VI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32)
+    ; VI: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>)
+    ; VI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32)
+    ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY [[BITCAST6]](s32)
+    ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32)
+    ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST7]](s32)
+    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
+    ; VI: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>)
     ; GFX9-LABEL: name: test_fdiv_v3s16
     ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
     ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
@@ -1505,7 +1525,17 @@ body: |
     ; GFX9: [[DEF4:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
     ; GFX9: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[DEF4]](<2 x s16>)
     ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS2]](<6 x s16>), 0
-    ; GFX9: S_NOP 0, implicit [[EXTRACT]](<3 x s16>)
+    ; GFX9: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0
+    ; GFX9: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT2]](<4 x s16>)
+    ; GFX9: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>)
+    ; GFX9: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
+    ; GFX9: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>)
+    ; GFX9: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32)
+    ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32)
+    ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32)
+    ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32)
+    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
+    ; GFX9: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>)
     ; GFX9-UNSAFE-LABEL: name: test_fdiv_v3s16
     ; GFX9-UNSAFE: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
     ; GFX9-UNSAFE: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
@@ -1548,7 +1578,17 @@ body: |
     ; GFX9-UNSAFE: [[DEF4:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
     ; GFX9-UNSAFE: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[DEF4]](<2 x s16>)
     ; GFX9-UNSAFE: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS2]](<6 x s16>), 0
-    ; GFX9-UNSAFE: S_NOP 0, implicit [[EXTRACT]](<3 x s16>)
+    ; GFX9-UNSAFE: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0
+    ; GFX9-UNSAFE: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT2]](<4 x s16>)
+    ; GFX9-UNSAFE: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>)
+    ; GFX9-UNSAFE: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
+    ; GFX9-UNSAFE: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>)
+    ; GFX9-UNSAFE: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32)
+    ; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32)
+    ; GFX9-UNSAFE: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32)
+    ; GFX9-UNSAFE: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32)
+    ; GFX9-UNSAFE: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
+    ; GFX9-UNSAFE: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>)
     ; GFX10-LABEL: name: test_fdiv_v3s16
     ; GFX10: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
     ; GFX10: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
@@ -1603,11 +1643,22 @@ body: |
     ; GFX10: [[DEF4:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
     ; GFX10: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[DEF4]](<2 x s16>)
     ; GFX10: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS2]](<6 x s16>), 0
-    ; GFX10: S_NOP 0, implicit [[EXTRACT]](<3 x s16>)
+    ; GFX10: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0
+    ; GFX10: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT2]](<4 x s16>)
+    ; GFX10: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>)
+    ; GFX10: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
+    ; GFX10: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>)
+    ; GFX10: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32)
+    ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32)
+    ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32)
+    ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32)
+    ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
+    ; GFX10: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>)
     %0:_(<3 x s16>) = G_IMPLICIT_DEF
     %1:_(<3 x s16>) = G_IMPLICIT_DEF
     %2:_(<3 x s16>) = G_FDIV %0, %1
-    S_NOP 0, implicit %2
+    %3:_(<3 x s32>) = G_ANYEXT %2
+    S_NOP 0, implicit %3
 ...
 
 ---

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ffloor.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ffloor.mir
index a7d45beee984f..825b5c5a0e296 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ffloor.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ffloor.mir
@@ -357,7 +357,17 @@ body: |
     ; SI: [[DEF2:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
     ; SI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[DEF2]](<2 x s16>)
     ; SI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS1]](<6 x s16>), 0
-    ; SI: S_NOP 0, implicit [[EXTRACT]](<3 x s16>)
+    ; SI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0
+    ; SI: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>)
+    ; SI: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
+    ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
+    ; SI: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
+    ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32)
+    ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32)
+    ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
+    ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32)
+    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
+    ; SI: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>)
     ; VI-LABEL: name: test_ffloor_v3s16
     ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
     ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
@@ -389,7 +399,17 @@ body: |
     ; VI: [[DEF2:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
     ; VI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[DEF2]](<2 x s16>)
     ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS1]](<6 x s16>), 0
-    ; VI: S_NOP 0, implicit [[EXTRACT]](<3 x s16>)
+    ; VI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0
+    ; VI: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>)
+    ; VI: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
+    ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
+    ; VI: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
+    ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32)
+    ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32)
+    ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
+    ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32)
+    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
+    ; VI: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>)
     ; GFX9-LABEL: name: test_ffloor_v3s16
     ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
     ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
@@ -417,10 +437,21 @@ body: |
     ; GFX9: [[DEF3:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
     ; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[DEF3]](<2 x s16>)
     ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS1]](<6 x s16>), 0
-    ; GFX9: S_NOP 0, implicit [[EXTRACT]](<3 x s16>)
+    ; GFX9: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0
+    ; GFX9: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>)
+    ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
+    ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+    ; GFX9: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
+    ; GFX9: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+    ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+    ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
+    ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
+    ; GFX9: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>)
     %0:_(<3 x s16>) = G_IMPLICIT_DEF
     %1:_(<3 x s16>) = G_FFLOOR %0
-    S_NOP 0, implicit %1
+    %2:_(<3 x s32>) = G_ANYEXT %1
+    S_NOP 0, implicit %2
 ...
 
 ---

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fneg.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fneg.mir
index 35f229088167c..487019262f14b 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fneg.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fneg.mir
@@ -236,7 +236,20 @@ body: |
     ; SI: [[FNEG1:%[0-9]+]]:_(<2 x s16>) = G_FNEG [[BITCAST3]]
     ; SI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FNEG]](<2 x s16>), [[FNEG1]](<2 x s16>), [[DEF2]](<2 x s16>)
     ; SI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS1]](<6 x s16>), 0
-    ; SI: S_NOP 0, implicit [[EXTRACT]](<3 x s16>)
+    ; SI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0
+    ; SI: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>)
+    ; SI: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
+    ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
+    ; SI: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
+    ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32)
+    ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32)
+    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]]
+    ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
+    ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]]
+    ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32)
+    ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]]
+    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND3]](s32), [[AND4]](s32), [[AND5]](s32)
+    ; SI: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>)
     ; VI-LABEL: name: test_fneg_v3s16
     ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
     ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
@@ -268,7 +281,20 @@ body: |
     ; VI: [[FNEG1:%[0-9]+]]:_(<2 x s16>) = G_FNEG [[BITCAST3]]
     ; VI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FNEG]](<2 x s16>), [[FNEG1]](<2 x s16>), [[DEF2]](<2 x s16>)
     ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS1]](<6 x s16>), 0
-    ; VI: S_NOP 0, implicit [[EXTRACT]](<3 x s16>)
+    ; VI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0
+    ; VI: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>)
+    ; VI: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
+    ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
+    ; VI: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
+    ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32)
+    ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32)
+    ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]]
+    ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
+    ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]]
+    ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32)
+    ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]]
+    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND3]](s32), [[AND4]](s32), [[AND5]](s32)
+    ; VI: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>)
     ; GFX9-LABEL: name: test_fneg_v3s16
     ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
     ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
@@ -292,10 +318,25 @@ body: |
     ; GFX9: [[FNEG1:%[0-9]+]]:_(<2 x s16>) = G_FNEG [[BUILD_VECTOR_TRUNC1]]
     ; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FNEG]](<2 x s16>), [[FNEG1]](<2 x s16>), [[DEF3]](<2 x s16>)
     ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS1]](<6 x s16>), 0
-    ; GFX9: S_NOP 0, implicit [[EXTRACT]](<3 x s16>)
+    ; GFX9: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0
+    ; GFX9: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>)
+    ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
+    ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+    ; GFX9: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
+    ; GFX9: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]]
+    ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
+    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]]
+    ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+    ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]]
+    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32), [[AND2]](s32)
+    ; GFX9: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>)
     %0:_(<3 x s16>) = G_IMPLICIT_DEF
     %1:_(<3 x s16>) = G_FNEG %0
-    S_NOP 0, implicit %1
+    %2:_(<3 x s32>) = G_ZEXT %1
+    S_NOP 0, implicit %2
 ...
 
 ---

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsin.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsin.mir
index a62da1ad54b78..9700601e4ed0c 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsin.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsin.mir
@@ -368,7 +368,17 @@ body: |
     ; SI: [[DEF2:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
     ; SI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[DEF2]](<2 x s16>)
     ; SI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS1]](<6 x s16>), 0
-    ; SI: S_NOP 0, implicit [[EXTRACT]](<3 x s16>)
+    ; SI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0
+    ; SI: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>)
+    ; SI: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
+    ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
+    ; SI: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
+    ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32)
+    ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32)
+    ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
+    ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32)
+    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
+    ; SI: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>)
     ; VI-LABEL: name: test_fsin_v3s16
     ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
     ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
@@ -407,7 +417,17 @@ body: |
     ; VI: [[DEF2:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
     ; VI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[DEF2]](<2 x s16>)
     ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS1]](<6 x s16>), 0
-    ; VI: S_NOP 0, implicit [[EXTRACT]](<3 x s16>)
+    ; VI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0
+    ; VI: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>)
+    ; VI: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
+    ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
+    ; VI: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
+    ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32)
+    ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32)
+    ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
+    ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32)
+    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
+    ; VI: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>)
     ; GFX9-LABEL: name: test_fsin_v3s16
     ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
     ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
@@ -439,10 +459,21 @@ body: |
     ; GFX9: [[DEF3:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
     ; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[DEF3]](<2 x s16>)
     ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS1]](<6 x s16>), 0
-    ; GFX9: S_NOP 0, implicit [[EXTRACT]](<3 x s16>)
+    ; GFX9: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0
+    ; GFX9: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>)
+    ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
+    ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+    ; GFX9: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
+    ; GFX9: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+    ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+    ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
+    ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
+    ; GFX9: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>)
     %0:_(<3 x s16>) = G_IMPLICIT_DEF
     %1:_(<3 x s16>) = G_FSIN %0
-    S_NOP 0, implicit %1
+    %2:_(<3 x s32>) = G_ANYEXT %1
+    S_NOP 0, implicit %2
 ...
 
 ---

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsqrt.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsqrt.mir
index 033f8dd2dd18b..fc048400c0ce6 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsqrt.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsqrt.mir
@@ -281,7 +281,17 @@ body: |
     ; SI: [[DEF2:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
     ; SI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[DEF2]](<2 x s16>)
     ; SI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS1]](<6 x s16>), 0
-    ; SI: S_NOP 0, implicit [[EXTRACT]](<3 x s16>)
+    ; SI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0
+    ; SI: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>)
+    ; SI: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
+    ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
+    ; SI: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
+    ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32)
+    ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32)
+    ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
+    ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32)
+    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
+    ; SI: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>)
     ; VI-LABEL: name: test_fsqrt_v3s16
     ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
     ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
@@ -313,7 +323,17 @@ body: |
     ; VI: [[DEF2:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
     ; VI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[DEF2]](<2 x s16>)
     ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS1]](<6 x s16>), 0
-    ; VI: S_NOP 0, implicit [[EXTRACT]](<3 x s16>)
+    ; VI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0
+    ; VI: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>)
+    ; VI: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
+    ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
+    ; VI: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
+    ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32)
+    ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32)
+    ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
+    ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32)
+    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
+    ; VI: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>)
     ; GFX9-LABEL: name: test_fsqrt_v3s16
     ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
     ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
@@ -341,10 +361,21 @@ body: |
     ; GFX9: [[DEF3:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
     ; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[DEF3]](<2 x s16>)
     ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS1]](<6 x s16>), 0
-    ; GFX9: S_NOP 0, implicit [[EXTRACT]](<3 x s16>)
+    ; GFX9: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0
+    ; GFX9: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>)
+    ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
+    ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+    ; GFX9: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
+    ; GFX9: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+    ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+    ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
+    ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
+    ; GFX9: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>)
     %0:_(<3 x s16>) = G_IMPLICIT_DEF
     %1:_(<3 x s16>) = G_FSQRT %0
-    S_NOP 0, implicit %1
+    %2:_(<3 x s32>) = G_ANYEXT %1
+    S_NOP 0, implicit %2
 ...
 
 ---

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smax.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smax.mir
index 5b4f188a6334a..c44770efcb614 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smax.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smax.mir
@@ -400,7 +400,17 @@ body: |
     ; SI: [[DEF3:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
     ; SI: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[DEF3]](<2 x s16>)
     ; SI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS2]](<6 x s16>), 0
-    ; SI: S_NOP 0, implicit [[EXTRACT]](<3 x s16>)
+    ; SI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0
+    ; SI: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT2]](<4 x s16>)
+    ; SI: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>)
+    ; SI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32)
+    ; SI: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>)
+    ; SI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32)
+    ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[BITCAST6]](s32)
+    ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32)
+    ; SI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[BITCAST7]](s32)
+    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+    ; SI: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>)
     ; VI-LABEL: name: test_smax_v3s16
     ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
     ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
@@ -444,7 +454,17 @@ body: |
     ; VI: [[DEF3:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
     ; VI: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[DEF3]](<2 x s16>)
     ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS2]](<6 x s16>), 0
-    ; VI: S_NOP 0, implicit [[EXTRACT]](<3 x s16>)
+    ; VI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0
+    ; VI: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT2]](<4 x s16>)
+    ; VI: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>)
+    ; VI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32)
+    ; VI: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>)
+    ; VI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32)
+    ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY [[BITCAST6]](s32)
+    ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32)
+    ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST7]](s32)
+    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
+    ; VI: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>)
     ; GFX9-LABEL: name: test_smax_v3s16
     ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
     ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
@@ -462,11 +482,23 @@ body: |
     ; GFX9: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[SMAX]](<2 x s16>), [[SMAX1]](<2 x s16>)
     ; GFX9: [[CONCAT_VECTORS3:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[CONCAT_VECTORS2]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
     ; GFX9: [[UV12:%[0-9]+]]:_(<3 x s16>), [[UV13:%[0-9]+]]:_(<3 x s16>), [[UV14:%[0-9]+]]:_(<3 x s16>), [[UV15:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<12 x s16>)
-    ; GFX9: S_NOP 0, implicit [[UV12]](<3 x s16>)
+    ; GFX9: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV12]](<3 x s16>), 0
+    ; GFX9: [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT2]](<4 x s16>)
+    ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV16]](<2 x s16>)
+    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+    ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV17]](<2 x s16>)
+    ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+    ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
+    ; GFX9: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>)
     %0:_(<3 x s16>) = G_IMPLICIT_DEF
     %1:_(<3 x s16>) = G_IMPLICIT_DEF
     %2:_(<3 x s16>) = G_SMAX %0, %1
-    S_NOP 0, implicit %2
+    %3:_(<3 x s32>) = G_ANYEXT %2
+    S_NOP 0, implicit %3
 ...
 
 ---

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smin.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smin.mir
index dcb548d7dcc08..6c9b66db1ac95 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smin.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smin.mir
@@ -400,7 +400,17 @@ body: |
     ; SI: [[DEF3:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
     ; SI: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[DEF3]](<2 x s16>)
     ; SI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS2]](<6 x s16>), 0
-    ; SI: S_NOP 0, implicit [[EXTRACT]](<3 x s16>)
+    ; SI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0
+    ; SI: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT2]](<4 x s16>)
+    ; SI: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>)
+    ; SI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32)
+    ; SI: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>)
+    ; SI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32)
+    ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[BITCAST6]](s32)
+    ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32)
+    ; SI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[BITCAST7]](s32)
+    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+    ; SI: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>)
     ; VI-LABEL: name: test_smin_v3s16
     ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
     ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
@@ -444,7 +454,17 @@ body: |
     ; VI: [[DEF3:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
     ; VI: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[DEF3]](<2 x s16>)
     ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS2]](<6 x s16>), 0
-    ; VI: S_NOP 0, implicit [[EXTRACT]](<3 x s16>)
+    ; VI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0
+    ; VI: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT2]](<4 x s16>)
+    ; VI: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>)
+    ; VI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32)
+    ; VI: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>)
+    ; VI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32)
+    ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY [[BITCAST6]](s32)
+    ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32)
+    ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST7]](s32)
+    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
+    ; VI: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>)
     ; GFX9-LABEL: name: test_smin_v3s16
     ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
     ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
@@ -462,11 +482,23 @@ body: |
     ; GFX9: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[SMIN]](<2 x s16>), [[SMIN1]](<2 x s16>)
     ; GFX9: [[CONCAT_VECTORS3:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[CONCAT_VECTORS2]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
     ; GFX9: [[UV12:%[0-9]+]]:_(<3 x s16>), [[UV13:%[0-9]+]]:_(<3 x s16>), [[UV14:%[0-9]+]]:_(<3 x s16>), [[UV15:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<12 x s16>)
-    ; GFX9: S_NOP 0, implicit [[UV12]](<3 x s16>)
+    ; GFX9: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV12]](<3 x s16>), 0
+    ; GFX9: [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT2]](<4 x s16>)
+    ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV16]](<2 x s16>)
+    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+    ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV17]](<2 x s16>)
+    ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+    ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
+    ; GFX9: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>)
     %0:_(<3 x s16>) = G_IMPLICIT_DEF
     %1:_(<3 x s16>) = G_IMPLICIT_DEF
     %2:_(<3 x s16>) = G_SMIN %0, %1
-    S_NOP 0, implicit %2
+    %3:_(<3 x s32>) = G_ANYEXT %2
+    S_NOP 0, implicit %3
 ...
 
 ---

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-trunc.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-trunc.mir
index 06aff5e241372..7a2012cf9d27b 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-trunc.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-trunc.mir
@@ -211,15 +211,31 @@ body: |
 name: test_trunc_v4s32_to_v4s1
 body: |
   bb.0:
-    liveins: $vgpr0_vgpr1_vgpr2_vgpr3
+    liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11
 
     ; CHECK-LABEL: name: test_trunc_v4s32_to_v4s1
     ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(<4 x s1>) = G_TRUNC [[COPY]](<4 x s32>)
-    ; CHECK: S_ENDPGM 0, implicit [[TRUNC]](<4 x s1>)
+    ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
+    ; CHECK: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11
+    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
+    ; CHECK: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[UV]](s32)
+    ; CHECK: [[TRUNC1:%[0-9]+]]:_(s1) = G_TRUNC [[UV1]](s32)
+    ; CHECK: [[TRUNC2:%[0-9]+]]:_(s1) = G_TRUNC [[UV2]](s32)
+    ; CHECK: [[TRUNC3:%[0-9]+]]:_(s1) = G_TRUNC [[UV3]](s32)
+    ; CHECK: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>)
+    ; CHECK: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<4 x s32>)
+    ; CHECK: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[TRUNC]](s1), [[UV4]], [[UV8]]
+    ; CHECK: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[TRUNC1]](s1), [[UV5]], [[UV9]]
+    ; CHECK: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[TRUNC2]](s1), [[UV6]], [[UV10]]
+    ; CHECK: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[TRUNC3]](s1), [[UV7]], [[UV11]]
+    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[SELECT]](s32), [[SELECT1]](s32), [[SELECT2]](s32), [[SELECT3]](s32)
+    ; CHECK: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<4 x s32>)
     %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
-    %1:_(<4 x s1>) = G_TRUNC %0
-    S_ENDPGM 0, implicit %1
+    %1:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
+    %2:_(<4 x s32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11
+    %3:_(<4 x s1>) = G_TRUNC %0
+    %4:_(<4 x s32>) = G_SELECT %3, %1, %2
+    S_ENDPGM 0, implicit %4
 ...
 
 ---
@@ -375,12 +391,14 @@ body: |
     ; CHECK-LABEL: name: test_trunc_v2s96_to_v2s8
     ; CHECK: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2
     ; CHECK: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr3_vgpr4_vgpr5
-    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s96>) = G_BUILD_VECTOR [[COPY]](s96), [[COPY1]](s96)
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(<2 x s8>) = G_TRUNC [[BUILD_VECTOR]](<2 x s96>)
-    ; CHECK: S_ENDPGM 0, implicit [[TRUNC]](<2 x s8>)
+    ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s96)
+    ; CHECK: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s96)
+    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[TRUNC]](s32), [[TRUNC1]](s32)
+    ; CHECK: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<2 x s32>)
     %0:_(s96) = COPY $vgpr0_vgpr1_vgpr2
     %1:_(s96) = COPY $vgpr3_vgpr4_vgpr5
     %2:_(<2 x s96>) = G_BUILD_VECTOR %0, %1
     %3:_(<2 x s8>) = G_TRUNC %2
-    S_ENDPGM 0, implicit %3
+    %4:_(<2 x s32>) = G_ANYEXT %3
+    S_ENDPGM 0, implicit %4
 ...

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umax.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umax.mir
index a7123b7315812..eee22b924357b 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umax.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umax.mir
@@ -402,7 +402,17 @@ body: |
     ; SI: [[DEF3:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
     ; SI: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[DEF3]](<2 x s16>)
     ; SI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS2]](<6 x s16>), 0
-    ; SI: S_NOP 0, implicit [[EXTRACT]](<3 x s16>)
+    ; SI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0
+    ; SI: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT2]](<4 x s16>)
+    ; SI: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>)
+    ; SI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32)
+    ; SI: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>)
+    ; SI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32)
+    ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[BITCAST6]](s32)
+    ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32)
+    ; SI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[BITCAST7]](s32)
+    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+    ; SI: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>)
     ; VI-LABEL: name: test_umax_v3s16
     ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
     ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
@@ -446,7 +456,17 @@ body: |
     ; VI: [[DEF3:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
     ; VI: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[DEF3]](<2 x s16>)
     ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS2]](<6 x s16>), 0
-    ; VI: S_NOP 0, implicit [[EXTRACT]](<3 x s16>)
+    ; VI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0
+    ; VI: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT2]](<4 x s16>)
+    ; VI: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>)
+    ; VI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32)
+    ; VI: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>)
+    ; VI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32)
+    ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY [[BITCAST6]](s32)
+    ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32)
+    ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST7]](s32)
+    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
+    ; VI: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>)
     ; GFX9-LABEL: name: test_umax_v3s16
     ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
     ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
@@ -464,11 +484,23 @@ body: |
     ; GFX9: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UMAX]](<2 x s16>), [[UMAX1]](<2 x s16>)
     ; GFX9: [[CONCAT_VECTORS3:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[CONCAT_VECTORS2]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
     ; GFX9: [[UV12:%[0-9]+]]:_(<3 x s16>), [[UV13:%[0-9]+]]:_(<3 x s16>), [[UV14:%[0-9]+]]:_(<3 x s16>), [[UV15:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<12 x s16>)
-    ; GFX9: S_NOP 0, implicit [[UV12]](<3 x s16>)
+    ; GFX9: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV12]](<3 x s16>), 0
+    ; GFX9: [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT2]](<4 x s16>)
+    ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV16]](<2 x s16>)
+    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+    ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV17]](<2 x s16>)
+    ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+    ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
+    ; GFX9: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>)
     %0:_(<3 x s16>) = G_IMPLICIT_DEF
     %1:_(<3 x s16>) = G_IMPLICIT_DEF
     %2:_(<3 x s16>) = G_UMAX %0, %1
-    S_NOP 0, implicit %2
+    %3:_(<3 x s32>) = G_ANYEXT %2
+    S_NOP 0, implicit %3
 ...
 
 ---

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umin.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umin.mir
index 84b0975e8e992..ad8b961ed5ddb 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umin.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umin.mir
@@ -402,7 +402,17 @@ body: |
     ; SI: [[DEF3:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
     ; SI: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[DEF3]](<2 x s16>)
     ; SI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS2]](<6 x s16>), 0
-    ; SI: S_NOP 0, implicit [[EXTRACT]](<3 x s16>)
+    ; SI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0
+    ; SI: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT2]](<4 x s16>)
+    ; SI: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>)
+    ; SI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32)
+    ; SI: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>)
+    ; SI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32)
+    ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[BITCAST6]](s32)
+    ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32)
+    ; SI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[BITCAST7]](s32)
+    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+    ; SI: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>)
     ; VI-LABEL: name: test_umin_v3s16
     ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
     ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
@@ -446,7 +456,17 @@ body: |
     ; VI: [[DEF3:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
     ; VI: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[DEF3]](<2 x s16>)
     ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS2]](<6 x s16>), 0
-    ; VI: S_NOP 0, implicit [[EXTRACT]](<3 x s16>)
+    ; VI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0
+    ; VI: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT2]](<4 x s16>)
+    ; VI: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>)
+    ; VI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32)
+    ; VI: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>)
+    ; VI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32)
+    ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY [[BITCAST6]](s32)
+    ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32)
+    ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST7]](s32)
+    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
+    ; VI: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>)
     ; GFX9-LABEL: name: test_umin_v3s16
     ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
     ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
@@ -464,11 +484,23 @@ body: |
     ; GFX9: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UMIN]](<2 x s16>), [[UMIN1]](<2 x s16>)
     ; GFX9: [[CONCAT_VECTORS3:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[CONCAT_VECTORS2]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
     ; GFX9: [[UV12:%[0-9]+]]:_(<3 x s16>), [[UV13:%[0-9]+]]:_(<3 x s16>), [[UV14:%[0-9]+]]:_(<3 x s16>), [[UV15:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<12 x s16>)
-    ; GFX9: S_NOP 0, implicit [[UV12]](<3 x s16>)
+    ; GFX9: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV12]](<3 x s16>), 0
+    ; GFX9: [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT2]](<4 x s16>)
+    ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV16]](<2 x s16>)
+    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+    ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV17]](<2 x s16>)
+    ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+    ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
+    ; GFX9: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>)
     %0:_(<3 x s16>) = G_IMPLICIT_DEF
     %1:_(<3 x s16>) = G_IMPLICIT_DEF
     %2:_(<3 x s16>) = G_UMIN %0, %1
-    S_NOP 0, implicit %2
+    %3:_(<3 x s32>) = G_ANYEXT %2
+    S_NOP 0, implicit %3
 ...
 
 ---


        


More information about the llvm-commits mailing list