[llvm] [AMDGPU][Legalizer] Avoid pack/unpack for G_FSHR (PR #156796)

Anshil Gandhi via llvm-commits llvm-commits at lists.llvm.org
Wed Sep 3 21:24:07 PDT 2025


https://github.com/gandhi56 created https://github.com/llvm/llvm-project/pull/156796

Scalarize G_FSHR only if the subtarget does not support the V2S16 type (i.e., it lacks VOP3P packed instructions); otherwise keep the existing v2s16 lowering.
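
For context, the affected operation is the funnel-shift intrinsic on <2 x i16>; a minimal IR sketch (function name illustrative):

  declare <2 x i16> @llvm.fshr.v2i16(<2 x i16>, <2 x i16>, <2 x i16>)

  define <2 x i16> @fshr_example(<2 x i16> %lhs, <2 x i16> %rhs, <2 x i16> %amt) {
    %r = call <2 x i16> @llvm.fshr.v2i16(<2 x i16> %lhs, <2 x i16> %rhs, <2 x i16> %amt)
    ret <2 x i16> %r
  }

On subtargets without VOP3P (e.g. the GFX6/GFX8 prefixes in the updated tests), the legalizer now scalarizes this directly into two s16 funnel shifts instead of first lowering through v2s16 and packing/unpacking the halves, which is where the shorter shift sequences in the test diffs below come from.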

From 3de96c076423ef196e0225fb7c7f6748c75ae341 Mon Sep 17 00:00:00 2001
From: Anshil Gandhi <Anshil.Gandhi at amd.com>
Date: Wed, 3 Sep 2025 23:09:35 -0500
Subject: [PATCH] [AMDGPU] Avoid pack/unpack for G_FSHR

Scalarize G_FSHR only if the subtarget does not support the V2S16 type.
---
 .../lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp |  20 +-
 llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll   | 825 +++++----------
 .../AMDGPU/GlobalISel/legalize-fshr.mir       | 950 ++++++------------
 3 files changed, 611 insertions(+), 1184 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 55a76f1172cb9..197c7009d8e86 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -2082,13 +2082,19 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
     .scalarize(0)
     .lower();
 
-  // TODO: Only Try to form v2s16 with legal packed instructions.
-  getActionDefinitionsBuilder(G_FSHR)
-    .legalFor({{S32, S32}})
-    .lowerFor({{V2S16, V2S16}})
-    .clampMaxNumElementsStrict(0, S16, 2)
-    .scalarize(0)
-    .lower();
+  if (ST.hasVOP3PInsts()) {
+    getActionDefinitionsBuilder(G_FSHR)
+      .legalFor({{S32, S32}})
+      .lowerFor({{V2S16, V2S16}})
+      .clampMaxNumElementsStrict(0, S16, 2)
+      .scalarize(0)
+      .lower();
+  } else {
+    getActionDefinitionsBuilder(G_FSHR)
+      .legalFor({{S32, S32}})
+      .scalarize(0)
+      .lower();
+  }
 
   if (ST.hasVOP3PInsts()) {
     getActionDefinitionsBuilder(G_FSHL)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll
index d1ba24673043d..7338bf830a652 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll
@@ -3404,32 +3404,19 @@ define amdgpu_ps half @v_fshr_i16_vss(i16 %lhs, i16 inreg %rhs, i16 inreg %amt)
 define amdgpu_ps i32 @s_fshr_v2i16(<2 x i16> inreg %lhs, <2 x i16> inreg %rhs, <2 x i16> inreg %amt) {
 ; GFX6-LABEL: s_fshr_v2i16:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_lshl_b32 s5, s5, 16
-; GFX6-NEXT:    s_and_b32 s4, s4, 0xffff
-; GFX6-NEXT:    s_or_b32 s4, s5, s4
-; GFX6-NEXT:    s_bfe_u32 s5, s2, 0xf0001
-; GFX6-NEXT:    s_lshl_b32 s0, s0, 1
-; GFX6-NEXT:    s_lshr_b32 s5, s5, 14
-; GFX6-NEXT:    s_or_b32 s0, s0, s5
-; GFX6-NEXT:    s_bfe_u32 s5, s3, 0xf0001
-; GFX6-NEXT:    s_lshl_b32 s1, s1, 1
-; GFX6-NEXT:    s_lshr_b32 s5, s5, 14
-; GFX6-NEXT:    s_lshl_b32 s2, s2, 1
-; GFX6-NEXT:    s_xor_b32 s4, s4, -1
-; GFX6-NEXT:    s_or_b32 s1, s1, s5
-; GFX6-NEXT:    s_lshr_b32 s5, s4, 16
 ; GFX6-NEXT:    s_and_b32 s6, s4, 15
 ; GFX6-NEXT:    s_andn2_b32 s4, 15, s4
-; GFX6-NEXT:    s_bfe_u32 s2, s2, 0xf0001
-; GFX6-NEXT:    s_lshl_b32 s0, s0, s6
-; GFX6-NEXT:    s_lshr_b32 s2, s2, s4
-; GFX6-NEXT:    s_lshl_b32 s3, s3, 1
+; GFX6-NEXT:    s_lshl_b32 s0, s0, 1
+; GFX6-NEXT:    s_and_b32 s2, s2, 0xffff
+; GFX6-NEXT:    s_lshl_b32 s0, s0, s4
+; GFX6-NEXT:    s_lshr_b32 s2, s2, s6
 ; GFX6-NEXT:    s_or_b32 s0, s0, s2
 ; GFX6-NEXT:    s_and_b32 s2, s5, 15
 ; GFX6-NEXT:    s_andn2_b32 s4, 15, s5
-; GFX6-NEXT:    s_lshl_b32 s1, s1, s2
-; GFX6-NEXT:    s_bfe_u32 s2, s3, 0xf0001
-; GFX6-NEXT:    s_lshr_b32 s2, s2, s4
+; GFX6-NEXT:    s_lshl_b32 s1, s1, 1
+; GFX6-NEXT:    s_and_b32 s3, s3, 0xffff
+; GFX6-NEXT:    s_lshl_b32 s1, s1, s4
+; GFX6-NEXT:    s_lshr_b32 s2, s3, s2
 ; GFX6-NEXT:    s_or_b32 s1, s1, s2
 ; GFX6-NEXT:    s_and_b32 s1, 0xffff, s1
 ; GFX6-NEXT:    s_and_b32 s0, 0xffff, s0
@@ -3439,33 +3426,22 @@ define amdgpu_ps i32 @s_fshr_v2i16(<2 x i16> inreg %lhs, <2 x i16> inreg %rhs, <
 ;
 ; GFX8-LABEL: s_fshr_v2i16:
 ; GFX8:       ; %bb.0:
-; GFX8-NEXT:    s_and_b32 s5, 0xffff, s1
 ; GFX8-NEXT:    s_lshr_b32 s3, s0, 16
 ; GFX8-NEXT:    s_lshr_b32 s4, s1, 16
-; GFX8-NEXT:    s_lshl_b32 s0, s0, 1
-; GFX8-NEXT:    s_lshr_b32 s5, s5, 15
-; GFX8-NEXT:    s_lshl_b32 s1, s1, 1
-; GFX8-NEXT:    s_or_b32 s0, s0, s5
-; GFX8-NEXT:    s_lshl_b32 s3, s3, 1
-; GFX8-NEXT:    s_lshr_b32 s5, s4, 15
-; GFX8-NEXT:    s_xor_b32 s2, s2, -1
-; GFX8-NEXT:    s_and_b32 s1, 0xffff, s1
-; GFX8-NEXT:    s_or_b32 s3, s3, s5
 ; GFX8-NEXT:    s_lshr_b32 s5, s2, 16
 ; GFX8-NEXT:    s_and_b32 s6, s2, 15
 ; GFX8-NEXT:    s_andn2_b32 s2, 15, s2
-; GFX8-NEXT:    s_lshr_b32 s1, s1, 1
-; GFX8-NEXT:    s_lshl_b32 s0, s0, s6
-; GFX8-NEXT:    s_lshr_b32 s1, s1, s2
-; GFX8-NEXT:    s_lshl_b32 s4, s4, 1
+; GFX8-NEXT:    s_lshl_b32 s0, s0, 1
+; GFX8-NEXT:    s_and_b32 s1, 0xffff, s1
+; GFX8-NEXT:    s_lshl_b32 s0, s0, s2
+; GFX8-NEXT:    s_lshr_b32 s1, s1, s6
 ; GFX8-NEXT:    s_or_b32 s0, s0, s1
 ; GFX8-NEXT:    s_and_b32 s1, s5, 15
-; GFX8-NEXT:    s_lshl_b32 s1, s3, s1
-; GFX8-NEXT:    s_and_b32 s3, 0xffff, s4
 ; GFX8-NEXT:    s_andn2_b32 s2, 15, s5
-; GFX8-NEXT:    s_lshr_b32 s3, s3, 1
-; GFX8-NEXT:    s_lshr_b32 s2, s3, s2
-; GFX8-NEXT:    s_or_b32 s1, s1, s2
+; GFX8-NEXT:    s_lshl_b32 s3, s3, 1
+; GFX8-NEXT:    s_lshl_b32 s2, s3, s2
+; GFX8-NEXT:    s_lshr_b32 s1, s4, s1
+; GFX8-NEXT:    s_or_b32 s1, s2, s1
 ; GFX8-NEXT:    s_and_b32 s1, 0xffff, s1
 ; GFX8-NEXT:    s_and_b32 s0, 0xffff, s0
 ; GFX8-NEXT:    s_lshl_b32 s1, s1, 16
@@ -3547,65 +3523,43 @@ define <2 x i16> @v_fshr_v2i16(<2 x i16> %lhs, <2 x i16> %rhs, <2 x i16> %amt) {
 ; GFX6-LABEL: v_fshr_v2i16:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-NEXT:    v_lshlrev_b32_e32 v5, 16, v5
-; GFX6-NEXT:    v_and_b32_e32 v4, 0xffff, v4
-; GFX6-NEXT:    v_or_b32_e32 v4, v5, v4
-; GFX6-NEXT:    v_bfe_u32 v5, v2, 1, 15
-; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
-; GFX6-NEXT:    v_lshrrev_b32_e32 v5, 14, v5
-; GFX6-NEXT:    v_or_b32_e32 v0, v0, v5
-; GFX6-NEXT:    v_bfe_u32 v5, v3, 1, 15
-; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 1, v1
-; GFX6-NEXT:    v_lshrrev_b32_e32 v5, 14, v5
-; GFX6-NEXT:    v_xor_b32_e32 v4, -1, v4
-; GFX6-NEXT:    v_or_b32_e32 v1, v1, v5
-; GFX6-NEXT:    v_lshlrev_b32_e32 v2, 1, v2
-; GFX6-NEXT:    v_lshrrev_b32_e32 v5, 16, v4
 ; GFX6-NEXT:    v_and_b32_e32 v6, 15, v4
 ; GFX6-NEXT:    v_xor_b32_e32 v4, -1, v4
 ; GFX6-NEXT:    v_and_b32_e32 v4, 15, v4
-; GFX6-NEXT:    v_bfe_u32 v2, v2, 1, 15
-; GFX6-NEXT:    v_lshlrev_b32_e32 v0, v6, v0
-; GFX6-NEXT:    v_lshrrev_b32_e32 v2, v4, v2
-; GFX6-NEXT:    v_lshlrev_b32_e32 v3, 1, v3
+; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
+; GFX6-NEXT:    v_and_b32_e32 v2, 0xffff, v2
+; GFX6-NEXT:    v_lshlrev_b32_e32 v0, v4, v0
+; GFX6-NEXT:    v_lshrrev_b32_e32 v2, v6, v2
+; GFX6-NEXT:    v_xor_b32_e32 v4, -1, v5
 ; GFX6-NEXT:    v_or_b32_e32 v0, v0, v2
 ; GFX6-NEXT:    v_and_b32_e32 v2, 15, v5
-; GFX6-NEXT:    v_xor_b32_e32 v4, -1, v5
 ; GFX6-NEXT:    v_and_b32_e32 v4, 15, v4
-; GFX6-NEXT:    v_lshlrev_b32_e32 v1, v2, v1
-; GFX6-NEXT:    v_bfe_u32 v2, v3, 1, 15
-; GFX6-NEXT:    v_lshrrev_b32_e32 v2, v4, v2
+; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 1, v1
+; GFX6-NEXT:    v_and_b32_e32 v3, 0xffff, v3
+; GFX6-NEXT:    v_lshlrev_b32_e32 v1, v4, v1
+; GFX6-NEXT:    v_lshrrev_b32_e32 v2, v2, v3
 ; GFX6-NEXT:    v_or_b32_e32 v1, v1, v2
 ; GFX6-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_fshr_v2i16:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_lshlrev_b16_e32 v3, 1, v0
-; GFX8-NEXT:    v_lshrrev_b16_e32 v4, 15, v1
-; GFX8-NEXT:    v_or_b32_e32 v3, v3, v4
-; GFX8-NEXT:    v_mov_b32_e32 v4, 1
-; GFX8-NEXT:    v_mov_b32_e32 v5, 15
-; GFX8-NEXT:    v_lshlrev_b16_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX8-NEXT:    v_lshrrev_b16_sdwa v6, v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX8-NEXT:    v_xor_b32_e32 v2, -1, v2
-; GFX8-NEXT:    v_or_b32_e32 v0, v0, v6
-; GFX8-NEXT:    v_lshlrev_b16_e32 v6, 1, v1
-; GFX8-NEXT:    v_lshlrev_b16_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX8-NEXT:    v_and_b32_e32 v4, 15, v2
-; GFX8-NEXT:    v_xor_b32_e32 v7, -1, v2
-; GFX8-NEXT:    v_and_b32_e32 v7, 15, v7
-; GFX8-NEXT:    v_lshlrev_b16_e32 v3, v4, v3
-; GFX8-NEXT:    v_lshrrev_b16_e32 v4, 1, v6
-; GFX8-NEXT:    v_lshrrev_b16_e32 v4, v7, v4
-; GFX8-NEXT:    v_or_b32_e32 v3, v3, v4
-; GFX8-NEXT:    v_and_b32_sdwa v4, v2, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX8-NEXT:    v_xor_b32_e32 v4, -1, v2
+; GFX8-NEXT:    v_and_b32_e32 v3, 15, v2
+; GFX8-NEXT:    v_and_b32_e32 v4, 15, v4
+; GFX8-NEXT:    v_lshlrev_b16_e32 v5, 1, v0
+; GFX8-NEXT:    v_lshlrev_b16_e32 v4, v4, v5
+; GFX8-NEXT:    v_lshrrev_b16_e32 v3, v3, v1
+; GFX8-NEXT:    v_or_b32_e32 v3, v4, v3
+; GFX8-NEXT:    v_mov_b32_e32 v4, 15
 ; GFX8-NEXT:    v_mov_b32_e32 v5, -1
+; GFX8-NEXT:    v_and_b32_sdwa v4, v2, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
 ; GFX8-NEXT:    v_xor_b32_sdwa v2, v2, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX8-NEXT:    v_mov_b32_e32 v5, 1
 ; GFX8-NEXT:    v_and_b32_e32 v2, 15, v2
-; GFX8-NEXT:    v_lshrrev_b16_e32 v1, 1, v1
-; GFX8-NEXT:    v_lshlrev_b16_e32 v0, v4, v0
-; GFX8-NEXT:    v_lshrrev_b16_e32 v1, v2, v1
+; GFX8-NEXT:    v_lshlrev_b16_sdwa v0, v5, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX8-NEXT:    v_lshlrev_b16_e32 v0, v2, v0
+; GFX8-NEXT:    v_lshrrev_b16_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
 ; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
 ; GFX8-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX8-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
@@ -3657,13 +3611,11 @@ define <2 x i16> @v_fshr_v2i16_4_8(<2 x i16> %lhs, <2 x i16> %rhs) {
 ; GFX6-LABEL: v_fshr_v2i16_4_8:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-NEXT:    v_bfe_u32 v2, v2, 1, 15
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 12, v0
-; GFX6-NEXT:    v_lshrrev_b32_e32 v2, 3, v2
+; GFX6-NEXT:    v_bfe_u32 v2, v2, 4, 12
 ; GFX6-NEXT:    v_or_b32_e32 v0, v0, v2
-; GFX6-NEXT:    v_bfe_u32 v2, v3, 1, 15
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 8, v1
-; GFX6-NEXT:    v_lshrrev_b32_e32 v2, 7, v2
+; GFX6-NEXT:    v_bfe_u32 v2, v3, 8, 8
 ; GFX6-NEXT:    v_or_b32_e32 v1, v1, v2
 ; GFX6-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -3716,35 +3668,22 @@ define <2 x i16> @v_fshr_v2i16_4_8(<2 x i16> %lhs, <2 x i16> %rhs) {
 define amdgpu_ps float @v_fshr_v2i16_ssv(<2 x i16> inreg %lhs, <2 x i16> inreg %rhs, <2 x i16> %amt) {
 ; GFX6-LABEL: v_fshr_v2i16_ssv:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
-; GFX6-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX6-NEXT:    v_or_b32_e32 v0, v1, v0
-; GFX6-NEXT:    s_bfe_u32 s4, s2, 0xf0001
-; GFX6-NEXT:    s_lshl_b32 s0, s0, 1
-; GFX6-NEXT:    s_lshr_b32 s4, s4, 14
-; GFX6-NEXT:    v_xor_b32_e32 v0, -1, v0
-; GFX6-NEXT:    s_or_b32 s0, s0, s4
-; GFX6-NEXT:    s_lshl_b32 s2, s2, 1
-; GFX6-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
 ; GFX6-NEXT:    v_and_b32_e32 v2, 15, v0
 ; GFX6-NEXT:    v_xor_b32_e32 v0, -1, v0
 ; GFX6-NEXT:    v_and_b32_e32 v0, 15, v0
-; GFX6-NEXT:    v_lshl_b32_e32 v2, s0, v2
-; GFX6-NEXT:    s_bfe_u32 s0, s2, 0xf0001
-; GFX6-NEXT:    s_bfe_u32 s4, s3, 0xf0001
-; GFX6-NEXT:    v_lshr_b32_e32 v0, s0, v0
-; GFX6-NEXT:    s_lshl_b32 s1, s1, 1
-; GFX6-NEXT:    s_lshr_b32 s4, s4, 14
-; GFX6-NEXT:    s_lshl_b32 s3, s3, 1
-; GFX6-NEXT:    v_or_b32_e32 v0, v2, v0
+; GFX6-NEXT:    s_lshl_b32 s0, s0, 1
+; GFX6-NEXT:    v_lshl_b32_e32 v0, s0, v0
+; GFX6-NEXT:    s_and_b32 s0, s2, 0xffff
+; GFX6-NEXT:    v_lshr_b32_e32 v2, s0, v2
+; GFX6-NEXT:    v_or_b32_e32 v0, v0, v2
 ; GFX6-NEXT:    v_and_b32_e32 v2, 15, v1
 ; GFX6-NEXT:    v_xor_b32_e32 v1, -1, v1
-; GFX6-NEXT:    s_or_b32 s1, s1, s4
 ; GFX6-NEXT:    v_and_b32_e32 v1, 15, v1
-; GFX6-NEXT:    s_bfe_u32 s0, s3, 0xf0001
-; GFX6-NEXT:    v_lshl_b32_e32 v2, s1, v2
-; GFX6-NEXT:    v_lshr_b32_e32 v1, s0, v1
-; GFX6-NEXT:    v_or_b32_e32 v1, v2, v1
+; GFX6-NEXT:    s_lshl_b32 s0, s1, 1
+; GFX6-NEXT:    v_lshl_b32_e32 v1, s0, v1
+; GFX6-NEXT:    s_and_b32 s0, s3, 0xffff
+; GFX6-NEXT:    v_lshr_b32_e32 v2, s0, v2
+; GFX6-NEXT:    v_or_b32_e32 v1, v1, v2
 ; GFX6-NEXT:    v_and_b32_e32 v1, 0xffff, v1
 ; GFX6-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
@@ -3753,36 +3692,24 @@ define amdgpu_ps float @v_fshr_v2i16_ssv(<2 x i16> inreg %lhs, <2 x i16> inreg %
 ;
 ; GFX8-LABEL: v_fshr_v2i16_ssv:
 ; GFX8:       ; %bb.0:
-; GFX8-NEXT:    s_and_b32 s4, 0xffff, s1
+; GFX8-NEXT:    v_xor_b32_e32 v2, -1, v0
 ; GFX8-NEXT:    s_lshr_b32 s2, s0, 16
-; GFX8-NEXT:    s_lshl_b32 s0, s0, 1
-; GFX8-NEXT:    s_lshr_b32 s4, s4, 15
-; GFX8-NEXT:    v_xor_b32_e32 v0, -1, v0
-; GFX8-NEXT:    s_lshr_b32 s3, s1, 16
-; GFX8-NEXT:    s_or_b32 s0, s0, s4
-; GFX8-NEXT:    s_lshl_b32 s1, s1, 1
 ; GFX8-NEXT:    v_and_b32_e32 v1, 15, v0
-; GFX8-NEXT:    v_xor_b32_e32 v2, -1, v0
-; GFX8-NEXT:    v_lshlrev_b16_e64 v1, v1, s0
-; GFX8-NEXT:    s_and_b32 s0, 0xffff, s1
 ; GFX8-NEXT:    v_and_b32_e32 v2, 15, v2
-; GFX8-NEXT:    s_lshr_b32 s0, s0, 1
-; GFX8-NEXT:    v_lshrrev_b16_e64 v2, v2, s0
-; GFX8-NEXT:    s_lshr_b32 s4, s3, 15
-; GFX8-NEXT:    s_lshl_b32 s3, s3, 1
-; GFX8-NEXT:    v_or_b32_e32 v1, v1, v2
+; GFX8-NEXT:    s_lshl_b32 s0, s0, 1
+; GFX8-NEXT:    v_lshlrev_b16_e64 v2, v2, s0
+; GFX8-NEXT:    v_lshrrev_b16_e64 v1, v1, s1
+; GFX8-NEXT:    v_or_b32_e32 v1, v2, v1
 ; GFX8-NEXT:    v_mov_b32_e32 v2, 15
 ; GFX8-NEXT:    v_mov_b32_e32 v3, -1
-; GFX8-NEXT:    s_lshl_b32 s2, s2, 1
 ; GFX8-NEXT:    v_and_b32_sdwa v2, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
 ; GFX8-NEXT:    v_xor_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; GFX8-NEXT:    s_and_b32 s0, 0xffff, s3
-; GFX8-NEXT:    s_or_b32 s2, s2, s4
+; GFX8-NEXT:    s_lshr_b32 s3, s1, 16
 ; GFX8-NEXT:    v_and_b32_e32 v0, 15, v0
-; GFX8-NEXT:    s_lshr_b32 s0, s0, 1
-; GFX8-NEXT:    v_lshlrev_b16_e64 v2, v2, s2
-; GFX8-NEXT:    v_lshrrev_b16_e64 v0, v0, s0
-; GFX8-NEXT:    v_or_b32_e32 v0, v2, v0
+; GFX8-NEXT:    s_lshl_b32 s0, s2, 1
+; GFX8-NEXT:    v_lshlrev_b16_e64 v0, v0, s0
+; GFX8-NEXT:    v_lshrrev_b16_e64 v2, v2, s3
+; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
 ; GFX8-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX8-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
 ; GFX8-NEXT:    v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
@@ -3838,33 +3765,20 @@ define amdgpu_ps float @v_fshr_v2i16_ssv(<2 x i16> inreg %lhs, <2 x i16> inreg %
 define amdgpu_ps float @v_fshr_v2i16_svs(<2 x i16> inreg %lhs, <2 x i16> %rhs, <2 x i16> inreg %amt) {
 ; GFX6-LABEL: v_fshr_v2i16_svs:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    v_bfe_u32 v2, v0, 1, 15
-; GFX6-NEXT:    s_lshl_b32 s3, s3, 16
-; GFX6-NEXT:    s_and_b32 s2, s2, 0xffff
+; GFX6-NEXT:    s_and_b32 s4, s2, 15
+; GFX6-NEXT:    s_andn2_b32 s2, 15, s2
 ; GFX6-NEXT:    s_lshl_b32 s0, s0, 1
-; GFX6-NEXT:    v_lshrrev_b32_e32 v2, 14, v2
-; GFX6-NEXT:    v_bfe_u32 v3, v1, 1, 15
-; GFX6-NEXT:    s_or_b32 s2, s3, s2
-; GFX6-NEXT:    v_or_b32_e32 v2, s0, v2
-; GFX6-NEXT:    s_lshl_b32 s0, s1, 1
-; GFX6-NEXT:    v_lshrrev_b32_e32 v3, 14, v3
-; GFX6-NEXT:    v_or_b32_e32 v3, s0, v3
-; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
-; GFX6-NEXT:    s_xor_b32 s0, s2, -1
-; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 1, v1
-; GFX6-NEXT:    s_lshr_b32 s1, s0, 16
-; GFX6-NEXT:    s_and_b32 s2, s0, 15
-; GFX6-NEXT:    s_andn2_b32 s0, 15, s0
-; GFX6-NEXT:    v_bfe_u32 v0, v0, 1, 15
-; GFX6-NEXT:    v_lshlrev_b32_e32 v2, s2, v2
-; GFX6-NEXT:    v_lshrrev_b32_e32 v0, s0, v0
-; GFX6-NEXT:    s_and_b32 s0, s1, 15
-; GFX6-NEXT:    s_andn2_b32 s1, 15, s1
-; GFX6-NEXT:    v_bfe_u32 v1, v1, 1, 15
-; GFX6-NEXT:    v_or_b32_e32 v0, v2, v0
-; GFX6-NEXT:    v_lshlrev_b32_e32 v2, s0, v3
-; GFX6-NEXT:    v_lshrrev_b32_e32 v1, s1, v1
-; GFX6-NEXT:    v_or_b32_e32 v1, v2, v1
+; GFX6-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX6-NEXT:    s_lshl_b32 s0, s0, s2
+; GFX6-NEXT:    v_lshrrev_b32_e32 v0, s4, v0
+; GFX6-NEXT:    v_or_b32_e32 v0, s0, v0
+; GFX6-NEXT:    s_and_b32 s0, s3, 15
+; GFX6-NEXT:    s_andn2_b32 s2, 15, s3
+; GFX6-NEXT:    s_lshl_b32 s1, s1, 1
+; GFX6-NEXT:    v_and_b32_e32 v1, 0xffff, v1
+; GFX6-NEXT:    s_lshl_b32 s1, s1, s2
+; GFX6-NEXT:    v_lshrrev_b32_e32 v1, s0, v1
+; GFX6-NEXT:    v_or_b32_e32 v1, s1, v1
 ; GFX6-NEXT:    v_and_b32_e32 v1, 0xffff, v1
 ; GFX6-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
@@ -3874,31 +3788,21 @@ define amdgpu_ps float @v_fshr_v2i16_svs(<2 x i16> inreg %lhs, <2 x i16> %rhs, <
 ; GFX8-LABEL: v_fshr_v2i16_svs:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_lshr_b32 s2, s0, 16
+; GFX8-NEXT:    s_lshr_b32 s3, s1, 16
+; GFX8-NEXT:    s_and_b32 s4, s1, 15
+; GFX8-NEXT:    s_andn2_b32 s1, 15, s1
 ; GFX8-NEXT:    s_lshl_b32 s0, s0, 1
-; GFX8-NEXT:    v_lshrrev_b16_e32 v1, 15, v0
-; GFX8-NEXT:    v_mov_b32_e32 v2, 15
+; GFX8-NEXT:    s_lshl_b32 s0, s0, s1
+; GFX8-NEXT:    v_lshrrev_b16_e32 v1, s4, v0
 ; GFX8-NEXT:    v_or_b32_e32 v1, s0, v1
-; GFX8-NEXT:    s_lshl_b32 s0, s2, 1
-; GFX8-NEXT:    v_lshrrev_b16_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX8-NEXT:    v_or_b32_e32 v2, s0, v2
-; GFX8-NEXT:    v_lshlrev_b16_e32 v3, 1, v0
-; GFX8-NEXT:    v_mov_b32_e32 v4, 1
-; GFX8-NEXT:    s_xor_b32 s0, s1, -1
-; GFX8-NEXT:    v_lshlrev_b16_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX8-NEXT:    s_lshr_b32 s1, s0, 16
-; GFX8-NEXT:    s_and_b32 s2, s0, 15
-; GFX8-NEXT:    s_andn2_b32 s0, 15, s0
-; GFX8-NEXT:    v_lshrrev_b16_e32 v3, 1, v3
-; GFX8-NEXT:    v_lshrrev_b16_e32 v3, s0, v3
-; GFX8-NEXT:    s_and_b32 s0, s1, 15
-; GFX8-NEXT:    s_andn2_b32 s1, 15, s1
-; GFX8-NEXT:    v_lshrrev_b16_e32 v0, 1, v0
-; GFX8-NEXT:    v_lshlrev_b16_e32 v2, s0, v2
-; GFX8-NEXT:    v_lshrrev_b16_e32 v0, s1, v0
-; GFX8-NEXT:    v_or_b32_e32 v0, v2, v0
-; GFX8-NEXT:    v_lshlrev_b16_e32 v1, s2, v1
+; GFX8-NEXT:    s_and_b32 s0, s3, 15
+; GFX8-NEXT:    s_andn2_b32 s1, 15, s3
+; GFX8-NEXT:    s_lshl_b32 s2, s2, 1
+; GFX8-NEXT:    v_mov_b32_e32 v2, s0
+; GFX8-NEXT:    s_lshl_b32 s1, s2, s1
+; GFX8-NEXT:    v_lshrrev_b16_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX8-NEXT:    v_or_b32_e32 v0, s1, v0
 ; GFX8-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX8-NEXT:    v_or_b32_e32 v1, v1, v3
 ; GFX8-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
 ; GFX8-NEXT:    v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
 ; GFX8-NEXT:    ; return to shader part epilog
@@ -3963,32 +3867,19 @@ define amdgpu_ps float @v_fshr_v2i16_svs(<2 x i16> inreg %lhs, <2 x i16> %rhs, <
 define amdgpu_ps float @v_fshr_v2i16_vss(<2 x i16> %lhs, <2 x i16> inreg %rhs, <2 x i16> inreg %amt) {
 ; GFX6-LABEL: v_fshr_v2i16_vss:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_lshl_b32 s3, s3, 16
-; GFX6-NEXT:    s_and_b32 s2, s2, 0xffff
-; GFX6-NEXT:    s_or_b32 s2, s3, s2
-; GFX6-NEXT:    s_bfe_u32 s3, s0, 0xf0001
-; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
-; GFX6-NEXT:    s_lshr_b32 s3, s3, 14
-; GFX6-NEXT:    v_or_b32_e32 v0, s3, v0
-; GFX6-NEXT:    s_bfe_u32 s3, s1, 0xf0001
-; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 1, v1
-; GFX6-NEXT:    s_lshr_b32 s3, s3, 14
-; GFX6-NEXT:    s_lshl_b32 s0, s0, 1
-; GFX6-NEXT:    s_xor_b32 s2, s2, -1
-; GFX6-NEXT:    v_or_b32_e32 v1, s3, v1
-; GFX6-NEXT:    s_lshr_b32 s3, s2, 16
 ; GFX6-NEXT:    s_and_b32 s4, s2, 15
 ; GFX6-NEXT:    s_andn2_b32 s2, 15, s2
-; GFX6-NEXT:    s_bfe_u32 s0, s0, 0xf0001
-; GFX6-NEXT:    v_lshlrev_b32_e32 v0, s4, v0
-; GFX6-NEXT:    s_lshr_b32 s0, s0, s2
-; GFX6-NEXT:    s_lshl_b32 s1, s1, 1
+; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
+; GFX6-NEXT:    s_and_b32 s0, s0, 0xffff
+; GFX6-NEXT:    v_lshlrev_b32_e32 v0, s2, v0
+; GFX6-NEXT:    s_lshr_b32 s0, s0, s4
 ; GFX6-NEXT:    v_or_b32_e32 v0, s0, v0
 ; GFX6-NEXT:    s_and_b32 s0, s3, 15
 ; GFX6-NEXT:    s_andn2_b32 s2, 15, s3
-; GFX6-NEXT:    v_lshlrev_b32_e32 v1, s0, v1
-; GFX6-NEXT:    s_bfe_u32 s0, s1, 0xf0001
-; GFX6-NEXT:    s_lshr_b32 s0, s0, s2
+; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 1, v1
+; GFX6-NEXT:    s_and_b32 s1, s1, 0xffff
+; GFX6-NEXT:    v_lshlrev_b32_e32 v1, s2, v1
+; GFX6-NEXT:    s_lshr_b32 s0, s1, s0
 ; GFX6-NEXT:    v_or_b32_e32 v1, s0, v1
 ; GFX6-NEXT:    v_and_b32_e32 v1, 0xffff, v1
 ; GFX6-NEXT:    v_and_b32_e32 v0, 0xffff, v0
@@ -3998,32 +3889,21 @@ define amdgpu_ps float @v_fshr_v2i16_vss(<2 x i16> %lhs, <2 x i16> inreg %rhs, <
 ;
 ; GFX8-LABEL: v_fshr_v2i16_vss:
 ; GFX8:       ; %bb.0:
-; GFX8-NEXT:    s_and_b32 s3, 0xffff, s0
 ; GFX8-NEXT:    s_lshr_b32 s2, s0, 16
-; GFX8-NEXT:    v_lshlrev_b16_e32 v1, 1, v0
-; GFX8-NEXT:    s_lshr_b32 s3, s3, 15
-; GFX8-NEXT:    v_mov_b32_e32 v2, 1
-; GFX8-NEXT:    s_lshl_b32 s0, s0, 1
-; GFX8-NEXT:    v_or_b32_e32 v1, s3, v1
-; GFX8-NEXT:    v_lshlrev_b16_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX8-NEXT:    s_lshr_b32 s3, s2, 15
-; GFX8-NEXT:    s_xor_b32 s1, s1, -1
-; GFX8-NEXT:    s_and_b32 s0, 0xffff, s0
-; GFX8-NEXT:    v_or_b32_e32 v0, s3, v0
 ; GFX8-NEXT:    s_lshr_b32 s3, s1, 16
 ; GFX8-NEXT:    s_and_b32 s4, s1, 15
 ; GFX8-NEXT:    s_andn2_b32 s1, 15, s1
-; GFX8-NEXT:    s_lshr_b32 s0, s0, 1
-; GFX8-NEXT:    v_lshlrev_b16_e32 v1, s4, v1
-; GFX8-NEXT:    s_lshr_b32 s0, s0, s1
-; GFX8-NEXT:    s_lshl_b32 s2, s2, 1
+; GFX8-NEXT:    v_lshlrev_b16_e32 v1, 1, v0
+; GFX8-NEXT:    s_and_b32 s0, 0xffff, s0
+; GFX8-NEXT:    v_lshlrev_b16_e32 v1, s1, v1
+; GFX8-NEXT:    s_lshr_b32 s0, s0, s4
+; GFX8-NEXT:    v_mov_b32_e32 v2, 1
 ; GFX8-NEXT:    v_or_b32_e32 v1, s0, v1
 ; GFX8-NEXT:    s_and_b32 s0, s3, 15
-; GFX8-NEXT:    v_lshlrev_b16_e32 v0, s0, v0
-; GFX8-NEXT:    s_and_b32 s0, 0xffff, s2
 ; GFX8-NEXT:    s_andn2_b32 s1, 15, s3
-; GFX8-NEXT:    s_lshr_b32 s0, s0, 1
-; GFX8-NEXT:    s_lshr_b32 s0, s0, s1
+; GFX8-NEXT:    v_lshlrev_b16_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX8-NEXT:    v_lshlrev_b16_e32 v0, s1, v0
+; GFX8-NEXT:    s_lshr_b32 s0, s2, s0
 ; GFX8-NEXT:    v_or_b32_e32 v0, s0, v0
 ; GFX8-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX8-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
@@ -4084,46 +3964,26 @@ define amdgpu_ps float @v_fshr_v2i16_vss(<2 x i16> %lhs, <2 x i16> inreg %rhs, <
 define amdgpu_ps i48 @s_fshr_v3i16(<3 x i16> inreg %lhs, <3 x i16> inreg %rhs, <3 x i16> inreg %amt) {
 ; GFX6-LABEL: s_fshr_v3i16:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_and_b32 s7, s7, 0xffff
-; GFX6-NEXT:    s_and_b32 s6, s6, 0xffff
-; GFX6-NEXT:    s_lshl_b32 s7, s7, 16
-; GFX6-NEXT:    s_or_b32 s6, s6, s7
-; GFX6-NEXT:    s_and_b32 s7, s8, 0xffff
-; GFX6-NEXT:    s_bfe_u32 s8, s3, 0xf0001
-; GFX6-NEXT:    s_lshl_b32 s0, s0, 1
-; GFX6-NEXT:    s_lshr_b32 s8, s8, 14
-; GFX6-NEXT:    s_or_b32 s0, s0, s8
-; GFX6-NEXT:    s_bfe_u32 s8, s4, 0xf0001
-; GFX6-NEXT:    s_lshl_b32 s1, s1, 1
-; GFX6-NEXT:    s_lshr_b32 s8, s8, 14
-; GFX6-NEXT:    s_lshl_b32 s3, s3, 1
-; GFX6-NEXT:    s_xor_b32 s6, s6, -1
-; GFX6-NEXT:    s_or_b32 s1, s1, s8
-; GFX6-NEXT:    s_lshr_b32 s8, s6, 16
 ; GFX6-NEXT:    s_and_b32 s9, s6, 15
 ; GFX6-NEXT:    s_andn2_b32 s6, 15, s6
-; GFX6-NEXT:    s_bfe_u32 s3, s3, 0xf0001
-; GFX6-NEXT:    s_lshl_b32 s0, s0, s9
-; GFX6-NEXT:    s_lshr_b32 s3, s3, s6
-; GFX6-NEXT:    s_lshl_b32 s4, s4, 1
+; GFX6-NEXT:    s_lshl_b32 s0, s0, 1
+; GFX6-NEXT:    s_and_b32 s3, s3, 0xffff
+; GFX6-NEXT:    s_lshl_b32 s0, s0, s6
+; GFX6-NEXT:    s_lshr_b32 s3, s3, s9
 ; GFX6-NEXT:    s_or_b32 s0, s0, s3
-; GFX6-NEXT:    s_and_b32 s3, s8, 15
-; GFX6-NEXT:    s_andn2_b32 s6, 15, s8
-; GFX6-NEXT:    s_lshl_b32 s1, s1, s3
-; GFX6-NEXT:    s_bfe_u32 s3, s4, 0xf0001
-; GFX6-NEXT:    s_lshr_b32 s3, s3, s6
-; GFX6-NEXT:    s_or_b32 s1, s1, s3
-; GFX6-NEXT:    s_bfe_u32 s3, s5, 0xf0001
+; GFX6-NEXT:    s_and_b32 s3, s7, 15
+; GFX6-NEXT:    s_andn2_b32 s6, 15, s7
+; GFX6-NEXT:    s_lshl_b32 s1, s1, 1
+; GFX6-NEXT:    s_and_b32 s4, s4, 0xffff
+; GFX6-NEXT:    s_lshl_b32 s1, s1, s6
+; GFX6-NEXT:    s_lshr_b32 s3, s4, s3
+; GFX6-NEXT:    s_andn2_b32 s4, 15, s8
 ; GFX6-NEXT:    s_lshl_b32 s2, s2, 1
-; GFX6-NEXT:    s_lshr_b32 s3, s3, 14
-; GFX6-NEXT:    s_or_b32 s2, s2, s3
-; GFX6-NEXT:    s_lshl_b32 s3, s5, 1
-; GFX6-NEXT:    s_xor_b32 s4, s7, -1
-; GFX6-NEXT:    s_and_b32 s5, s4, 15
-; GFX6-NEXT:    s_andn2_b32 s4, 15, s4
-; GFX6-NEXT:    s_bfe_u32 s3, s3, 0xf0001
-; GFX6-NEXT:    s_lshl_b32 s2, s2, s5
-; GFX6-NEXT:    s_lshr_b32 s3, s3, s4
+; GFX6-NEXT:    s_or_b32 s1, s1, s3
+; GFX6-NEXT:    s_and_b32 s3, s8, 15
+; GFX6-NEXT:    s_lshl_b32 s2, s2, s4
+; GFX6-NEXT:    s_and_b32 s4, s5, 0xffff
+; GFX6-NEXT:    s_lshr_b32 s3, s4, s3
 ; GFX6-NEXT:    s_and_b32 s1, 0xffff, s1
 ; GFX6-NEXT:    s_or_b32 s2, s2, s3
 ; GFX6-NEXT:    s_and_b32 s0, 0xffff, s0
@@ -4134,43 +3994,26 @@ define amdgpu_ps i48 @s_fshr_v3i16(<3 x i16> inreg %lhs, <3 x i16> inreg %rhs, <
 ;
 ; GFX8-LABEL: s_fshr_v3i16:
 ; GFX8:       ; %bb.0:
-; GFX8-NEXT:    s_and_b32 s8, 0xffff, s2
 ; GFX8-NEXT:    s_lshr_b32 s6, s0, 16
 ; GFX8-NEXT:    s_lshr_b32 s7, s2, 16
-; GFX8-NEXT:    s_lshl_b32 s0, s0, 1
-; GFX8-NEXT:    s_lshr_b32 s8, s8, 15
-; GFX8-NEXT:    s_lshl_b32 s2, s2, 1
-; GFX8-NEXT:    s_or_b32 s0, s0, s8
-; GFX8-NEXT:    s_lshl_b32 s6, s6, 1
-; GFX8-NEXT:    s_lshr_b32 s8, s7, 15
-; GFX8-NEXT:    s_xor_b32 s4, s4, -1
-; GFX8-NEXT:    s_and_b32 s2, 0xffff, s2
-; GFX8-NEXT:    s_or_b32 s6, s6, s8
 ; GFX8-NEXT:    s_lshr_b32 s8, s4, 16
 ; GFX8-NEXT:    s_and_b32 s9, s4, 15
 ; GFX8-NEXT:    s_andn2_b32 s4, 15, s4
-; GFX8-NEXT:    s_lshr_b32 s2, s2, 1
-; GFX8-NEXT:    s_lshl_b32 s0, s0, s9
-; GFX8-NEXT:    s_lshr_b32 s2, s2, s4
-; GFX8-NEXT:    s_lshl_b32 s7, s7, 1
+; GFX8-NEXT:    s_lshl_b32 s0, s0, 1
+; GFX8-NEXT:    s_and_b32 s2, 0xffff, s2
+; GFX8-NEXT:    s_lshl_b32 s0, s0, s4
+; GFX8-NEXT:    s_lshr_b32 s2, s2, s9
 ; GFX8-NEXT:    s_or_b32 s0, s0, s2
 ; GFX8-NEXT:    s_and_b32 s2, s8, 15
-; GFX8-NEXT:    s_lshl_b32 s2, s6, s2
-; GFX8-NEXT:    s_and_b32 s6, 0xffff, s7
 ; GFX8-NEXT:    s_andn2_b32 s4, 15, s8
-; GFX8-NEXT:    s_lshr_b32 s6, s6, 1
-; GFX8-NEXT:    s_lshr_b32 s4, s6, s4
-; GFX8-NEXT:    s_or_b32 s2, s2, s4
-; GFX8-NEXT:    s_and_b32 s4, 0xffff, s3
+; GFX8-NEXT:    s_lshl_b32 s6, s6, 1
+; GFX8-NEXT:    s_lshl_b32 s4, s6, s4
+; GFX8-NEXT:    s_lshr_b32 s2, s7, s2
+; GFX8-NEXT:    s_or_b32 s2, s4, s2
+; GFX8-NEXT:    s_and_b32 s4, s5, 15
+; GFX8-NEXT:    s_andn2_b32 s5, 15, s5
 ; GFX8-NEXT:    s_lshl_b32 s1, s1, 1
-; GFX8-NEXT:    s_lshr_b32 s4, s4, 15
-; GFX8-NEXT:    s_lshl_b32 s3, s3, 1
-; GFX8-NEXT:    s_or_b32 s1, s1, s4
-; GFX8-NEXT:    s_xor_b32 s4, s5, -1
 ; GFX8-NEXT:    s_and_b32 s3, 0xffff, s3
-; GFX8-NEXT:    s_and_b32 s5, s4, 15
-; GFX8-NEXT:    s_andn2_b32 s4, 15, s4
-; GFX8-NEXT:    s_lshr_b32 s3, s3, 1
 ; GFX8-NEXT:    s_lshl_b32 s1, s1, s5
 ; GFX8-NEXT:    s_lshr_b32 s3, s3, s4
 ; GFX8-NEXT:    s_and_b32 s2, 0xffff, s2
@@ -4325,92 +4168,58 @@ define <3 x half> @v_fshr_v3i16(<3 x i16> %lhs, <3 x i16> %rhs, <3 x i16> %amt)
 ; GFX6-LABEL: v_fshr_v3i16:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-NEXT:    v_and_b32_e32 v7, 0xffff, v7
-; GFX6-NEXT:    v_and_b32_e32 v6, 0xffff, v6
-; GFX6-NEXT:    v_lshlrev_b32_e32 v7, 16, v7
-; GFX6-NEXT:    v_or_b32_e32 v6, v6, v7
-; GFX6-NEXT:    v_and_b32_e32 v7, 0xffff, v8
-; GFX6-NEXT:    v_bfe_u32 v8, v3, 1, 15
-; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
-; GFX6-NEXT:    v_lshrrev_b32_e32 v8, 14, v8
-; GFX6-NEXT:    v_or_b32_e32 v0, v0, v8
-; GFX6-NEXT:    v_bfe_u32 v8, v4, 1, 15
-; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 1, v1
-; GFX6-NEXT:    v_lshrrev_b32_e32 v8, 14, v8
-; GFX6-NEXT:    v_xor_b32_e32 v6, -1, v6
-; GFX6-NEXT:    v_or_b32_e32 v1, v1, v8
-; GFX6-NEXT:    v_lshlrev_b32_e32 v3, 1, v3
-; GFX6-NEXT:    v_lshrrev_b32_e32 v8, 16, v6
 ; GFX6-NEXT:    v_and_b32_e32 v9, 15, v6
 ; GFX6-NEXT:    v_xor_b32_e32 v6, -1, v6
 ; GFX6-NEXT:    v_and_b32_e32 v6, 15, v6
-; GFX6-NEXT:    v_bfe_u32 v3, v3, 1, 15
-; GFX6-NEXT:    v_lshlrev_b32_e32 v0, v9, v0
-; GFX6-NEXT:    v_lshrrev_b32_e32 v3, v6, v3
-; GFX6-NEXT:    v_lshlrev_b32_e32 v4, 1, v4
+; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
+; GFX6-NEXT:    v_and_b32_e32 v3, 0xffff, v3
+; GFX6-NEXT:    v_lshlrev_b32_e32 v0, v6, v0
+; GFX6-NEXT:    v_lshrrev_b32_e32 v3, v9, v3
 ; GFX6-NEXT:    v_or_b32_e32 v0, v0, v3
-; GFX6-NEXT:    v_and_b32_e32 v3, 15, v8
-; GFX6-NEXT:    v_xor_b32_e32 v6, -1, v8
+; GFX6-NEXT:    v_and_b32_e32 v3, 15, v7
+; GFX6-NEXT:    v_xor_b32_e32 v6, -1, v7
+; GFX6-NEXT:    v_and_b32_e32 v4, 0xffff, v4
 ; GFX6-NEXT:    v_and_b32_e32 v6, 15, v6
-; GFX6-NEXT:    v_lshlrev_b32_e32 v1, v3, v1
-; GFX6-NEXT:    v_bfe_u32 v3, v4, 1, 15
-; GFX6-NEXT:    v_lshrrev_b32_e32 v3, v6, v3
-; GFX6-NEXT:    v_or_b32_e32 v1, v1, v3
-; GFX6-NEXT:    v_bfe_u32 v3, v5, 1, 15
-; GFX6-NEXT:    v_lshlrev_b32_e32 v2, 1, v2
-; GFX6-NEXT:    v_lshrrev_b32_e32 v3, 14, v3
-; GFX6-NEXT:    v_xor_b32_e32 v4, -1, v7
-; GFX6-NEXT:    v_or_b32_e32 v2, v2, v3
-; GFX6-NEXT:    v_lshlrev_b32_e32 v3, 1, v5
-; GFX6-NEXT:    v_and_b32_e32 v5, 15, v4
-; GFX6-NEXT:    v_xor_b32_e32 v4, -1, v4
+; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 1, v1
+; GFX6-NEXT:    v_lshrrev_b32_e32 v3, v3, v4
+; GFX6-NEXT:    v_xor_b32_e32 v4, -1, v8
+; GFX6-NEXT:    v_lshlrev_b32_e32 v1, v6, v1
 ; GFX6-NEXT:    v_and_b32_e32 v4, 15, v4
-; GFX6-NEXT:    v_bfe_u32 v3, v3, 1, 15
-; GFX6-NEXT:    v_lshlrev_b32_e32 v2, v5, v2
-; GFX6-NEXT:    v_lshrrev_b32_e32 v3, v4, v3
+; GFX6-NEXT:    v_lshlrev_b32_e32 v2, 1, v2
+; GFX6-NEXT:    v_or_b32_e32 v1, v1, v3
+; GFX6-NEXT:    v_and_b32_e32 v3, 15, v8
+; GFX6-NEXT:    v_lshlrev_b32_e32 v2, v4, v2
+; GFX6-NEXT:    v_and_b32_e32 v4, 0xffff, v5
+; GFX6-NEXT:    v_lshrrev_b32_e32 v3, v3, v4
 ; GFX6-NEXT:    v_or_b32_e32 v2, v2, v3
 ; GFX6-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_fshr_v3i16:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_lshlrev_b16_e32 v6, 1, v0
-; GFX8-NEXT:    v_lshrrev_b16_e32 v7, 15, v2
-; GFX8-NEXT:    v_or_b32_e32 v6, v6, v7
-; GFX8-NEXT:    v_mov_b32_e32 v7, 1
-; GFX8-NEXT:    v_mov_b32_e32 v8, 15
-; GFX8-NEXT:    v_lshlrev_b16_sdwa v0, v7, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX8-NEXT:    v_lshrrev_b16_sdwa v9, v8, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX8-NEXT:    v_xor_b32_e32 v4, -1, v4
-; GFX8-NEXT:    v_or_b32_e32 v0, v0, v9
-; GFX8-NEXT:    v_lshlrev_b16_e32 v9, 1, v2
-; GFX8-NEXT:    v_lshlrev_b16_sdwa v2, v7, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX8-NEXT:    v_and_b32_e32 v7, 15, v4
-; GFX8-NEXT:    v_xor_b32_e32 v10, -1, v4
-; GFX8-NEXT:    v_and_b32_e32 v10, 15, v10
-; GFX8-NEXT:    v_lshlrev_b16_e32 v6, v7, v6
-; GFX8-NEXT:    v_lshrrev_b16_e32 v7, 1, v9
-; GFX8-NEXT:    v_lshrrev_b16_e32 v7, v10, v7
-; GFX8-NEXT:    v_or_b32_e32 v6, v6, v7
-; GFX8-NEXT:    v_and_b32_sdwa v7, v4, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX8-NEXT:    v_xor_b32_e32 v7, -1, v4
+; GFX8-NEXT:    v_and_b32_e32 v6, 15, v4
+; GFX8-NEXT:    v_and_b32_e32 v7, 15, v7
+; GFX8-NEXT:    v_lshlrev_b16_e32 v8, 1, v0
+; GFX8-NEXT:    v_lshlrev_b16_e32 v7, v7, v8
+; GFX8-NEXT:    v_lshrrev_b16_e32 v6, v6, v2
+; GFX8-NEXT:    v_or_b32_e32 v6, v7, v6
+; GFX8-NEXT:    v_mov_b32_e32 v7, 15
 ; GFX8-NEXT:    v_mov_b32_e32 v8, -1
+; GFX8-NEXT:    v_and_b32_sdwa v7, v4, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
 ; GFX8-NEXT:    v_xor_b32_sdwa v4, v4, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX8-NEXT:    v_mov_b32_e32 v8, 1
 ; GFX8-NEXT:    v_and_b32_e32 v4, 15, v4
-; GFX8-NEXT:    v_lshrrev_b16_e32 v2, 1, v2
-; GFX8-NEXT:    v_lshlrev_b16_e32 v0, v7, v0
-; GFX8-NEXT:    v_lshrrev_b16_e32 v2, v4, v2
+; GFX8-NEXT:    v_lshlrev_b16_sdwa v0, v8, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX8-NEXT:    v_lshlrev_b16_e32 v0, v4, v0
+; GFX8-NEXT:    v_lshrrev_b16_sdwa v2, v7, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX8-NEXT:    v_xor_b32_e32 v4, -1, v5
 ; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
+; GFX8-NEXT:    v_and_b32_e32 v2, 15, v5
+; GFX8-NEXT:    v_and_b32_e32 v4, 15, v4
 ; GFX8-NEXT:    v_lshlrev_b16_e32 v1, 1, v1
-; GFX8-NEXT:    v_lshrrev_b16_e32 v2, 15, v3
-; GFX8-NEXT:    v_or_b32_e32 v1, v1, v2
-; GFX8-NEXT:    v_lshlrev_b16_e32 v2, 1, v3
-; GFX8-NEXT:    v_xor_b32_e32 v3, -1, v5
-; GFX8-NEXT:    v_and_b32_e32 v4, 15, v3
-; GFX8-NEXT:    v_xor_b32_e32 v3, -1, v3
-; GFX8-NEXT:    v_and_b32_e32 v3, 15, v3
-; GFX8-NEXT:    v_lshrrev_b16_e32 v2, 1, v2
 ; GFX8-NEXT:    v_lshlrev_b16_e32 v1, v4, v1
-; GFX8-NEXT:    v_lshrrev_b16_e32 v2, v3, v2
+; GFX8-NEXT:    v_lshrrev_b16_e32 v2, v2, v3
 ; GFX8-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX8-NEXT:    v_or_b32_e32 v1, v1, v2
 ; GFX8-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
@@ -4484,131 +4293,83 @@ define <3 x half> @v_fshr_v3i16(<3 x i16> %lhs, <3 x i16> %rhs, <3 x i16> %amt)
 define amdgpu_ps <2 x i32> @s_fshr_v4i16(<4 x i16> inreg %lhs, <4 x i16> inreg %rhs, <4 x i16> inreg %amt) {
 ; GFX6-LABEL: s_fshr_v4i16:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_lshl_b32 s9, s9, 16
-; GFX6-NEXT:    s_and_b32 s8, s8, 0xffff
-; GFX6-NEXT:    s_or_b32 s8, s9, s8
-; GFX6-NEXT:    s_lshl_b32 s9, s11, 16
-; GFX6-NEXT:    s_and_b32 s10, s10, 0xffff
-; GFX6-NEXT:    s_or_b32 s9, s9, s10
-; GFX6-NEXT:    s_bfe_u32 s10, s4, 0xf0001
-; GFX6-NEXT:    s_lshl_b32 s0, s0, 1
-; GFX6-NEXT:    s_lshr_b32 s10, s10, 14
-; GFX6-NEXT:    s_or_b32 s0, s0, s10
-; GFX6-NEXT:    s_bfe_u32 s10, s5, 0xf0001
-; GFX6-NEXT:    s_lshl_b32 s1, s1, 1
-; GFX6-NEXT:    s_lshr_b32 s10, s10, 14
-; GFX6-NEXT:    s_lshl_b32 s4, s4, 1
-; GFX6-NEXT:    s_xor_b32 s8, s8, -1
-; GFX6-NEXT:    s_or_b32 s1, s1, s10
-; GFX6-NEXT:    s_lshr_b32 s10, s8, 16
-; GFX6-NEXT:    s_and_b32 s11, s8, 15
+; GFX6-NEXT:    s_and_b32 s12, s8, 15
 ; GFX6-NEXT:    s_andn2_b32 s8, 15, s8
-; GFX6-NEXT:    s_bfe_u32 s4, s4, 0xf0001
-; GFX6-NEXT:    s_lshl_b32 s0, s0, s11
-; GFX6-NEXT:    s_lshr_b32 s4, s4, s8
-; GFX6-NEXT:    s_lshl_b32 s5, s5, 1
+; GFX6-NEXT:    s_lshl_b32 s0, s0, 1
+; GFX6-NEXT:    s_and_b32 s4, s4, 0xffff
+; GFX6-NEXT:    s_lshl_b32 s0, s0, s8
+; GFX6-NEXT:    s_lshr_b32 s4, s4, s12
 ; GFX6-NEXT:    s_or_b32 s0, s0, s4
-; GFX6-NEXT:    s_and_b32 s4, s10, 15
-; GFX6-NEXT:    s_andn2_b32 s8, 15, s10
-; GFX6-NEXT:    s_lshl_b32 s1, s1, s4
-; GFX6-NEXT:    s_bfe_u32 s4, s5, 0xf0001
-; GFX6-NEXT:    s_lshr_b32 s4, s4, s8
+; GFX6-NEXT:    s_and_b32 s4, s9, 15
+; GFX6-NEXT:    s_andn2_b32 s8, 15, s9
+; GFX6-NEXT:    s_lshl_b32 s1, s1, 1
+; GFX6-NEXT:    s_and_b32 s5, s5, 0xffff
+; GFX6-NEXT:    s_lshl_b32 s1, s1, s8
+; GFX6-NEXT:    s_lshr_b32 s4, s5, s4
+; GFX6-NEXT:    s_andn2_b32 s5, 15, s10
+; GFX6-NEXT:    s_lshl_b32 s2, s2, 1
 ; GFX6-NEXT:    s_or_b32 s1, s1, s4
+; GFX6-NEXT:    s_and_b32 s4, s10, 15
+; GFX6-NEXT:    s_lshl_b32 s2, s2, s5
+; GFX6-NEXT:    s_and_b32 s5, s6, 0xffff
+; GFX6-NEXT:    s_lshr_b32 s4, s5, s4
+; GFX6-NEXT:    s_andn2_b32 s5, 15, s11
+; GFX6-NEXT:    s_lshl_b32 s3, s3, 1
+; GFX6-NEXT:    s_or_b32 s2, s2, s4
+; GFX6-NEXT:    s_and_b32 s4, s11, 15
+; GFX6-NEXT:    s_lshl_b32 s3, s3, s5
+; GFX6-NEXT:    s_and_b32 s5, s7, 0xffff
+; GFX6-NEXT:    s_lshr_b32 s4, s5, s4
 ; GFX6-NEXT:    s_and_b32 s1, 0xffff, s1
+; GFX6-NEXT:    s_or_b32 s3, s3, s4
 ; GFX6-NEXT:    s_and_b32 s0, 0xffff, s0
 ; GFX6-NEXT:    s_lshl_b32 s1, s1, 16
 ; GFX6-NEXT:    s_or_b32 s0, s0, s1
-; GFX6-NEXT:    s_lshl_b32 s1, s2, 1
-; GFX6-NEXT:    s_bfe_u32 s2, s6, 0xf0001
-; GFX6-NEXT:    s_lshr_b32 s2, s2, 14
-; GFX6-NEXT:    s_or_b32 s1, s1, s2
-; GFX6-NEXT:    s_lshl_b32 s2, s3, 1
-; GFX6-NEXT:    s_bfe_u32 s3, s7, 0xf0001
-; GFX6-NEXT:    s_lshr_b32 s3, s3, 14
-; GFX6-NEXT:    s_or_b32 s2, s2, s3
-; GFX6-NEXT:    s_lshl_b32 s3, s6, 1
-; GFX6-NEXT:    s_xor_b32 s5, s9, -1
-; GFX6-NEXT:    s_lshl_b32 s4, s7, 1
-; GFX6-NEXT:    s_lshr_b32 s6, s5, 16
-; GFX6-NEXT:    s_and_b32 s7, s5, 15
-; GFX6-NEXT:    s_andn2_b32 s5, 15, s5
-; GFX6-NEXT:    s_bfe_u32 s3, s3, 0xf0001
-; GFX6-NEXT:    s_lshl_b32 s1, s1, s7
-; GFX6-NEXT:    s_lshr_b32 s3, s3, s5
-; GFX6-NEXT:    s_or_b32 s1, s1, s3
-; GFX6-NEXT:    s_and_b32 s3, s6, 15
-; GFX6-NEXT:    s_andn2_b32 s5, 15, s6
-; GFX6-NEXT:    s_lshl_b32 s2, s2, s3
-; GFX6-NEXT:    s_bfe_u32 s3, s4, 0xf0001
-; GFX6-NEXT:    s_lshr_b32 s3, s3, s5
-; GFX6-NEXT:    s_or_b32 s2, s2, s3
-; GFX6-NEXT:    s_and_b32 s2, 0xffff, s2
-; GFX6-NEXT:    s_and_b32 s1, 0xffff, s1
+; GFX6-NEXT:    s_and_b32 s1, 0xffff, s2
+; GFX6-NEXT:    s_and_b32 s2, 0xffff, s3
 ; GFX6-NEXT:    s_lshl_b32 s2, s2, 16
 ; GFX6-NEXT:    s_or_b32 s1, s1, s2
 ; GFX6-NEXT:    ; return to shader part epilog
 ;
 ; GFX8-LABEL: s_fshr_v4i16:
 ; GFX8:       ; %bb.0:
-; GFX8-NEXT:    s_and_b32 s8, 0xffff, s2
 ; GFX8-NEXT:    s_lshr_b32 s6, s0, 16
-; GFX8-NEXT:    s_lshr_b32 s7, s2, 16
+; GFX8-NEXT:    s_lshr_b32 s8, s2, 16
+; GFX8-NEXT:    s_lshr_b32 s10, s4, 16
+; GFX8-NEXT:    s_and_b32 s12, s4, 15
+; GFX8-NEXT:    s_andn2_b32 s4, 15, s4
 ; GFX8-NEXT:    s_lshl_b32 s0, s0, 1
-; GFX8-NEXT:    s_lshr_b32 s8, s8, 15
-; GFX8-NEXT:    s_lshl_b32 s2, s2, 1
-; GFX8-NEXT:    s_or_b32 s0, s0, s8
-; GFX8-NEXT:    s_lshl_b32 s6, s6, 1
-; GFX8-NEXT:    s_lshr_b32 s8, s7, 15
-; GFX8-NEXT:    s_xor_b32 s4, s4, -1
 ; GFX8-NEXT:    s_and_b32 s2, 0xffff, s2
-; GFX8-NEXT:    s_or_b32 s6, s6, s8
-; GFX8-NEXT:    s_lshr_b32 s8, s4, 16
-; GFX8-NEXT:    s_and_b32 s9, s4, 15
-; GFX8-NEXT:    s_andn2_b32 s4, 15, s4
-; GFX8-NEXT:    s_lshr_b32 s2, s2, 1
-; GFX8-NEXT:    s_lshl_b32 s0, s0, s9
-; GFX8-NEXT:    s_lshr_b32 s2, s2, s4
-; GFX8-NEXT:    s_lshl_b32 s7, s7, 1
+; GFX8-NEXT:    s_lshl_b32 s0, s0, s4
+; GFX8-NEXT:    s_lshr_b32 s2, s2, s12
 ; GFX8-NEXT:    s_or_b32 s0, s0, s2
-; GFX8-NEXT:    s_and_b32 s2, s8, 15
-; GFX8-NEXT:    s_lshl_b32 s2, s6, s2
-; GFX8-NEXT:    s_and_b32 s6, 0xffff, s7
-; GFX8-NEXT:    s_andn2_b32 s4, 15, s8
-; GFX8-NEXT:    s_lshr_b32 s6, s6, 1
-; GFX8-NEXT:    s_lshr_b32 s4, s6, s4
-; GFX8-NEXT:    s_or_b32 s2, s2, s4
-; GFX8-NEXT:    s_and_b32 s2, 0xffff, s2
-; GFX8-NEXT:    s_and_b32 s0, 0xffff, s0
-; GFX8-NEXT:    s_lshl_b32 s2, s2, 16
-; GFX8-NEXT:    s_and_b32 s6, 0xffff, s3
-; GFX8-NEXT:    s_or_b32 s0, s0, s2
-; GFX8-NEXT:    s_lshr_b32 s2, s1, 16
-; GFX8-NEXT:    s_lshr_b32 s4, s3, 16
+; GFX8-NEXT:    s_and_b32 s2, s10, 15
+; GFX8-NEXT:    s_andn2_b32 s4, 15, s10
+; GFX8-NEXT:    s_lshl_b32 s6, s6, 1
+; GFX8-NEXT:    s_lshl_b32 s4, s6, s4
+; GFX8-NEXT:    s_lshr_b32 s2, s8, s2
+; GFX8-NEXT:    s_lshr_b32 s7, s1, 16
+; GFX8-NEXT:    s_lshr_b32 s9, s3, 16
+; GFX8-NEXT:    s_lshr_b32 s11, s5, 16
+; GFX8-NEXT:    s_or_b32 s2, s4, s2
+; GFX8-NEXT:    s_and_b32 s4, s5, 15
+; GFX8-NEXT:    s_andn2_b32 s5, 15, s5
 ; GFX8-NEXT:    s_lshl_b32 s1, s1, 1
-; GFX8-NEXT:    s_lshr_b32 s6, s6, 15
-; GFX8-NEXT:    s_lshl_b32 s3, s3, 1
-; GFX8-NEXT:    s_or_b32 s1, s1, s6
-; GFX8-NEXT:    s_lshl_b32 s2, s2, 1
-; GFX8-NEXT:    s_lshr_b32 s6, s4, 15
-; GFX8-NEXT:    s_xor_b32 s5, s5, -1
 ; GFX8-NEXT:    s_and_b32 s3, 0xffff, s3
-; GFX8-NEXT:    s_or_b32 s2, s2, s6
-; GFX8-NEXT:    s_lshr_b32 s6, s5, 16
-; GFX8-NEXT:    s_and_b32 s7, s5, 15
-; GFX8-NEXT:    s_andn2_b32 s5, 15, s5
-; GFX8-NEXT:    s_lshr_b32 s3, s3, 1
-; GFX8-NEXT:    s_lshl_b32 s1, s1, s7
-; GFX8-NEXT:    s_lshr_b32 s3, s3, s5
-; GFX8-NEXT:    s_lshl_b32 s4, s4, 1
+; GFX8-NEXT:    s_lshl_b32 s1, s1, s5
+; GFX8-NEXT:    s_lshr_b32 s3, s3, s4
 ; GFX8-NEXT:    s_or_b32 s1, s1, s3
-; GFX8-NEXT:    s_and_b32 s3, s6, 15
-; GFX8-NEXT:    s_lshl_b32 s2, s2, s3
-; GFX8-NEXT:    s_and_b32 s3, 0xffff, s4
-; GFX8-NEXT:    s_andn2_b32 s5, 15, s6
-; GFX8-NEXT:    s_lshr_b32 s3, s3, 1
-; GFX8-NEXT:    s_lshr_b32 s3, s3, s5
-; GFX8-NEXT:    s_or_b32 s2, s2, s3
+; GFX8-NEXT:    s_and_b32 s3, s11, 15
+; GFX8-NEXT:    s_andn2_b32 s4, 15, s11
+; GFX8-NEXT:    s_lshl_b32 s5, s7, 1
+; GFX8-NEXT:    s_lshl_b32 s4, s5, s4
+; GFX8-NEXT:    s_lshr_b32 s3, s9, s3
 ; GFX8-NEXT:    s_and_b32 s2, 0xffff, s2
+; GFX8-NEXT:    s_or_b32 s3, s4, s3
+; GFX8-NEXT:    s_and_b32 s0, 0xffff, s0
+; GFX8-NEXT:    s_lshl_b32 s2, s2, 16
+; GFX8-NEXT:    s_or_b32 s0, s0, s2
+; GFX8-NEXT:    s_and_b32 s2, 0xffff, s3
 ; GFX8-NEXT:    s_and_b32 s1, 0xffff, s1
 ; GFX8-NEXT:    s_lshl_b32 s2, s2, 16
 ; GFX8-NEXT:    s_or_b32 s1, s1, s2
@@ -4742,123 +4503,79 @@ define <4 x half> @v_fshr_v4i16(<4 x i16> %lhs, <4 x i16> %rhs, <4 x i16> %amt)
 ; GFX6-LABEL: v_fshr_v4i16:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-NEXT:    v_lshlrev_b32_e32 v9, 16, v9
-; GFX6-NEXT:    v_and_b32_e32 v8, 0xffff, v8
-; GFX6-NEXT:    v_or_b32_e32 v8, v9, v8
-; GFX6-NEXT:    v_lshlrev_b32_e32 v9, 16, v11
-; GFX6-NEXT:    v_and_b32_e32 v10, 0xffff, v10
-; GFX6-NEXT:    v_or_b32_e32 v9, v9, v10
-; GFX6-NEXT:    v_bfe_u32 v10, v4, 1, 15
-; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
-; GFX6-NEXT:    v_lshrrev_b32_e32 v10, 14, v10
-; GFX6-NEXT:    v_or_b32_e32 v0, v0, v10
-; GFX6-NEXT:    v_bfe_u32 v10, v5, 1, 15
-; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 1, v1
-; GFX6-NEXT:    v_lshrrev_b32_e32 v10, 14, v10
-; GFX6-NEXT:    v_xor_b32_e32 v8, -1, v8
-; GFX6-NEXT:    v_or_b32_e32 v1, v1, v10
-; GFX6-NEXT:    v_lshlrev_b32_e32 v4, 1, v4
-; GFX6-NEXT:    v_lshrrev_b32_e32 v10, 16, v8
-; GFX6-NEXT:    v_and_b32_e32 v11, 15, v8
+; GFX6-NEXT:    v_and_b32_e32 v12, 15, v8
 ; GFX6-NEXT:    v_xor_b32_e32 v8, -1, v8
 ; GFX6-NEXT:    v_and_b32_e32 v8, 15, v8
-; GFX6-NEXT:    v_bfe_u32 v4, v4, 1, 15
-; GFX6-NEXT:    v_lshlrev_b32_e32 v0, v11, v0
-; GFX6-NEXT:    v_lshrrev_b32_e32 v4, v8, v4
-; GFX6-NEXT:    v_lshlrev_b32_e32 v5, 1, v5
+; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
+; GFX6-NEXT:    v_and_b32_e32 v4, 0xffff, v4
+; GFX6-NEXT:    v_lshlrev_b32_e32 v0, v8, v0
+; GFX6-NEXT:    v_lshrrev_b32_e32 v4, v12, v4
 ; GFX6-NEXT:    v_or_b32_e32 v0, v0, v4
-; GFX6-NEXT:    v_and_b32_e32 v4, 15, v10
-; GFX6-NEXT:    v_xor_b32_e32 v8, -1, v10
+; GFX6-NEXT:    v_and_b32_e32 v4, 15, v9
+; GFX6-NEXT:    v_xor_b32_e32 v8, -1, v9
+; GFX6-NEXT:    v_and_b32_e32 v5, 0xffff, v5
 ; GFX6-NEXT:    v_and_b32_e32 v8, 15, v8
-; GFX6-NEXT:    v_lshlrev_b32_e32 v1, v4, v1
-; GFX6-NEXT:    v_bfe_u32 v4, v5, 1, 15
-; GFX6-NEXT:    v_lshrrev_b32_e32 v4, v8, v4
-; GFX6-NEXT:    v_or_b32_e32 v1, v1, v4
-; GFX6-NEXT:    v_bfe_u32 v4, v6, 1, 15
+; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 1, v1
+; GFX6-NEXT:    v_lshrrev_b32_e32 v4, v4, v5
+; GFX6-NEXT:    v_xor_b32_e32 v5, -1, v10
+; GFX6-NEXT:    v_lshlrev_b32_e32 v1, v8, v1
+; GFX6-NEXT:    v_and_b32_e32 v5, 15, v5
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v2, 1, v2
-; GFX6-NEXT:    v_lshrrev_b32_e32 v4, 14, v4
-; GFX6-NEXT:    v_or_b32_e32 v2, v2, v4
-; GFX6-NEXT:    v_bfe_u32 v4, v7, 1, 15
+; GFX6-NEXT:    v_or_b32_e32 v1, v1, v4
+; GFX6-NEXT:    v_and_b32_e32 v4, 15, v10
+; GFX6-NEXT:    v_lshlrev_b32_e32 v2, v5, v2
+; GFX6-NEXT:    v_and_b32_e32 v5, 0xffff, v6
+; GFX6-NEXT:    v_lshrrev_b32_e32 v4, v4, v5
+; GFX6-NEXT:    v_xor_b32_e32 v5, -1, v11
+; GFX6-NEXT:    v_and_b32_e32 v5, 15, v5
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v3, 1, v3
-; GFX6-NEXT:    v_lshrrev_b32_e32 v4, 14, v4
-; GFX6-NEXT:    v_or_b32_e32 v3, v3, v4
-; GFX6-NEXT:    v_lshlrev_b32_e32 v4, 1, v6
-; GFX6-NEXT:    v_xor_b32_e32 v6, -1, v9
-; GFX6-NEXT:    v_lshlrev_b32_e32 v5, 1, v7
-; GFX6-NEXT:    v_lshrrev_b32_e32 v7, 16, v6
-; GFX6-NEXT:    v_and_b32_e32 v8, 15, v6
-; GFX6-NEXT:    v_xor_b32_e32 v6, -1, v6
-; GFX6-NEXT:    v_and_b32_e32 v6, 15, v6
-; GFX6-NEXT:    v_bfe_u32 v4, v4, 1, 15
-; GFX6-NEXT:    v_lshlrev_b32_e32 v2, v8, v2
-; GFX6-NEXT:    v_lshrrev_b32_e32 v4, v6, v4
 ; GFX6-NEXT:    v_or_b32_e32 v2, v2, v4
-; GFX6-NEXT:    v_and_b32_e32 v4, 15, v7
-; GFX6-NEXT:    v_xor_b32_e32 v6, -1, v7
-; GFX6-NEXT:    v_and_b32_e32 v6, 15, v6
-; GFX6-NEXT:    v_lshlrev_b32_e32 v3, v4, v3
-; GFX6-NEXT:    v_bfe_u32 v4, v5, 1, 15
-; GFX6-NEXT:    v_lshrrev_b32_e32 v4, v6, v4
+; GFX6-NEXT:    v_and_b32_e32 v4, 15, v11
+; GFX6-NEXT:    v_lshlrev_b32_e32 v3, v5, v3
+; GFX6-NEXT:    v_and_b32_e32 v5, 0xffff, v7
+; GFX6-NEXT:    v_lshrrev_b32_e32 v4, v4, v5
 ; GFX6-NEXT:    v_or_b32_e32 v3, v3, v4
 ; GFX6-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_fshr_v4i16:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_lshlrev_b16_e32 v6, 1, v0
-; GFX8-NEXT:    v_lshrrev_b16_e32 v7, 15, v2
-; GFX8-NEXT:    v_or_b32_e32 v6, v6, v7
-; GFX8-NEXT:    v_mov_b32_e32 v7, 1
-; GFX8-NEXT:    v_mov_b32_e32 v8, 15
-; GFX8-NEXT:    v_lshlrev_b16_sdwa v0, v7, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX8-NEXT:    v_lshrrev_b16_sdwa v9, v8, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX8-NEXT:    v_xor_b32_e32 v4, -1, v4
-; GFX8-NEXT:    v_or_b32_e32 v0, v0, v9
-; GFX8-NEXT:    v_lshlrev_b16_e32 v9, 1, v2
-; GFX8-NEXT:    v_xor_b32_e32 v11, -1, v4
-; GFX8-NEXT:    v_and_b32_e32 v10, 15, v4
-; GFX8-NEXT:    v_and_b32_e32 v11, 15, v11
-; GFX8-NEXT:    v_lshrrev_b16_e32 v9, 1, v9
-; GFX8-NEXT:    v_lshlrev_b16_e32 v6, v10, v6
-; GFX8-NEXT:    v_lshrrev_b16_e32 v9, v11, v9
-; GFX8-NEXT:    v_mov_b32_e32 v10, -1
-; GFX8-NEXT:    v_lshlrev_b16_sdwa v2, v7, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX8-NEXT:    v_or_b32_e32 v6, v6, v9
-; GFX8-NEXT:    v_and_b32_sdwa v9, v4, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; GFX8-NEXT:    v_xor_b32_sdwa v4, v4, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX8-NEXT:    v_xor_b32_e32 v7, -1, v4
+; GFX8-NEXT:    v_and_b32_e32 v6, 15, v4
+; GFX8-NEXT:    v_and_b32_e32 v7, 15, v7
+; GFX8-NEXT:    v_lshlrev_b16_e32 v8, 1, v0
+; GFX8-NEXT:    v_lshlrev_b16_e32 v7, v7, v8
+; GFX8-NEXT:    v_lshrrev_b16_e32 v6, v6, v2
+; GFX8-NEXT:    v_or_b32_e32 v6, v7, v6
+; GFX8-NEXT:    v_mov_b32_e32 v7, 15
+; GFX8-NEXT:    v_mov_b32_e32 v9, -1
+; GFX8-NEXT:    v_and_b32_sdwa v8, v4, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX8-NEXT:    v_xor_b32_sdwa v4, v4, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX8-NEXT:    v_mov_b32_e32 v10, 1
 ; GFX8-NEXT:    v_and_b32_e32 v4, 15, v4
-; GFX8-NEXT:    v_lshrrev_b16_e32 v2, 1, v2
-; GFX8-NEXT:    v_lshlrev_b16_e32 v0, v9, v0
-; GFX8-NEXT:    v_lshrrev_b16_e32 v2, v4, v2
+; GFX8-NEXT:    v_lshlrev_b16_sdwa v0, v10, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX8-NEXT:    v_lshlrev_b16_e32 v0, v4, v0
+; GFX8-NEXT:    v_lshrrev_b16_sdwa v2, v8, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX8-NEXT:    v_xor_b32_e32 v4, -1, v5
 ; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
-; GFX8-NEXT:    v_lshlrev_b16_e32 v2, 1, v1
-; GFX8-NEXT:    v_lshrrev_b16_e32 v4, 15, v3
-; GFX8-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX8-NEXT:    v_or_b32_e32 v2, v2, v4
-; GFX8-NEXT:    v_lshlrev_b16_sdwa v1, v7, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX8-NEXT:    v_lshrrev_b16_sdwa v4, v8, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX8-NEXT:    v_xor_b32_e32 v5, -1, v5
-; GFX8-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
-; GFX8-NEXT:    v_or_b32_e32 v1, v1, v4
-; GFX8-NEXT:    v_lshlrev_b16_e32 v4, 1, v3
-; GFX8-NEXT:    v_lshlrev_b16_sdwa v3, v7, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX8-NEXT:    v_xor_b32_e32 v7, -1, v5
-; GFX8-NEXT:    v_or_b32_sdwa v0, v6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX8-NEXT:    v_and_b32_e32 v6, 15, v5
-; GFX8-NEXT:    v_and_b32_e32 v7, 15, v7
-; GFX8-NEXT:    v_lshrrev_b16_e32 v4, 1, v4
-; GFX8-NEXT:    v_lshlrev_b16_e32 v2, v6, v2
-; GFX8-NEXT:    v_lshrrev_b16_e32 v4, v7, v4
-; GFX8-NEXT:    v_or_b32_e32 v2, v2, v4
-; GFX8-NEXT:    v_and_b32_sdwa v4, v5, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; GFX8-NEXT:    v_xor_b32_sdwa v5, v5, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX8-NEXT:    v_and_b32_e32 v2, 15, v5
+; GFX8-NEXT:    v_and_b32_e32 v4, 15, v4
+; GFX8-NEXT:    v_lshlrev_b16_e32 v8, 1, v1
+; GFX8-NEXT:    v_lshlrev_b16_e32 v4, v4, v8
+; GFX8-NEXT:    v_lshrrev_b16_e32 v2, v2, v3
+; GFX8-NEXT:    v_or_b32_e32 v2, v4, v2
+; GFX8-NEXT:    v_and_b32_sdwa v4, v5, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX8-NEXT:    v_xor_b32_sdwa v5, v5, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
 ; GFX8-NEXT:    v_and_b32_e32 v5, 15, v5
-; GFX8-NEXT:    v_lshrrev_b16_e32 v3, 1, v3
-; GFX8-NEXT:    v_lshlrev_b16_e32 v1, v4, v1
-; GFX8-NEXT:    v_lshrrev_b16_e32 v3, v5, v3
+; GFX8-NEXT:    v_lshlrev_b16_sdwa v1, v10, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX8-NEXT:    v_lshlrev_b16_e32 v1, v5, v1
+; GFX8-NEXT:    v_lshrrev_b16_sdwa v3, v4, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
 ; GFX8-NEXT:    v_or_b32_e32 v1, v1, v3
+; GFX8-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX8-NEXT:    v_and_b32_e32 v1, 0xffff, v1
+; GFX8-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
 ; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
+; GFX8-NEXT:    v_or_b32_sdwa v0, v6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
 ; GFX8-NEXT:    v_or_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshr.mir
index 0a15cc3824ae7..7be4e756463eb 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshr.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshr.mir
@@ -196,90 +196,49 @@ body: |
     ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
     ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
     ; SI-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
-    ; SI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 1
     ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
-    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
     ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>)
-    ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
-    ; SI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 15
-    ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[C]], [[C2]]
-    ; SI-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1
-    ; SI-NEXT: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[C]], [[C3]]
-    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[XOR]], [[C2]]
-    ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[AND]](s16)
-    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[ZEXT]](s32)
-    ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C5]]
-    ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[C4]](s32)
-    ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[AND1]](s16)
-    ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LSHR2]], [[ZEXT1]](s32)
-    ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32)
-    ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = disjoint G_OR [[TRUNC]], [[TRUNC1]]
-    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[C]], [[C2]]
-    ; SI-NEXT: [[XOR1:%[0-9]+]]:_(s16) = G_XOR [[C]], [[C3]]
-    ; SI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[XOR1]], [[C2]]
-    ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[AND3]](s16)
-    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[ZEXT2]](s32)
+    ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+    ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>)
+    ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
+    ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+    ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 15
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1
+    ; SI-NEXT: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[TRUNC]], [[C2]]
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[XOR]], [[C1]]
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[C3]](s32)
+    ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[AND1]](s16)
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[ZEXT]](s32)
     ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
-    ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C4]](s32)
-    ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[LSHR1]], [[COPY3]](s32)
-    ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[AND4]](s16)
-    ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LSHR4]], [[ZEXT3]](s32)
-    ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32)
-    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s16) = disjoint G_OR [[TRUNC2]], [[TRUNC3]]
-    ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>)
-    ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C1]](s32)
-    ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C4]](s32)
-    ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[BITCAST2]], [[COPY4]](s32)
-    ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C4]](s32)
-    ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LSHR6]], [[COPY5]](s32)
-    ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C5]](s32)
-    ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C5]](s32)
-    ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[COPY7]], [[C1]](s32)
-    ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[COPY6]], [[SHL4]]
+    ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[AND]](s16)
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C4]]
+    ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[ZEXT1]](s32)
+    ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = disjoint G_OR [[TRUNC2]], [[TRUNC3]]
+    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
+    ; SI-NEXT: [[XOR1:%[0-9]+]]:_(s16) = G_XOR [[TRUNC1]], [[C2]]
+    ; SI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[XOR1]], [[C1]]
+    ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
+    ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[COPY3]](s32)
+    ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[AND4]](s16)
+    ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[SHL2]], [[ZEXT2]](s32)
+    ; SI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
+    ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[AND3]](s16)
+    ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[LSHR1]], [[ZEXT3]](s32)
+    ; SI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s16) = disjoint G_OR [[TRUNC4]], [[TRUNC5]]
+    ; SI-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; SI-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C]](s32)
+    ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL4]]
     ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; SI-NEXT: [[XOR2:%[0-9]+]]:_(<2 x s16>) = G_XOR [[COPY2]], [[BITCAST3]]
-    ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[XOR2]](<2 x s16>)
-    ; SI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32)
-    ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C1]](s32)
-    ; SI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR7]](s32)
-    ; SI-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C2]]
-    ; SI-NEXT: [[XOR3:%[0-9]+]]:_(s16) = G_XOR [[TRUNC4]], [[C3]]
-    ; SI-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[XOR3]], [[C2]]
-    ; SI-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[AND5]](s16)
-    ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
-    ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT]], [[ZEXT4]](s32)
-    ; SI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32)
-    ; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C4]](s32)
-    ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[SHL2]], [[C5]]
-    ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[COPY8]](s32)
-    ; SI-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[AND6]](s16)
-    ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[LSHR8]], [[ZEXT5]](s32)
-    ; SI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR9]](s32)
-    ; SI-NEXT: [[OR3:%[0-9]+]]:_(s16) = disjoint G_OR [[TRUNC6]], [[TRUNC7]]
-    ; SI-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C2]]
-    ; SI-NEXT: [[XOR4:%[0-9]+]]:_(s16) = G_XOR [[TRUNC5]], [[C3]]
-    ; SI-NEXT: [[AND9:%[0-9]+]]:_(s16) = G_AND [[XOR4]], [[C2]]
-    ; SI-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[AND8]](s16)
-    ; SI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16)
-    ; SI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT1]], [[ZEXT6]](s32)
-    ; SI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32)
-    ; SI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C4]](s32)
-    ; SI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[SHL3]], [[C5]]
-    ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND10]], [[COPY9]](s32)
-    ; SI-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[AND9]](s16)
-    ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[LSHR10]], [[ZEXT7]](s32)
-    ; SI-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR11]](s32)
-    ; SI-NEXT: [[OR4:%[0-9]+]]:_(s16) = disjoint G_OR [[TRUNC8]], [[TRUNC9]]
-    ; SI-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
-    ; SI-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16)
-    ; SI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C1]](s32)
-    ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL7]]
-    ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
-    ; SI-NEXT: $vgpr0 = COPY [[BITCAST5]](<2 x s16>)
+    ; SI-NEXT: $vgpr0 = COPY [[BITCAST3]](<2 x s16>)
     ;
     ; VI-LABEL: name: test_fshr_v2s16_v2s16
     ; VI: liveins: $vgpr0, $vgpr1, $vgpr2
@@ -287,68 +246,42 @@ body: |
     ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
     ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
     ; VI-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
-    ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 1
     ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
     ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
-    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
     ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
     ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>)
     ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
-    ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
+    ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
     ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
-    ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 15
-    ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[C]], [[C2]]
-    ; VI-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1
-    ; VI-NEXT: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[C]], [[C3]]
-    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[XOR]], [[C2]]
-    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[AND]](s16)
-    ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[C]](s16)
-    ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[LSHR2]], [[AND1]](s16)
-    ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = disjoint G_OR [[SHL]], [[LSHR3]]
-    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[C]], [[C2]]
-    ; VI-NEXT: [[XOR1:%[0-9]+]]:_(s16) = G_XOR [[C]], [[C3]]
-    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[XOR1]], [[C2]]
-    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[AND2]](s16)
-    ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC3]], [[C]](s16)
-    ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[LSHR4]], [[AND3]](s16)
-    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s16) = disjoint G_OR [[SHL1]], [[LSHR5]]
-    ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>)
+    ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>)
     ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
-    ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C1]](s32)
-    ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32)
-    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC4]], [[C]](s16)
-    ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[TRUNC5]], [[C]](s16)
-    ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C4]](s32)
-    ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C1]](s32)
-    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[COPY3]], [[SHL4]]
+    ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+    ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 15
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]]
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1
+    ; VI-NEXT: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[TRUNC4]], [[C2]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[XOR]], [[C1]]
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 1
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C3]](s16)
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[SHL]], [[AND1]](s16)
+    ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[AND]](s16)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = disjoint G_OR [[SHL1]], [[LSHR3]]
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]]
+    ; VI-NEXT: [[XOR1:%[0-9]+]]:_(s16) = G_XOR [[TRUNC5]], [[C2]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[XOR1]], [[C1]]
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C3]](s16)
+    ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[SHL2]], [[AND3]](s16)
+    ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC3]], [[AND2]](s16)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s16) = disjoint G_OR [[SHL3]], [[LSHR4]]
+    ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
     ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; VI-NEXT: [[XOR2:%[0-9]+]]:_(<2 x s16>) = G_XOR [[COPY2]], [[BITCAST3]]
-    ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[XOR2]](<2 x s16>)
-    ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32)
-    ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C1]](s32)
-    ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR7]](s32)
-    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C2]]
-    ; VI-NEXT: [[XOR3:%[0-9]+]]:_(s16) = G_XOR [[TRUNC6]], [[C3]]
-    ; VI-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[XOR3]], [[C2]]
-    ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[OR]], [[AND4]](s16)
-    ; VI-NEXT: [[LSHR8:%[0-9]+]]:_(s16) = G_LSHR [[SHL2]], [[C]](s16)
-    ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(s16) = G_LSHR [[LSHR8]], [[AND5]](s16)
-    ; VI-NEXT: [[OR3:%[0-9]+]]:_(s16) = disjoint G_OR [[SHL5]], [[LSHR9]]
-    ; VI-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C2]]
-    ; VI-NEXT: [[XOR4:%[0-9]+]]:_(s16) = G_XOR [[TRUNC7]], [[C3]]
-    ; VI-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[XOR4]], [[C2]]
-    ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[OR1]], [[AND6]](s16)
-    ; VI-NEXT: [[LSHR10:%[0-9]+]]:_(s16) = G_LSHR [[SHL3]], [[C]](s16)
-    ; VI-NEXT: [[LSHR11:%[0-9]+]]:_(s16) = G_LSHR [[LSHR10]], [[AND7]](s16)
-    ; VI-NEXT: [[OR4:%[0-9]+]]:_(s16) = disjoint G_OR [[SHL6]], [[LSHR11]]
-    ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
-    ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16)
-    ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32)
-    ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL7]]
-    ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
-    ; VI-NEXT: $vgpr0 = COPY [[BITCAST5]](<2 x s16>)
+    ; VI-NEXT: $vgpr0 = COPY [[BITCAST3]](<2 x s16>)
     ;
     ; GFX9-LABEL: name: test_fshr_v2s16_v2s16
     ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2
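For reference, the rewritten VI checks above spell out the generic per-lane
lowering of G_FSHR: the shift amount is masked to (amt & 15) for the right
shift of the second operand, while the first operand is pre-shifted left by
one and then shifted by (~amt & 15), so neither shift can reach the full
16-bit width even when amt is a multiple of 16. A minimal C++ sketch of that
identity (fshr16 is an illustrative name, not code from this patch):

  #include <cassert>
  #include <cstdint>

  // One 16-bit lane of G_FSHR after lowering: the result is the low half of
  // concat(hi, lo) >> (amt % 16). The ((hi << 1) << (~amt & 15)) form mirrors
  // the G_SHL/G_XOR/G_AND sequence in the checks and keeps both shift amounts
  // strictly below 16.
  static uint16_t fshr16(uint16_t hi, uint16_t lo, uint16_t amt) {
    uint16_t shr = amt & 15;                     // G_AND amt, 15
    uint16_t shl = (uint16_t)(~amt) & 15;        // G_XOR amt, -1; G_AND ..., 15
    uint32_t wide = ((uint32_t)hi << 1) << shl;  // hi << (16 - (amt & 15))
    return (uint16_t)(wide | (lo >> shr));
  }

  int main() {
    assert(fshr16(0xABCD, 0x1234, 4) == 0xD123); // low half of 0xABCD1234 >> 4
    assert(fshr16(0xABCD, 0x1234, 0) == 0x1234); // amt % 16 == 0 yields lo
    return 0;
  }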
@@ -699,136 +632,75 @@ body: |
     ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>)
     ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
     ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY3]](<2 x s16>)
-    ; SI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 1
-    ; SI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 15
-    ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[C1]], [[C2]]
-    ; SI-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1
-    ; SI-NEXT: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[C1]], [[C3]]
-    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[XOR]], [[C2]]
-    ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[AND]](s16)
-    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[ZEXT]](s32)
-    ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C5]]
-    ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[C4]](s32)
-    ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[AND1]](s16)
-    ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LSHR2]], [[ZEXT1]](s32)
-    ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32)
-    ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = disjoint G_OR [[TRUNC]], [[TRUNC1]]
-    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[C1]], [[C2]]
-    ; SI-NEXT: [[XOR1:%[0-9]+]]:_(s16) = G_XOR [[C1]], [[C3]]
-    ; SI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[XOR1]], [[C2]]
-    ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[AND3]](s16)
-    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[ZEXT2]](s32)
-    ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
-    ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C4]](s32)
-    ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[LSHR1]], [[COPY6]](s32)
-    ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[AND4]](s16)
-    ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LSHR4]], [[ZEXT3]](s32)
-    ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32)
-    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s16) = disjoint G_OR [[TRUNC2]], [[TRUNC3]]
-    ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C4]](s32)
-    ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[BITCAST2]], [[COPY7]](s32)
-    ; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C4]](s32)
-    ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[COPY8]](s32)
-    ; SI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C5]](s32)
-    ; SI-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C5]](s32)
-    ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[COPY10]], [[C]](s32)
-    ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[COPY9]], [[SHL4]]
-    ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; SI-NEXT: [[XOR2:%[0-9]+]]:_(<2 x s16>) = G_XOR [[COPY4]], [[BITCAST4]]
-    ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[XOR2]](<2 x s16>)
-    ; SI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32)
-    ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32)
-    ; SI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32)
-    ; SI-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C2]]
-    ; SI-NEXT: [[XOR3:%[0-9]+]]:_(s16) = G_XOR [[TRUNC4]], [[C3]]
-    ; SI-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[XOR3]], [[C2]]
-    ; SI-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[AND5]](s16)
-    ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
-    ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT]], [[ZEXT4]](s32)
-    ; SI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32)
-    ; SI-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C4]](s32)
-    ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[SHL2]], [[C5]]
-    ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[COPY11]](s32)
-    ; SI-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[AND6]](s16)
-    ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[LSHR7]], [[ZEXT5]](s32)
-    ; SI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR8]](s32)
-    ; SI-NEXT: [[OR3:%[0-9]+]]:_(s16) = disjoint G_OR [[TRUNC6]], [[TRUNC7]]
-    ; SI-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C2]]
-    ; SI-NEXT: [[XOR4:%[0-9]+]]:_(s16) = G_XOR [[TRUNC5]], [[C3]]
-    ; SI-NEXT: [[AND9:%[0-9]+]]:_(s16) = G_AND [[XOR4]], [[C2]]
-    ; SI-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[AND8]](s16)
-    ; SI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16)
-    ; SI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT1]], [[ZEXT6]](s32)
-    ; SI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32)
-    ; SI-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C4]](s32)
-    ; SI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[SHL3]], [[C5]]
-    ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[AND10]], [[COPY12]](s32)
-    ; SI-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[AND9]](s16)
-    ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[LSHR9]], [[ZEXT7]](s32)
-    ; SI-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR10]](s32)
-    ; SI-NEXT: [[OR4:%[0-9]+]]:_(s16) = disjoint G_OR [[TRUNC8]], [[TRUNC9]]
-    ; SI-NEXT: [[AND11:%[0-9]+]]:_(s16) = G_AND [[C1]], [[C2]]
-    ; SI-NEXT: [[XOR5:%[0-9]+]]:_(s16) = G_XOR [[C1]], [[C3]]
-    ; SI-NEXT: [[AND12:%[0-9]+]]:_(s16) = G_AND [[XOR5]], [[C2]]
-    ; SI-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[AND11]](s16)
-    ; SI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[ZEXT8]](s32)
-    ; SI-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32)
-    ; SI-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[C4]](s32)
-    ; SI-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C5]]
-    ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[AND13]], [[COPY13]](s32)
-    ; SI-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[AND12]](s16)
-    ; SI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[LSHR11]], [[ZEXT9]](s32)
-    ; SI-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR12]](s32)
-    ; SI-NEXT: [[OR5:%[0-9]+]]:_(s16) = disjoint G_OR [[TRUNC10]], [[TRUNC11]]
-    ; SI-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C4]](s32)
-    ; SI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[BITCAST3]], [[COPY14]](s32)
-    ; SI-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[C5]](s32)
-    ; SI-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[C5]](s32)
-    ; SI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C]](s32)
-    ; SI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL9]]
-    ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32)
-    ; SI-NEXT: [[XOR6:%[0-9]+]]:_(<2 x s16>) = G_XOR [[COPY5]], [[BITCAST6]]
-    ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[XOR6]](<2 x s16>)
-    ; SI-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST7]](s32)
-    ; SI-NEXT: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C2]]
-    ; SI-NEXT: [[XOR7:%[0-9]+]]:_(s16) = G_XOR [[TRUNC12]], [[C3]]
-    ; SI-NEXT: [[AND15:%[0-9]+]]:_(s16) = G_AND [[XOR7]], [[C2]]
-    ; SI-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[AND14]](s16)
-    ; SI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[OR5]](s16)
-    ; SI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT2]], [[ZEXT10]](s32)
-    ; SI-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL10]](s32)
-    ; SI-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[C4]](s32)
-    ; SI-NEXT: [[AND16:%[0-9]+]]:_(s32) = G_AND [[SHL8]], [[C5]]
-    ; SI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[AND16]], [[COPY17]](s32)
-    ; SI-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[AND15]](s16)
-    ; SI-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[LSHR13]], [[ZEXT11]](s32)
-    ; SI-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR14]](s32)
-    ; SI-NEXT: [[OR7:%[0-9]+]]:_(s16) = disjoint G_OR [[TRUNC13]], [[TRUNC14]]
+    ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[COPY4]](<2 x s16>)
+    ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32)
+    ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
+    ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
+    ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[COPY5]](<2 x s16>)
+    ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 15
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1
+    ; SI-NEXT: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[TRUNC]], [[C2]]
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[XOR]], [[C1]]
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[C3]](s32)
+    ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[AND1]](s16)
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[ZEXT]](s32)
+    ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[AND]](s16)
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C4]]
+    ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[ZEXT1]](s32)
+    ; SI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = disjoint G_OR [[TRUNC3]], [[TRUNC4]]
+    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
+    ; SI-NEXT: [[XOR1:%[0-9]+]]:_(s16) = G_XOR [[TRUNC1]], [[C2]]
+    ; SI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[XOR1]], [[C1]]
+    ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
+    ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[COPY6]](s32)
+    ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[AND4]](s16)
+    ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[SHL2]], [[ZEXT2]](s32)
+    ; SI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
+    ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[AND3]](s16)
+    ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[LSHR1]], [[ZEXT3]](s32)
+    ; SI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s16) = disjoint G_OR [[TRUNC5]], [[TRUNC6]]
+    ; SI-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
+    ; SI-NEXT: [[XOR2:%[0-9]+]]:_(s16) = G_XOR [[TRUNC2]], [[C2]]
+    ; SI-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[XOR2]], [[C1]]
+    ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
+    ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[COPY7]](s32)
+    ; SI-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[AND6]](s16)
+    ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[SHL4]], [[ZEXT4]](s32)
+    ; SI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32)
+    ; SI-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[AND5]](s16)
+    ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C4]]
+    ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[ZEXT5]](s32)
+    ; SI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32)
+    ; SI-NEXT: [[OR2:%[0-9]+]]:_(s16) = disjoint G_OR [[TRUNC7]], [[TRUNC8]]
     ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
     ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
-    ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; SI-NEXT: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C]](s32)
-    ; SI-NEXT: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; SI-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
-    ; SI-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16)
-    ; SI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT13]], [[C]](s32)
-    ; SI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT12]], [[SHL11]]
-    ; SI-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32)
-    ; SI-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16)
-    ; SI-NEXT: [[AND17:%[0-9]+]]:_(s32) = G_AND [[BITCAST8]], [[C5]]
-    ; SI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[C]](s32)
-    ; SI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[ZEXT14]], [[SHL12]]
-    ; SI-NEXT: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR9]](s32)
-    ; SI-NEXT: [[AND18:%[0-9]+]]:_(s32) = G_AND [[BITCAST9]], [[C5]]
-    ; SI-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND18]], [[C]](s32)
-    ; SI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[LSHR15]], [[SHL13]]
-    ; SI-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR10]](s32)
-    ; SI-NEXT: $vgpr0 = COPY [[BITCAST10]](<2 x s16>)
-    ; SI-NEXT: $vgpr1 = COPY [[BITCAST11]](<2 x s16>)
-    ; SI-NEXT: $vgpr2 = COPY [[BITCAST12]](<2 x s16>)
+    ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32)
+    ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; SI-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; SI-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; SI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C]](s32)
+    ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL6]]
+    ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+    ; SI-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; SI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST6]], [[C4]]
+    ; SI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C]](s32)
+    ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL7]]
+    ; SI-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+    ; SI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[BITCAST7]], [[C4]]
+    ; SI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32)
+    ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[LSHR6]], [[SHL8]]
+    ; SI-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+    ; SI-NEXT: $vgpr0 = COPY [[BITCAST8]](<2 x s16>)
+    ; SI-NEXT: $vgpr1 = COPY [[BITCAST9]](<2 x s16>)
+    ; SI-NEXT: $vgpr2 = COPY [[BITCAST10]](<2 x s16>)
     ;
     ; VI-LABEL: name: test_fshr_v3s16_v3s16
     ; VI: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
@@ -852,94 +724,59 @@ body: |
     ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
     ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY3]](<2 x s16>)
     ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32)
-    ; VI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 1
-    ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 15
-    ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[C1]], [[C2]]
-    ; VI-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1
-    ; VI-NEXT: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[C1]], [[C3]]
-    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[XOR]], [[C2]]
-    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[AND]](s16)
-    ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC3]], [[C1]](s16)
-    ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[LSHR2]], [[AND1]](s16)
-    ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = disjoint G_OR [[SHL]], [[LSHR3]]
-    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[C1]], [[C2]]
-    ; VI-NEXT: [[XOR1:%[0-9]+]]:_(s16) = G_XOR [[C1]], [[C3]]
-    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[XOR1]], [[C2]]
-    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[AND2]](s16)
-    ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC4]], [[C1]](s16)
-    ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[LSHR4]], [[AND3]](s16)
-    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s16) = disjoint G_OR [[SHL1]], [[LSHR5]]
-    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC3]], [[C1]](s16)
-    ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[TRUNC4]], [[C1]](s16)
-    ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; VI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C4]](s32)
-    ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C]](s32)
-    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[COPY6]], [[SHL4]]
-    ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; VI-NEXT: [[XOR2:%[0-9]+]]:_(<2 x s16>) = G_XOR [[COPY4]], [[BITCAST4]]
-    ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[XOR2]](<2 x s16>)
-    ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32)
-    ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32)
-    ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32)
-    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C2]]
-    ; VI-NEXT: [[XOR3:%[0-9]+]]:_(s16) = G_XOR [[TRUNC6]], [[C3]]
-    ; VI-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[XOR3]], [[C2]]
-    ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[OR]], [[AND4]](s16)
-    ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(s16) = G_LSHR [[SHL2]], [[C1]](s16)
-    ; VI-NEXT: [[LSHR8:%[0-9]+]]:_(s16) = G_LSHR [[LSHR7]], [[AND5]](s16)
-    ; VI-NEXT: [[OR3:%[0-9]+]]:_(s16) = disjoint G_OR [[SHL5]], [[LSHR8]]
-    ; VI-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C2]]
-    ; VI-NEXT: [[XOR4:%[0-9]+]]:_(s16) = G_XOR [[TRUNC7]], [[C3]]
-    ; VI-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[XOR4]], [[C2]]
-    ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[OR1]], [[AND6]](s16)
-    ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(s16) = G_LSHR [[SHL3]], [[C1]](s16)
-    ; VI-NEXT: [[LSHR10:%[0-9]+]]:_(s16) = G_LSHR [[LSHR9]], [[AND7]](s16)
-    ; VI-NEXT: [[OR4:%[0-9]+]]:_(s16) = disjoint G_OR [[SHL6]], [[LSHR10]]
-    ; VI-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[C1]], [[C2]]
-    ; VI-NEXT: [[XOR5:%[0-9]+]]:_(s16) = G_XOR [[C1]], [[C3]]
-    ; VI-NEXT: [[AND9:%[0-9]+]]:_(s16) = G_AND [[XOR5]], [[C2]]
-    ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[AND8]](s16)
-    ; VI-NEXT: [[LSHR11:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC5]], [[C1]](s16)
-    ; VI-NEXT: [[LSHR12:%[0-9]+]]:_(s16) = G_LSHR [[LSHR11]], [[AND9]](s16)
-    ; VI-NEXT: [[OR5:%[0-9]+]]:_(s16) = disjoint G_OR [[SHL7]], [[LSHR12]]
-    ; VI-NEXT: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[TRUNC5]], [[C1]](s16)
-    ; VI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C4]](s32)
-    ; VI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C4]](s32)
-    ; VI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[COPY8]], [[C]](s32)
-    ; VI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[COPY7]], [[SHL9]]
-    ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32)
-    ; VI-NEXT: [[XOR6:%[0-9]+]]:_(<2 x s16>) = G_XOR [[COPY5]], [[BITCAST6]]
-    ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[XOR6]](<2 x s16>)
-    ; VI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST7]](s32)
-    ; VI-NEXT: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C2]]
-    ; VI-NEXT: [[XOR7:%[0-9]+]]:_(s16) = G_XOR [[TRUNC8]], [[C3]]
-    ; VI-NEXT: [[AND11:%[0-9]+]]:_(s16) = G_AND [[XOR7]], [[C2]]
-    ; VI-NEXT: [[SHL10:%[0-9]+]]:_(s16) = G_SHL [[OR5]], [[AND10]](s16)
-    ; VI-NEXT: [[LSHR13:%[0-9]+]]:_(s16) = G_LSHR [[SHL8]], [[C1]](s16)
-    ; VI-NEXT: [[LSHR14:%[0-9]+]]:_(s16) = G_LSHR [[LSHR13]], [[AND11]](s16)
-    ; VI-NEXT: [[OR7:%[0-9]+]]:_(s16) = disjoint G_OR [[SHL10]], [[LSHR14]]
+    ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[COPY4]](<2 x s16>)
+    ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32)
+    ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
+    ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
+    ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[COPY5]](<2 x s16>)
+    ; VI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 15
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C1]]
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1
+    ; VI-NEXT: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[TRUNC6]], [[C2]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[XOR]], [[C1]]
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 1
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C3]](s16)
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[SHL]], [[AND1]](s16)
+    ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC3]], [[AND]](s16)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = disjoint G_OR [[SHL1]], [[LSHR3]]
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C1]]
+    ; VI-NEXT: [[XOR1:%[0-9]+]]:_(s16) = G_XOR [[TRUNC7]], [[C2]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[XOR1]], [[C1]]
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C3]](s16)
+    ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[SHL2]], [[AND3]](s16)
+    ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC4]], [[AND2]](s16)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s16) = disjoint G_OR [[SHL3]], [[LSHR4]]
+    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C1]]
+    ; VI-NEXT: [[XOR2:%[0-9]+]]:_(s16) = G_XOR [[TRUNC8]], [[C2]]
+    ; VI-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[XOR2]], [[C1]]
+    ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[C3]](s16)
+    ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[SHL4]], [[AND5]](s16)
+    ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC5]], [[AND4]](s16)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s16) = disjoint G_OR [[SHL5]], [[LSHR5]]
     ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
     ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
-    ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; VI-NEXT: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C]](s32)
-    ; VI-NEXT: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
-    ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16)
-    ; VI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32)
-    ; VI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL11]]
-    ; VI-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32)
-    ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16)
-    ; VI-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[BITCAST8]], [[C4]]
-    ; VI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND12]], [[C]](s32)
-    ; VI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL12]]
-    ; VI-NEXT: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR9]](s32)
-    ; VI-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[BITCAST9]], [[C4]]
-    ; VI-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C]](s32)
-    ; VI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[LSHR15]], [[SHL13]]
-    ; VI-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR10]](s32)
-    ; VI-NEXT: $vgpr0 = COPY [[BITCAST10]](<2 x s16>)
-    ; VI-NEXT: $vgpr1 = COPY [[BITCAST11]](<2 x s16>)
-    ; VI-NEXT: $vgpr2 = COPY [[BITCAST12]](<2 x s16>)
+    ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32)
+    ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32)
+    ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL6]]
+    ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+    ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST6]], [[C4]]
+    ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C]](s32)
+    ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL7]]
+    ; VI-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+    ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST7]], [[C4]]
+    ; VI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32)
+    ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[LSHR6]], [[SHL8]]
+    ; VI-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+    ; VI-NEXT: $vgpr0 = COPY [[BITCAST8]](<2 x s16>)
+    ; VI-NEXT: $vgpr1 = COPY [[BITCAST9]](<2 x s16>)
+    ; VI-NEXT: $vgpr2 = COPY [[BITCAST10]](<2 x s16>)
     ;
     ; GFX9-LABEL: name: test_fshr_v3s16_v3s16
     ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
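Two things are visible in the v3s16 checks above. First, the odd element
count is handled by padding to <4 x s16> with G_IMPLICIT_DEF and unmerging,
so only three of the four lanes carry defined results. Second, with no
packed instructions available, each lane is lowered independently and the
16-bit results are recombined by the trailing G_ZEXT/G_SHL/G_OR/G_BITCAST
sequence. A sketch of that repack, reusing the hypothetical fshr16 helper
from the earlier sketch:

  // One <2 x s16> register modeled as the bits of a uint32_t: lane 0 in the
  // low half, lane 1 in the high half. Each lane is lowered on its own, then
  // the results are zero-extended, shifted, and OR'd back together.
  static uint32_t fshr_v2i16(uint32_t a, uint32_t b, uint32_t amt) {
    uint16_t lane0 = fshr16((uint16_t)a, (uint16_t)b, (uint16_t)amt);
    uint16_t lane1 = fshr16((uint16_t)(a >> 16), (uint16_t)(b >> 16),
                            (uint16_t)(amt >> 16));
    return (uint32_t)lane0 | ((uint32_t)lane1 << 16); // G_ZEXT/G_SHL/G_OR
  }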
@@ -1024,170 +861,89 @@ body: |
     ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3
     ; SI-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr4_vgpr5
     ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>)
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+    ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
     ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>)
+    ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+    ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+    ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+    ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
     ; SI-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<4 x s16>)
-    ; SI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 1
-    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32)
-    ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
-    ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
-    ; SI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 15
-    ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[C]], [[C2]]
-    ; SI-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1
-    ; SI-NEXT: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[C]], [[C3]]
-    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[XOR]], [[C2]]
-    ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[AND]](s16)
-    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[ZEXT]](s32)
-    ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C5]]
-    ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[C4]](s32)
-    ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[AND1]](s16)
-    ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LSHR2]], [[ZEXT1]](s32)
-    ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32)
-    ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = disjoint G_OR [[TRUNC]], [[TRUNC1]]
-    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[C]], [[C2]]
-    ; SI-NEXT: [[XOR1:%[0-9]+]]:_(s16) = G_XOR [[C]], [[C3]]
-    ; SI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[XOR1]], [[C2]]
-    ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[AND3]](s16)
-    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[ZEXT2]](s32)
-    ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
-    ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C4]](s32)
-    ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[LSHR1]], [[COPY3]](s32)
-    ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[AND4]](s16)
-    ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LSHR4]], [[ZEXT3]](s32)
+    ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
+    ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32)
+    ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
+    ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32)
+    ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>)
+    ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32)
+    ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32)
     ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32)
-    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s16) = disjoint G_OR [[TRUNC2]], [[TRUNC3]]
-    ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
-    ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C1]](s32)
-    ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C4]](s32)
-    ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[BITCAST2]], [[COPY4]](s32)
-    ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C4]](s32)
-    ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LSHR6]], [[COPY5]](s32)
-    ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C5]](s32)
-    ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C5]](s32)
-    ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[COPY7]], [[C1]](s32)
-    ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[COPY6]], [[SHL4]]
-    ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; SI-NEXT: [[XOR2:%[0-9]+]]:_(<2 x s16>) = G_XOR [[UV4]], [[BITCAST3]]
-    ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[XOR2]](<2 x s16>)
-    ; SI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32)
-    ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C1]](s32)
-    ; SI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR7]](s32)
-    ; SI-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C2]]
-    ; SI-NEXT: [[XOR3:%[0-9]+]]:_(s16) = G_XOR [[TRUNC4]], [[C3]]
-    ; SI-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[XOR3]], [[C2]]
-    ; SI-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[AND5]](s16)
-    ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
-    ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT]], [[ZEXT4]](s32)
-    ; SI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32)
-    ; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C4]](s32)
-    ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[SHL2]], [[C5]]
-    ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[COPY8]](s32)
-    ; SI-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[AND6]](s16)
-    ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[LSHR8]], [[ZEXT5]](s32)
-    ; SI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR9]](s32)
-    ; SI-NEXT: [[OR3:%[0-9]+]]:_(s16) = disjoint G_OR [[TRUNC6]], [[TRUNC7]]
-    ; SI-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C2]]
-    ; SI-NEXT: [[XOR4:%[0-9]+]]:_(s16) = G_XOR [[TRUNC5]], [[C3]]
-    ; SI-NEXT: [[AND9:%[0-9]+]]:_(s16) = G_AND [[XOR4]], [[C2]]
-    ; SI-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[AND8]](s16)
-    ; SI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16)
-    ; SI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT1]], [[ZEXT6]](s32)
-    ; SI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32)
-    ; SI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C4]](s32)
-    ; SI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[SHL3]], [[C5]]
-    ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND10]], [[COPY9]](s32)
-    ; SI-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[AND9]](s16)
-    ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[LSHR10]], [[ZEXT7]](s32)
-    ; SI-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR11]](s32)
-    ; SI-NEXT: [[OR4:%[0-9]+]]:_(s16) = disjoint G_OR [[TRUNC8]], [[TRUNC9]]
-    ; SI-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
-    ; SI-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16)
-    ; SI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C1]](s32)
-    ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL7]]
-    ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
-    ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; SI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C1]](s32)
-    ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
-    ; SI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C1]](s32)
-    ; SI-NEXT: [[AND11:%[0-9]+]]:_(s16) = G_AND [[C]], [[C2]]
-    ; SI-NEXT: [[XOR5:%[0-9]+]]:_(s16) = G_XOR [[C]], [[C3]]
-    ; SI-NEXT: [[AND12:%[0-9]+]]:_(s16) = G_AND [[XOR5]], [[C2]]
-    ; SI-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[AND11]](s16)
-    ; SI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[BITCAST6]], [[ZEXT10]](s32)
-    ; SI-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[SHL8]](s32)
-    ; SI-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C4]](s32)
-    ; SI-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[BITCAST7]], [[C5]]
-    ; SI-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[AND13]], [[COPY10]](s32)
-    ; SI-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[AND12]](s16)
-    ; SI-NEXT: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[LSHR14]], [[ZEXT11]](s32)
-    ; SI-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR15]](s32)
-    ; SI-NEXT: [[OR6:%[0-9]+]]:_(s16) = disjoint G_OR [[TRUNC10]], [[TRUNC11]]
-    ; SI-NEXT: [[AND14:%[0-9]+]]:_(s16) = G_AND [[C]], [[C2]]
-    ; SI-NEXT: [[XOR6:%[0-9]+]]:_(s16) = G_XOR [[C]], [[C3]]
-    ; SI-NEXT: [[AND15:%[0-9]+]]:_(s16) = G_AND [[XOR6]], [[C2]]
-    ; SI-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[AND14]](s16)
-    ; SI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[LSHR12]], [[ZEXT12]](s32)
-    ; SI-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32)
-    ; SI-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C4]](s32)
-    ; SI-NEXT: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[LSHR13]], [[COPY11]](s32)
-    ; SI-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[AND15]](s16)
-    ; SI-NEXT: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[LSHR16]], [[ZEXT13]](s32)
-    ; SI-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR17]](s32)
-    ; SI-NEXT: [[OR7:%[0-9]+]]:_(s16) = disjoint G_OR [[TRUNC12]], [[TRUNC13]]
-    ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
-    ; SI-NEXT: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C1]](s32)
-    ; SI-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C4]](s32)
-    ; SI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[BITCAST8]], [[COPY12]](s32)
-    ; SI-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[C4]](s32)
-    ; SI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[LSHR18]], [[COPY13]](s32)
-    ; SI-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C5]](s32)
-    ; SI-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[C5]](s32)
-    ; SI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[COPY15]], [[C1]](s32)
-    ; SI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[COPY14]], [[SHL12]]
-    ; SI-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32)
-    ; SI-NEXT: [[XOR7:%[0-9]+]]:_(<2 x s16>) = G_XOR [[UV5]], [[BITCAST9]]
-    ; SI-NEXT: [[BITCAST10:%[0-9]+]]:_(s32) = G_BITCAST [[XOR7]](<2 x s16>)
-    ; SI-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST10]](s32)
-    ; SI-NEXT: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST10]], [[C1]](s32)
-    ; SI-NEXT: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR19]](s32)
-    ; SI-NEXT: [[AND16:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C2]]
-    ; SI-NEXT: [[XOR8:%[0-9]+]]:_(s16) = G_XOR [[TRUNC14]], [[C3]]
-    ; SI-NEXT: [[AND17:%[0-9]+]]:_(s16) = G_AND [[XOR8]], [[C2]]
-    ; SI-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[AND16]](s16)
-    ; SI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[OR6]](s16)
-    ; SI-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT2]], [[ZEXT14]](s32)
-    ; SI-NEXT: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[SHL13]](s32)
-    ; SI-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[C4]](s32)
-    ; SI-NEXT: [[AND18:%[0-9]+]]:_(s32) = G_AND [[SHL10]], [[C5]]
-    ; SI-NEXT: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[AND18]], [[COPY16]](s32)
-    ; SI-NEXT: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[AND17]](s16)
-    ; SI-NEXT: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[LSHR20]], [[ZEXT15]](s32)
-    ; SI-NEXT: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR21]](s32)
-    ; SI-NEXT: [[OR9:%[0-9]+]]:_(s16) = disjoint G_OR [[TRUNC16]], [[TRUNC17]]
-    ; SI-NEXT: [[AND19:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C2]]
-    ; SI-NEXT: [[XOR9:%[0-9]+]]:_(s16) = G_XOR [[TRUNC15]], [[C3]]
-    ; SI-NEXT: [[AND20:%[0-9]+]]:_(s16) = G_AND [[XOR9]], [[C2]]
-    ; SI-NEXT: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[AND19]](s16)
-    ; SI-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[OR7]](s16)
-    ; SI-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT3]], [[ZEXT16]](s32)
-    ; SI-NEXT: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[SHL14]](s32)
-    ; SI-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[C4]](s32)
-    ; SI-NEXT: [[AND21:%[0-9]+]]:_(s32) = G_AND [[SHL11]], [[C5]]
-    ; SI-NEXT: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[AND21]], [[COPY17]](s32)
-    ; SI-NEXT: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[AND20]](s16)
-    ; SI-NEXT: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[LSHR22]], [[ZEXT17]](s32)
-    ; SI-NEXT: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR23]](s32)
-    ; SI-NEXT: [[OR10:%[0-9]+]]:_(s16) = disjoint G_OR [[TRUNC18]], [[TRUNC19]]
-    ; SI-NEXT: [[ZEXT18:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16)
-    ; SI-NEXT: [[ZEXT19:%[0-9]+]]:_(s32) = G_ZEXT [[OR10]](s16)
-    ; SI-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[ZEXT19]], [[C1]](s32)
-    ; SI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT18]], [[SHL15]]
-    ; SI-NEXT: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR11]](s32)
-    ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST11]](<2 x s16>)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 15
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1
+    ; SI-NEXT: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[TRUNC]], [[C2]]
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[XOR]], [[C1]]
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[C3]](s32)
+    ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[AND1]](s16)
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[ZEXT]](s32)
+    ; SI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[AND]](s16)
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C4]]
+    ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[ZEXT1]](s32)
+    ; SI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = disjoint G_OR [[TRUNC4]], [[TRUNC5]]
+    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
+    ; SI-NEXT: [[XOR1:%[0-9]+]]:_(s16) = G_XOR [[TRUNC1]], [[C2]]
+    ; SI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[XOR1]], [[C1]]
+    ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
+    ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[COPY3]](s32)
+    ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[AND4]](s16)
+    ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[SHL2]], [[ZEXT2]](s32)
+    ; SI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
+    ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[AND3]](s16)
+    ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[LSHR2]], [[ZEXT3]](s32)
+    ; SI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR7]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s16) = disjoint G_OR [[TRUNC6]], [[TRUNC7]]
+    ; SI-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
+    ; SI-NEXT: [[XOR2:%[0-9]+]]:_(s16) = G_XOR [[TRUNC2]], [[C2]]
+    ; SI-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[XOR2]], [[C1]]
+    ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
+    ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[COPY4]](s32)
+    ; SI-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[AND6]](s16)
+    ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[SHL4]], [[ZEXT4]](s32)
+    ; SI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32)
+    ; SI-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[AND5]](s16)
+    ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C4]]
+    ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[ZEXT5]](s32)
+    ; SI-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR8]](s32)
+    ; SI-NEXT: [[OR2:%[0-9]+]]:_(s16) = disjoint G_OR [[TRUNC8]], [[TRUNC9]]
+    ; SI-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]]
+    ; SI-NEXT: [[XOR3:%[0-9]+]]:_(s16) = G_XOR [[TRUNC3]], [[C2]]
+    ; SI-NEXT: [[AND9:%[0-9]+]]:_(s16) = G_AND [[XOR3]], [[C1]]
+    ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
+    ; SI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[COPY5]](s32)
+    ; SI-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[AND9]](s16)
+    ; SI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[SHL6]], [[ZEXT6]](s32)
+    ; SI-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32)
+    ; SI-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[AND8]](s16)
+    ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[LSHR3]], [[ZEXT7]](s32)
+    ; SI-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR9]](s32)
+    ; SI-NEXT: [[OR3:%[0-9]+]]:_(s16) = disjoint G_OR [[TRUNC10]], [[TRUNC11]]
+    ; SI-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; SI-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; SI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C]](s32)
+    ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL8]]
+    ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+    ; SI-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; SI-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
+    ; SI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXT11]], [[C]](s32)
+    ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT10]], [[SHL9]]
+    ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+    ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>)
     ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
     ;
     ; VI-LABEL: name: test_fshr_v4s16_v4s16
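The v4s16 case repeats the same pattern once per half: G_UNMERGE_VALUES
splits each <4 x s16> operand into two <2 x s16> registers, each half is
lowered as above, and G_CONCAT_VECTORS rebuilds the result, as the SI checks
above already show. Continuing the sketch (illustrative names only):

  // A <4 x s16> value modeled as two uint32_t halves. G_UNMERGE_VALUES and
  // G_CONCAT_VECTORS correspond to indexing into and rebuilding the array.
  static void fshr_v4i16(uint32_t out[2], const uint32_t a[2],
                         const uint32_t b[2], const uint32_t amt[2]) {
    out[0] = fshr_v2i16(a[0], b[0], amt[0]); // lanes 0-1
    out[1] = fshr_v2i16(a[1], b[1], amt[1]); // lanes 2-3
  }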
@@ -1197,127 +953,75 @@ body: |
     ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3
     ; VI-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr4_vgpr5
     ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>)
-    ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>)
-    ; VI-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<4 x s16>)
-    ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 1
     ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
     ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
-    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
     ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
-    ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+    ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
     ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
-    ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
+    ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
     ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
-    ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 15
-    ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[C]], [[C2]]
-    ; VI-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1
-    ; VI-NEXT: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[C]], [[C3]]
-    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[XOR]], [[C2]]
-    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[AND]](s16)
-    ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[C]](s16)
-    ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[LSHR2]], [[AND1]](s16)
-    ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = disjoint G_OR [[SHL]], [[LSHR3]]
-    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[C]], [[C2]]
-    ; VI-NEXT: [[XOR1:%[0-9]+]]:_(s16) = G_XOR [[C]], [[C3]]
-    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[XOR1]], [[C2]]
-    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[AND2]](s16)
-    ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC3]], [[C]](s16)
-    ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[LSHR4]], [[AND3]](s16)
-    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s16) = disjoint G_OR [[SHL1]], [[LSHR5]]
+    ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>)
     ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
     ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
-    ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C1]](s32)
-    ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32)
-    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC4]], [[C]](s16)
-    ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[TRUNC5]], [[C]](s16)
-    ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C4]](s32)
-    ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C1]](s32)
-    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[COPY3]], [[SHL4]]
-    ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; VI-NEXT: [[XOR2:%[0-9]+]]:_(<2 x s16>) = G_XOR [[UV4]], [[BITCAST3]]
-    ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[XOR2]](<2 x s16>)
-    ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32)
-    ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C1]](s32)
-    ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR7]](s32)
-    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C2]]
-    ; VI-NEXT: [[XOR3:%[0-9]+]]:_(s16) = G_XOR [[TRUNC6]], [[C3]]
-    ; VI-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[XOR3]], [[C2]]
-    ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[OR]], [[AND4]](s16)
-    ; VI-NEXT: [[LSHR8:%[0-9]+]]:_(s16) = G_LSHR [[SHL2]], [[C]](s16)
-    ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(s16) = G_LSHR [[LSHR8]], [[AND5]](s16)
-    ; VI-NEXT: [[OR3:%[0-9]+]]:_(s16) = disjoint G_OR [[SHL5]], [[LSHR9]]
-    ; VI-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C2]]
-    ; VI-NEXT: [[XOR4:%[0-9]+]]:_(s16) = G_XOR [[TRUNC7]], [[C3]]
-    ; VI-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[XOR4]], [[C2]]
-    ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[OR1]], [[AND6]](s16)
-    ; VI-NEXT: [[LSHR10:%[0-9]+]]:_(s16) = G_LSHR [[SHL3]], [[C]](s16)
-    ; VI-NEXT: [[LSHR11:%[0-9]+]]:_(s16) = G_LSHR [[LSHR10]], [[AND7]](s16)
-    ; VI-NEXT: [[OR4:%[0-9]+]]:_(s16) = disjoint G_OR [[SHL6]], [[LSHR11]]
-    ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
-    ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16)
-    ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32)
-    ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL7]]
-    ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
-    ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; VI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST6]](s32)
-    ; VI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C1]](s32)
-    ; VI-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR12]](s32)
-    ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
-    ; VI-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST7]](s32)
-    ; VI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C1]](s32)
-    ; VI-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR13]](s32)
-    ; VI-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[C]], [[C2]]
-    ; VI-NEXT: [[XOR5:%[0-9]+]]:_(s16) = G_XOR [[C]], [[C3]]
-    ; VI-NEXT: [[AND9:%[0-9]+]]:_(s16) = G_AND [[XOR5]], [[C2]]
-    ; VI-NEXT: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[TRUNC8]], [[AND8]](s16)
-    ; VI-NEXT: [[LSHR14:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC10]], [[C]](s16)
-    ; VI-NEXT: [[LSHR15:%[0-9]+]]:_(s16) = G_LSHR [[LSHR14]], [[AND9]](s16)
-    ; VI-NEXT: [[OR6:%[0-9]+]]:_(s16) = disjoint G_OR [[SHL8]], [[LSHR15]]
-    ; VI-NEXT: [[AND10:%[0-9]+]]:_(s16) = G_AND [[C]], [[C2]]
-    ; VI-NEXT: [[XOR6:%[0-9]+]]:_(s16) = G_XOR [[C]], [[C3]]
-    ; VI-NEXT: [[AND11:%[0-9]+]]:_(s16) = G_AND [[XOR6]], [[C2]]
-    ; VI-NEXT: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[TRUNC9]], [[AND10]](s16)
-    ; VI-NEXT: [[LSHR16:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC11]], [[C]](s16)
-    ; VI-NEXT: [[LSHR17:%[0-9]+]]:_(s16) = G_LSHR [[LSHR16]], [[AND11]](s16)
-    ; VI-NEXT: [[OR7:%[0-9]+]]:_(s16) = disjoint G_OR [[SHL9]], [[LSHR17]]
-    ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
-    ; VI-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST8]](s32)
-    ; VI-NEXT: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C1]](s32)
-    ; VI-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR18]](s32)
-    ; VI-NEXT: [[SHL10:%[0-9]+]]:_(s16) = G_SHL [[TRUNC12]], [[C]](s16)
-    ; VI-NEXT: [[SHL11:%[0-9]+]]:_(s16) = G_SHL [[TRUNC13]], [[C]](s16)
-    ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C4]](s32)
-    ; VI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C4]](s32)
-    ; VI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[COPY5]], [[C1]](s32)
-    ; VI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[COPY4]], [[SHL12]]
-    ; VI-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32)
-    ; VI-NEXT: [[XOR7:%[0-9]+]]:_(<2 x s16>) = G_XOR [[UV5]], [[BITCAST9]]
-    ; VI-NEXT: [[BITCAST10:%[0-9]+]]:_(s32) = G_BITCAST [[XOR7]](<2 x s16>)
-    ; VI-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST10]](s32)
-    ; VI-NEXT: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST10]], [[C1]](s32)
-    ; VI-NEXT: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR19]](s32)
-    ; VI-NEXT: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C2]]
-    ; VI-NEXT: [[XOR8:%[0-9]+]]:_(s16) = G_XOR [[TRUNC14]], [[C3]]
-    ; VI-NEXT: [[AND13:%[0-9]+]]:_(s16) = G_AND [[XOR8]], [[C2]]
-    ; VI-NEXT: [[SHL13:%[0-9]+]]:_(s16) = G_SHL [[OR6]], [[AND12]](s16)
-    ; VI-NEXT: [[LSHR20:%[0-9]+]]:_(s16) = G_LSHR [[SHL10]], [[C]](s16)
-    ; VI-NEXT: [[LSHR21:%[0-9]+]]:_(s16) = G_LSHR [[LSHR20]], [[AND13]](s16)
-    ; VI-NEXT: [[OR9:%[0-9]+]]:_(s16) = disjoint G_OR [[SHL13]], [[LSHR21]]
-    ; VI-NEXT: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C2]]
-    ; VI-NEXT: [[XOR9:%[0-9]+]]:_(s16) = G_XOR [[TRUNC15]], [[C3]]
-    ; VI-NEXT: [[AND15:%[0-9]+]]:_(s16) = G_AND [[XOR9]], [[C2]]
-    ; VI-NEXT: [[SHL14:%[0-9]+]]:_(s16) = G_SHL [[OR7]], [[AND14]](s16)
-    ; VI-NEXT: [[LSHR22:%[0-9]+]]:_(s16) = G_LSHR [[SHL11]], [[C]](s16)
-    ; VI-NEXT: [[LSHR23:%[0-9]+]]:_(s16) = G_LSHR [[LSHR22]], [[AND15]](s16)
-    ; VI-NEXT: [[OR10:%[0-9]+]]:_(s16) = disjoint G_OR [[SHL14]], [[LSHR23]]
-    ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16)
-    ; VI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR10]](s16)
-    ; VI-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C1]](s32)
-    ; VI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL15]]
-    ; VI-NEXT: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR11]](s32)
-    ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST11]](<2 x s16>)
+    ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+    ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
+    ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+    ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32)
+    ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+    ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32)
+    ; VI-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<4 x s16>)
+    ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
+    ; VI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32)
+    ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
+    ; VI-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32)
+    ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>)
+    ; VI-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32)
+    ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32)
+    ; VI-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 15
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C1]]
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1
+    ; VI-NEXT: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[TRUNC8]], [[C2]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[XOR]], [[C1]]
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 1
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C3]](s16)
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[SHL]], [[AND1]](s16)
+    ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC4]], [[AND]](s16)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = disjoint G_OR [[SHL1]], [[LSHR6]]
+    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C1]]
+    ; VI-NEXT: [[XOR1:%[0-9]+]]:_(s16) = G_XOR [[TRUNC9]], [[C2]]
+    ; VI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[XOR1]], [[C1]]
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C3]](s16)
+    ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[SHL2]], [[AND3]](s16)
+    ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC5]], [[AND2]](s16)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s16) = disjoint G_OR [[SHL3]], [[LSHR7]]
+    ; VI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C1]]
+    ; VI-NEXT: [[XOR2:%[0-9]+]]:_(s16) = G_XOR [[TRUNC10]], [[C2]]
+    ; VI-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[XOR2]], [[C1]]
+    ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[C3]](s16)
+    ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[SHL4]], [[AND5]](s16)
+    ; VI-NEXT: [[LSHR8:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC6]], [[AND4]](s16)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s16) = disjoint G_OR [[SHL5]], [[LSHR8]]
+    ; VI-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C1]]
+    ; VI-NEXT: [[XOR3:%[0-9]+]]:_(s16) = G_XOR [[TRUNC11]], [[C2]]
+    ; VI-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[XOR3]], [[C1]]
+    ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[TRUNC3]], [[C3]](s16)
+    ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[SHL6]], [[AND7]](s16)
+    ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC7]], [[AND6]](s16)
+    ; VI-NEXT: [[OR3:%[0-9]+]]:_(s16) = disjoint G_OR [[SHL7]], [[LSHR9]]
+    ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; VI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32)
+    ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL8]]
+    ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+    ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+    ; VI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
+    ; VI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32)
+    ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL9]]
+    ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+    ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>)
     ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
     ;
     ; GFX9-LABEL: name: test_fshr_v4s16_v4s16
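For reference, the MIR test above (test_fshr_v4s16_v4s16) exercises G_FSHR legalization for <4 x s16>. A minimal IR reproducer is sketched below; it assumes the standard llvm.fshr intrinsic, and the function name is illustrative rather than taken from the patch:

; Illustrative reproducer (not from the patch) for the
; test_fshr_v4s16_v4s16 MIR test: funnel-shift-right on <4 x i16>.
define <4 x i16> @fshr_v4i16(<4 x i16> %lhs, <4 x i16> %rhs, <4 x i16> %amt) {
  %res = call <4 x i16> @llvm.fshr.v4i16(<4 x i16> %lhs, <4 x i16> %rhs, <4 x i16> %amt)
  ret <4 x i16> %res
}
declare <4 x i16> @llvm.fshr.v4i16(<4 x i16>, <4 x i16>, <4 x i16>)

On a subtarget without packed-math support (the VI run above), the new check lines show the legalizer scalarizing directly to four s16 shift/or sequences, which is where the shorter expected output in this diff comes from.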


