[llvm] [AMDGPU] Add known bits for G_AMDGPU_COPY_SCC_VCC (PR #180560)

via llvm-commits llvm-commits at lists.llvm.org
Mon Feb 9 09:02:32 PST 2026


https://github.com/vangthao95 created https://github.com/llvm/llvm-project/pull/180560

None

>From 799bdb9e313b351ef54deb22d0c904e02d1e0934 Mon Sep 17 00:00:00 2001
From: Vang Thao <vang.thao at amd.com>
Date: Mon, 9 Feb 2026 09:01:29 -0800
Subject: [PATCH] [AMDGPU] Add known bits for G_AMDGPU_COPY_SCC_VCC

---
 llvm/lib/Target/AMDGPU/SIISelLowering.cpp     |  5 ++
 llvm/test/CodeGen/AMDGPU/GlobalISel/addo.ll   |  3 -
 llvm/test/CodeGen/AMDGPU/GlobalISel/fcmp.ll   | 71 +---------------
 .../CodeGen/AMDGPU/GlobalISel/fmax_legacy.ll  |  3 -
 .../CodeGen/AMDGPU/GlobalISel/fmin_legacy.ll  |  3 -
 llvm/test/CodeGen/AMDGPU/GlobalISel/icmp.ll   | 82 +++++++------------
 .../GlobalISel/inst-select-copy-scc-vcc.ll    | 10 ---
 .../AMDGPU/GlobalISel/regbankselect-mui.ll    |  3 -
 .../test/CodeGen/AMDGPU/GlobalISel/saddsat.ll | 71 ++++------------
 .../test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll | 75 ++++-------------
 llvm/test/CodeGen/AMDGPU/GlobalISel/subo.ll   |  3 -
 .../CodeGen/AMDGPU/llvm.is.fpclass.f16.ll     | 39 +++------
 llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.ll   | 48 ++---------
 13 files changed, 86 insertions(+), 330 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index cdf6fb97d0b3b..ef881a9bac739 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -18584,6 +18584,11 @@ void SITargetLowering::computeKnownBitsForTargetInstr(
   case AMDGPU::G_AMDGPU_BUFFER_LOAD_USHORT:
     Known.Zero.setHighBits(16);
     break;
+  case AMDGPU::G_AMDGPU_COPY_SCC_VCC:
+    // G_AMDGPU_COPY_SCC_VCC converts a uniform boolean in VCC to SGPR s32,
+    // producing exactly 0 or 1.
+    Known.Zero.setHighBits(Known.getBitWidth() - 1);
+    break;
   case AMDGPU::G_AMDGPU_SMED3:
   case AMDGPU::G_AMDGPU_UMED3: {
     auto [Dst, Src0, Src1, Src2] = MI->getFirst4Regs();
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/addo.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/addo.ll
index 6a3077d149a2d..e935dd3dd476e 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/addo.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/addo.ll
@@ -704,7 +704,6 @@ define amdgpu_ps i64 @s_saddo_i64(i64 inreg %a, i64 inreg %b) {
 ; GFX7-NEXT:    s_or_b64 s[0:1], s[0:1], s[0:1]
 ; GFX7-NEXT:    s_cselect_b32 s0, 1, 0
 ; GFX7-NEXT:    s_xor_b32 s0, s0, s6
-; GFX7-NEXT:    s_and_b32 s0, s0, 1
 ; GFX7-NEXT:    s_cmp_lg_u32 s0, 0
 ; GFX7-NEXT:    s_cselect_b32 s0, 1, 0
 ; GFX7-NEXT:    s_add_u32 s0, s4, s0
@@ -724,7 +723,6 @@ define amdgpu_ps i64 @s_saddo_i64(i64 inreg %a, i64 inreg %b) {
 ; GFX8-NEXT:    s_cmp_lg_u64 s[0:1], 0
 ; GFX8-NEXT:    s_cselect_b32 s0, 1, 0
 ; GFX8-NEXT:    s_xor_b32 s0, s0, s6
-; GFX8-NEXT:    s_and_b32 s0, s0, 1
 ; GFX8-NEXT:    s_cmp_lg_u32 s0, 0
 ; GFX8-NEXT:    s_cselect_b32 s0, 1, 0
 ; GFX8-NEXT:    s_add_u32 s0, s4, s0
@@ -744,7 +742,6 @@ define amdgpu_ps i64 @s_saddo_i64(i64 inreg %a, i64 inreg %b) {
 ; GFX9-NEXT:    s_cmp_lg_u64 s[0:1], 0
 ; GFX9-NEXT:    s_cselect_b32 s0, 1, 0
 ; GFX9-NEXT:    s_xor_b32 s0, s0, s6
-; GFX9-NEXT:    s_and_b32 s0, s0, 1
 ; GFX9-NEXT:    s_cmp_lg_u32 s0, 0
 ; GFX9-NEXT:    s_cselect_b32 s0, 1, 0
 ; GFX9-NEXT:    s_add_u32 s0, s4, s0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fcmp.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fcmp.ll
index e00f0238b3bcf..496c6597e9afd 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fcmp.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fcmp.ll
@@ -48,46 +48,32 @@ define void @fcmp_f16_uniform(half inreg %a, half inreg %b, ptr %p) {
 ; GFX10-NEXT:    s_cselect_b32 s17, 1, 0
 ; GFX10-NEXT:    s_cmp_lg_u32 s16, 0
 ; GFX10-NEXT:    s_cselect_b32 s16, 1, 0
-; GFX10-NEXT:    s_and_b32 s4, s4, 1
 ; GFX10-NEXT:    s_cmp_lg_u32 s4, 0
 ; GFX10-NEXT:    s_cselect_b32 s4, 1, 0
-; GFX10-NEXT:    s_and_b32 s5, s5, 1
 ; GFX10-NEXT:    s_cmp_lg_u32 s5, 0
 ; GFX10-NEXT:    s_cselect_b32 s5, 1, 0
-; GFX10-NEXT:    s_and_b32 s6, s6, 1
 ; GFX10-NEXT:    s_cmp_lg_u32 s6, 0
 ; GFX10-NEXT:    s_cselect_b32 s6, 1, 0
-; GFX10-NEXT:    s_and_b32 s7, s7, 1
 ; GFX10-NEXT:    s_cmp_lg_u32 s7, 0
 ; GFX10-NEXT:    s_cselect_b32 s7, 1, 0
-; GFX10-NEXT:    s_and_b32 s8, s8, 1
 ; GFX10-NEXT:    s_cmp_lg_u32 s8, 0
 ; GFX10-NEXT:    s_cselect_b32 s8, 1, 0
-; GFX10-NEXT:    s_and_b32 s9, s9, 1
 ; GFX10-NEXT:    s_cmp_lg_u32 s9, 0
 ; GFX10-NEXT:    s_cselect_b32 s9, 1, 0
-; GFX10-NEXT:    s_and_b32 s10, s10, 1
 ; GFX10-NEXT:    s_cmp_lg_u32 s10, 0
 ; GFX10-NEXT:    s_cselect_b32 s10, 1, 0
-; GFX10-NEXT:    s_and_b32 s11, s11, 1
 ; GFX10-NEXT:    s_cmp_lg_u32 s11, 0
 ; GFX10-NEXT:    s_cselect_b32 s11, 1, 0
-; GFX10-NEXT:    s_and_b32 s12, s12, 1
 ; GFX10-NEXT:    s_cmp_lg_u32 s12, 0
 ; GFX10-NEXT:    s_cselect_b32 s12, 1, 0
-; GFX10-NEXT:    s_and_b32 s13, s13, 1
 ; GFX10-NEXT:    s_cmp_lg_u32 s13, 0
 ; GFX10-NEXT:    s_cselect_b32 s13, 1, 0
-; GFX10-NEXT:    s_and_b32 s14, s14, 1
 ; GFX10-NEXT:    s_cmp_lg_u32 s14, 0
 ; GFX10-NEXT:    s_cselect_b32 s14, 1, 0
-; GFX10-NEXT:    s_and_b32 s15, s15, 1
 ; GFX10-NEXT:    s_cmp_lg_u32 s15, 0
 ; GFX10-NEXT:    s_cselect_b32 s15, 1, 0
-; GFX10-NEXT:    s_and_b32 s17, s17, 1
 ; GFX10-NEXT:    s_cmp_lg_u32 s17, 0
 ; GFX10-NEXT:    s_cselect_b32 s17, 1, 0
-; GFX10-NEXT:    s_and_b32 s16, s16, 1
 ; GFX10-NEXT:    s_cmp_lg_u32 s16, 0
 ; GFX10-NEXT:    s_cselect_b32 s16, 1, 0
 ; GFX10-NEXT:    s_add_i32 s4, s4, s5
@@ -447,46 +433,32 @@ define void @fcmp_f32_uniform(float inreg %a, float inreg %b, ptr %p) {
 ; GFX10-NEXT:    s_cselect_b32 s17, 1, 0
 ; GFX10-NEXT:    s_cmp_lg_u32 s16, 0
 ; GFX10-NEXT:    s_cselect_b32 s16, 1, 0
-; GFX10-NEXT:    s_and_b32 s4, s4, 1
 ; GFX10-NEXT:    s_cmp_lg_u32 s4, 0
 ; GFX10-NEXT:    s_cselect_b32 s4, 1, 0
-; GFX10-NEXT:    s_and_b32 s5, s5, 1
 ; GFX10-NEXT:    s_cmp_lg_u32 s5, 0
 ; GFX10-NEXT:    s_cselect_b32 s5, 1, 0
-; GFX10-NEXT:    s_and_b32 s6, s6, 1
 ; GFX10-NEXT:    s_cmp_lg_u32 s6, 0
 ; GFX10-NEXT:    s_cselect_b32 s6, 1, 0
-; GFX10-NEXT:    s_and_b32 s7, s7, 1
 ; GFX10-NEXT:    s_cmp_lg_u32 s7, 0
 ; GFX10-NEXT:    s_cselect_b32 s7, 1, 0
-; GFX10-NEXT:    s_and_b32 s8, s8, 1
 ; GFX10-NEXT:    s_cmp_lg_u32 s8, 0
 ; GFX10-NEXT:    s_cselect_b32 s8, 1, 0
-; GFX10-NEXT:    s_and_b32 s9, s9, 1
 ; GFX10-NEXT:    s_cmp_lg_u32 s9, 0
 ; GFX10-NEXT:    s_cselect_b32 s9, 1, 0
-; GFX10-NEXT:    s_and_b32 s10, s10, 1
 ; GFX10-NEXT:    s_cmp_lg_u32 s10, 0
 ; GFX10-NEXT:    s_cselect_b32 s10, 1, 0
-; GFX10-NEXT:    s_and_b32 s11, s11, 1
 ; GFX10-NEXT:    s_cmp_lg_u32 s11, 0
 ; GFX10-NEXT:    s_cselect_b32 s11, 1, 0
-; GFX10-NEXT:    s_and_b32 s12, s12, 1
 ; GFX10-NEXT:    s_cmp_lg_u32 s12, 0
 ; GFX10-NEXT:    s_cselect_b32 s12, 1, 0
-; GFX10-NEXT:    s_and_b32 s13, s13, 1
 ; GFX10-NEXT:    s_cmp_lg_u32 s13, 0
 ; GFX10-NEXT:    s_cselect_b32 s13, 1, 0
-; GFX10-NEXT:    s_and_b32 s14, s14, 1
 ; GFX10-NEXT:    s_cmp_lg_u32 s14, 0
 ; GFX10-NEXT:    s_cselect_b32 s14, 1, 0
-; GFX10-NEXT:    s_and_b32 s15, s15, 1
 ; GFX10-NEXT:    s_cmp_lg_u32 s15, 0
 ; GFX10-NEXT:    s_cselect_b32 s15, 1, 0
-; GFX10-NEXT:    s_and_b32 s17, s17, 1
 ; GFX10-NEXT:    s_cmp_lg_u32 s17, 0
 ; GFX10-NEXT:    s_cselect_b32 s17, 1, 0
-; GFX10-NEXT:    s_and_b32 s16, s16, 1
 ; GFX10-NEXT:    s_cmp_lg_u32 s16, 0
 ; GFX10-NEXT:    s_cselect_b32 s16, 1, 0
 ; GFX10-NEXT:    s_add_i32 s4, s4, s5
@@ -846,46 +818,32 @@ define void @fcmp_f64_uniform(double inreg %a, double inreg %b, ptr %p) {
 ; GFX10-NEXT:    s_cselect_b32 s17, 1, 0
 ; GFX10-NEXT:    s_cmp_lg_u32 s16, 0
 ; GFX10-NEXT:    s_cselect_b32 s16, 1, 0
-; GFX10-NEXT:    s_and_b32 s4, s4, 1
 ; GFX10-NEXT:    s_cmp_lg_u32 s4, 0
 ; GFX10-NEXT:    s_cselect_b32 s4, 1, 0
-; GFX10-NEXT:    s_and_b32 s5, s5, 1
 ; GFX10-NEXT:    s_cmp_lg_u32 s5, 0
 ; GFX10-NEXT:    s_cselect_b32 s5, 1, 0
-; GFX10-NEXT:    s_and_b32 s6, s6, 1
 ; GFX10-NEXT:    s_cmp_lg_u32 s6, 0
 ; GFX10-NEXT:    s_cselect_b32 s6, 1, 0
-; GFX10-NEXT:    s_and_b32 s7, s7, 1
 ; GFX10-NEXT:    s_cmp_lg_u32 s7, 0
 ; GFX10-NEXT:    s_cselect_b32 s7, 1, 0
-; GFX10-NEXT:    s_and_b32 s8, s8, 1
 ; GFX10-NEXT:    s_cmp_lg_u32 s8, 0
 ; GFX10-NEXT:    s_cselect_b32 s8, 1, 0
-; GFX10-NEXT:    s_and_b32 s9, s9, 1
 ; GFX10-NEXT:    s_cmp_lg_u32 s9, 0
 ; GFX10-NEXT:    s_cselect_b32 s9, 1, 0
-; GFX10-NEXT:    s_and_b32 s10, s10, 1
 ; GFX10-NEXT:    s_cmp_lg_u32 s10, 0
 ; GFX10-NEXT:    s_cselect_b32 s10, 1, 0
-; GFX10-NEXT:    s_and_b32 s11, s11, 1
 ; GFX10-NEXT:    s_cmp_lg_u32 s11, 0
 ; GFX10-NEXT:    s_cselect_b32 s11, 1, 0
-; GFX10-NEXT:    s_and_b32 s12, s12, 1
 ; GFX10-NEXT:    s_cmp_lg_u32 s12, 0
 ; GFX10-NEXT:    s_cselect_b32 s12, 1, 0
-; GFX10-NEXT:    s_and_b32 s13, s13, 1
 ; GFX10-NEXT:    s_cmp_lg_u32 s13, 0
 ; GFX10-NEXT:    s_cselect_b32 s13, 1, 0
-; GFX10-NEXT:    s_and_b32 s14, s14, 1
 ; GFX10-NEXT:    s_cmp_lg_u32 s14, 0
 ; GFX10-NEXT:    s_cselect_b32 s14, 1, 0
-; GFX10-NEXT:    s_and_b32 s15, s15, 1
 ; GFX10-NEXT:    s_cmp_lg_u32 s15, 0
 ; GFX10-NEXT:    s_cselect_b32 s15, 1, 0
-; GFX10-NEXT:    s_and_b32 s17, s17, 1
 ; GFX10-NEXT:    s_cmp_lg_u32 s17, 0
 ; GFX10-NEXT:    s_cselect_b32 s17, 1, 0
-; GFX10-NEXT:    s_and_b32 s16, s16, 1
 ; GFX10-NEXT:    s_cmp_lg_u32 s16, 0
 ; GFX10-NEXT:    s_cselect_b32 s16, 1, 0
 ; GFX10-NEXT:    s_add_i32 s4, s4, s5
@@ -956,62 +914,35 @@ define void @fcmp_f64_uniform(double inreg %a, double inreg %b, ptr %p) {
 ; GFX12-NEXT:    s_cmp_lg_u32 s0, 0
 ; GFX12-NEXT:    s_cselect_b32 s0, 1, 0
 ; GFX12-NEXT:    s_wait_alu depctr_sa_sdst(0)
-; GFX12-NEXT:    s_and_b32 s4, s4, 1
-; GFX12-NEXT:    s_wait_alu depctr_sa_sdst(0)
 ; GFX12-NEXT:    s_cmp_lg_u32 s4, 0
 ; GFX12-NEXT:    s_cselect_b32 s4, 1, 0
-; GFX12-NEXT:    s_and_b32 s1, s1, 1
-; GFX12-NEXT:    s_wait_alu depctr_sa_sdst(0)
 ; GFX12-NEXT:    s_cmp_lg_u32 s1, 0
 ; GFX12-NEXT:    s_cselect_b32 s1, 1, 0
-; GFX12-NEXT:    s_and_b32 s2, s2, 1
-; GFX12-NEXT:    s_wait_alu depctr_sa_sdst(0)
 ; GFX12-NEXT:    s_cmp_lg_u32 s2, 0
 ; GFX12-NEXT:    s_cselect_b32 s2, 1, 0
-; GFX12-NEXT:    s_and_b32 s3, s3, 1
-; GFX12-NEXT:    s_wait_alu depctr_sa_sdst(0)
 ; GFX12-NEXT:    s_cmp_lg_u32 s3, 0
 ; GFX12-NEXT:    s_cselect_b32 s3, 1, 0
-; GFX12-NEXT:    s_and_b32 s5, s5, 1
-; GFX12-NEXT:    s_wait_alu depctr_sa_sdst(0)
 ; GFX12-NEXT:    s_cmp_lg_u32 s5, 0
 ; GFX12-NEXT:    s_cselect_b32 s5, 1, 0
-; GFX12-NEXT:    s_and_b32 s6, s6, 1
-; GFX12-NEXT:    s_wait_alu depctr_sa_sdst(0)
 ; GFX12-NEXT:    s_cmp_lg_u32 s6, 0
 ; GFX12-NEXT:    s_cselect_b32 s6, 1, 0
-; GFX12-NEXT:    s_and_b32 s7, s7, 1
-; GFX12-NEXT:    s_wait_alu depctr_sa_sdst(0)
 ; GFX12-NEXT:    s_cmp_lg_u32 s7, 0
 ; GFX12-NEXT:    s_cselect_b32 s7, 1, 0
-; GFX12-NEXT:    s_and_b32 s8, s8, 1
-; GFX12-NEXT:    s_wait_alu depctr_sa_sdst(0)
 ; GFX12-NEXT:    s_cmp_lg_u32 s8, 0
 ; GFX12-NEXT:    s_cselect_b32 s8, 1, 0
-; GFX12-NEXT:    s_and_b32 s9, s9, 1
-; GFX12-NEXT:    s_wait_alu depctr_sa_sdst(0)
 ; GFX12-NEXT:    s_cmp_lg_u32 s9, 0
 ; GFX12-NEXT:    s_cselect_b32 s9, 1, 0
-; GFX12-NEXT:    s_and_b32 s10, s10, 1
-; GFX12-NEXT:    s_wait_alu depctr_sa_sdst(0)
 ; GFX12-NEXT:    s_cmp_lg_u32 s10, 0
 ; GFX12-NEXT:    s_cselect_b32 s10, 1, 0
-; GFX12-NEXT:    s_and_b32 s11, s11, 1
-; GFX12-NEXT:    s_wait_alu depctr_sa_sdst(0)
 ; GFX12-NEXT:    s_cmp_lg_u32 s11, 0
 ; GFX12-NEXT:    s_cselect_b32 s11, 1, 0
-; GFX12-NEXT:    s_and_b32 s12, s12, 1
-; GFX12-NEXT:    s_wait_alu depctr_sa_sdst(0)
 ; GFX12-NEXT:    s_cmp_lg_u32 s12, 0
 ; GFX12-NEXT:    s_cselect_b32 s12, 1, 0
-; GFX12-NEXT:    s_and_b32 s13, s13, 1
-; GFX12-NEXT:    s_wait_alu depctr_sa_sdst(0)
 ; GFX12-NEXT:    s_cmp_lg_u32 s13, 0
 ; GFX12-NEXT:    s_cselect_b32 s13, 1, 0
-; GFX12-NEXT:    s_and_b32 s0, s0, 1
-; GFX12-NEXT:    s_wait_alu depctr_sa_sdst(0)
 ; GFX12-NEXT:    s_cmp_lg_u32 s0, 0
 ; GFX12-NEXT:    s_cselect_b32 s0, 1, 0
+; GFX12-NEXT:    s_wait_alu depctr_sa_sdst(0)
 ; GFX12-NEXT:    s_add_co_i32 s1, s4, s1
 ; GFX12-NEXT:    s_wait_alu depctr_sa_sdst(0)
 ; GFX12-NEXT:    s_add_co_i32 s1, s1, s2
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fmax_legacy.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fmax_legacy.ll
index c2b71f563990f..51d8b0523e644 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fmax_legacy.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fmax_legacy.ll
@@ -287,9 +287,6 @@ define amdgpu_ps float @s_test_fmax_legacy_f32(float inreg %a, float inreg %b) {
 ; GFX8-NEXT:    v_mov_b32_e32 v0, s3
 ; GFX8-NEXT:    v_cmp_gt_f32_e32 vcc, s2, v0
 ; GFX8-NEXT:    s_cmp_lg_u64 vcc, 0
-; GFX8-NEXT:    s_cselect_b32 s0, 1, 0
-; GFX8-NEXT:    s_and_b32 s0, s0, 1
-; GFX8-NEXT:    s_cmp_lg_u32 s0, 0
 ; GFX8-NEXT:    s_cselect_b32 s0, s2, s3
 ; GFX8-NEXT:    v_mov_b32_e32 v0, s0
 ; GFX8-NEXT:    ; return to shader part epilog
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fmin_legacy.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fmin_legacy.ll
index a3b87dd7d66ae..ef9646e6b845f 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fmin_legacy.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fmin_legacy.ll
@@ -416,9 +416,6 @@ define amdgpu_ps float @s_test_fmin_legacy_f32(float inreg %a, float inreg %b) {
 ; GFX8-NEXT:    v_mov_b32_e32 v0, s3
 ; GFX8-NEXT:    v_cmp_le_f32_e32 vcc, s2, v0
 ; GFX8-NEXT:    s_cmp_lg_u64 vcc, 0
-; GFX8-NEXT:    s_cselect_b32 s0, 1, 0
-; GFX8-NEXT:    s_and_b32 s0, s0, 1
-; GFX8-NEXT:    s_cmp_lg_u32 s0, 0
 ; GFX8-NEXT:    s_cselect_b32 s0, s2, s3
 ; GFX8-NEXT:    v_mov_b32_e32 v0, s0
 ; GFX8-NEXT:    ; return to shader part epilog
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/icmp.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/icmp.ll
index d90cead68524e..1b4417890004e 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/icmp.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/icmp.ll
@@ -1094,39 +1094,34 @@ define void @icmp_p0_uniform(ptr inreg %a, ptr inreg %b, ptr addrspace(1) %p) {
 ; HAWAII-NEXT:    v_mov_b32_e32 v2, s18
 ; HAWAII-NEXT:    v_mov_b32_e32 v3, s19
 ; HAWAII-NEXT:    v_cmp_eq_u64_e32 vcc, s[16:17], v[2:3]
+; HAWAII-NEXT:    s_mov_b32 s6, 0
 ; HAWAII-NEXT:    s_or_b64 s[4:5], vcc, vcc
 ; HAWAII-NEXT:    v_cmp_ne_u64_e32 vcc, s[16:17], v[2:3]
-; HAWAII-NEXT:    s_cselect_b32 s6, 1, 0
+; HAWAII-NEXT:    s_cselect_b32 s7, 1, 0
 ; HAWAII-NEXT:    s_or_b64 s[4:5], vcc, vcc
 ; HAWAII-NEXT:    v_cmp_lt_u64_e32 vcc, s[16:17], v[2:3]
-; HAWAII-NEXT:    s_cselect_b32 s7, 1, 0
+; HAWAII-NEXT:    s_cselect_b32 s8, 1, 0
 ; HAWAII-NEXT:    s_or_b64 s[4:5], vcc, vcc
 ; HAWAII-NEXT:    v_cmp_gt_u64_e32 vcc, s[16:17], v[2:3]
-; HAWAII-NEXT:    s_cselect_b32 s8, 1, 0
+; HAWAII-NEXT:    s_cselect_b32 s9, 1, 0
 ; HAWAII-NEXT:    s_or_b64 s[4:5], vcc, vcc
 ; HAWAII-NEXT:    v_cmp_le_u64_e32 vcc, s[16:17], v[2:3]
-; HAWAII-NEXT:    s_cselect_b32 s9, 1, 0
+; HAWAII-NEXT:    s_cselect_b32 s10, 1, 0
 ; HAWAII-NEXT:    s_or_b64 s[4:5], vcc, vcc
 ; HAWAII-NEXT:    v_cmp_ge_u64_e32 vcc, s[16:17], v[2:3]
-; HAWAII-NEXT:    s_cselect_b32 s10, 1, 0
+; HAWAII-NEXT:    s_cselect_b32 s11, 1, 0
 ; HAWAII-NEXT:    s_or_b64 s[4:5], vcc, vcc
 ; HAWAII-NEXT:    s_cselect_b32 s4, 1, 0
-; HAWAII-NEXT:    s_and_b32 s5, s6, 1
-; HAWAII-NEXT:    s_cmp_lg_u32 s5, 0
-; HAWAII-NEXT:    s_cselect_b32 s5, 1, 0
-; HAWAII-NEXT:    s_and_b32 s7, s7, 1
 ; HAWAII-NEXT:    s_cmp_lg_u32 s7, 0
-; HAWAII-NEXT:    s_cselect_b32 s7, 1, 0
-; HAWAII-NEXT:    s_and_b32 s8, s8, 1
+; HAWAII-NEXT:    s_cselect_b32 s5, 1, 0
 ; HAWAII-NEXT:    s_cmp_lg_u32 s8, 0
-; HAWAII-NEXT:    s_cselect_b32 s8, 1, 0
-; HAWAII-NEXT:    s_and_b32 s9, s9, 1
+; HAWAII-NEXT:    s_cselect_b32 s7, 1, 0
 ; HAWAII-NEXT:    s_cmp_lg_u32 s9, 0
-; HAWAII-NEXT:    s_cselect_b32 s9, 1, 0
-; HAWAII-NEXT:    s_and_b32 s10, s10, 1
+; HAWAII-NEXT:    s_cselect_b32 s8, 1, 0
 ; HAWAII-NEXT:    s_cmp_lg_u32 s10, 0
+; HAWAII-NEXT:    s_cselect_b32 s9, 1, 0
+; HAWAII-NEXT:    s_cmp_lg_u32 s11, 0
 ; HAWAII-NEXT:    s_cselect_b32 s10, 1, 0
-; HAWAII-NEXT:    s_and_b32 s4, s4, 1
 ; HAWAII-NEXT:    s_cmp_lg_u32 s4, 0
 ; HAWAII-NEXT:    s_cselect_b32 s4, 1, 0
 ; HAWAII-NEXT:    s_add_i32 s5, s5, s7
@@ -1134,7 +1129,6 @@ define void @icmp_p0_uniform(ptr inreg %a, ptr inreg %b, ptr addrspace(1) %p) {
 ; HAWAII-NEXT:    s_add_i32 s5, s5, s9
 ; HAWAII-NEXT:    s_add_i32 s5, s5, s10
 ; HAWAII-NEXT:    s_add_i32 s4, s5, s4
-; HAWAII-NEXT:    s_mov_b32 s6, 0
 ; HAWAII-NEXT:    v_mov_b32_e32 v2, s4
 ; HAWAII-NEXT:    s_mov_b32 s7, 0xf000
 ; HAWAII-NEXT:    s_mov_b64 s[4:5], 0
@@ -1170,22 +1164,15 @@ define void @icmp_p0_uniform(ptr inreg %a, ptr inreg %b, ptr addrspace(1) %p) {
 ; GFX12-NEXT:    s_cselect_b32 s2, 1, 0
 ; GFX12-NEXT:    s_cmp_lg_u32 s6, 0
 ; GFX12-NEXT:    s_cselect_b32 s3, 1, 0
-; GFX12-NEXT:    s_and_b32 s4, s5, 1
-; GFX12-NEXT:    s_wait_alu depctr_sa_sdst(0)
-; GFX12-NEXT:    s_cmp_lg_u32 s4, 0
-; GFX12-NEXT:    s_cselect_b32 s4, 1, 0
-; GFX12-NEXT:    s_and_b32 s5, s7, 1
-; GFX12-NEXT:    s_wait_alu depctr_sa_sdst(0)
 ; GFX12-NEXT:    s_cmp_lg_u32 s5, 0
+; GFX12-NEXT:    s_cselect_b32 s4, 1, 0
+; GFX12-NEXT:    s_cmp_lg_u32 s7, 0
 ; GFX12-NEXT:    s_cselect_b32 s5, 1, 0
-; GFX12-NEXT:    s_and_b32 s1, s1, 1
-; GFX12-NEXT:    s_wait_alu depctr_sa_sdst(0)
 ; GFX12-NEXT:    s_cmp_lg_u32 s1, 0
 ; GFX12-NEXT:    s_cselect_b32 s1, 1, 0
-; GFX12-NEXT:    s_and_b32 s0, s0, 1
-; GFX12-NEXT:    s_wait_alu depctr_sa_sdst(0)
 ; GFX12-NEXT:    s_cmp_lg_u32 s0, 0
 ; GFX12-NEXT:    s_cselect_b32 s0, 1, 0
+; GFX12-NEXT:    s_wait_alu depctr_sa_sdst(0)
 ; GFX12-NEXT:    s_add_co_i32 s2, s2, s3
 ; GFX12-NEXT:    s_wait_alu depctr_sa_sdst(0)
 ; GFX12-NEXT:    s_add_co_i32 s2, s2, s4
@@ -1308,39 +1295,34 @@ define void @icmp_p1_uniform(ptr addrspace(1) inreg %a, ptr addrspace(1) inreg %
 ; HAWAII-NEXT:    v_mov_b32_e32 v2, s18
 ; HAWAII-NEXT:    v_mov_b32_e32 v3, s19
 ; HAWAII-NEXT:    v_cmp_eq_u64_e32 vcc, s[16:17], v[2:3]
+; HAWAII-NEXT:    s_mov_b32 s6, 0
 ; HAWAII-NEXT:    s_or_b64 s[4:5], vcc, vcc
 ; HAWAII-NEXT:    v_cmp_ne_u64_e32 vcc, s[16:17], v[2:3]
-; HAWAII-NEXT:    s_cselect_b32 s6, 1, 0
+; HAWAII-NEXT:    s_cselect_b32 s7, 1, 0
 ; HAWAII-NEXT:    s_or_b64 s[4:5], vcc, vcc
 ; HAWAII-NEXT:    v_cmp_lt_u64_e32 vcc, s[16:17], v[2:3]
-; HAWAII-NEXT:    s_cselect_b32 s7, 1, 0
+; HAWAII-NEXT:    s_cselect_b32 s8, 1, 0
 ; HAWAII-NEXT:    s_or_b64 s[4:5], vcc, vcc
 ; HAWAII-NEXT:    v_cmp_gt_u64_e32 vcc, s[16:17], v[2:3]
-; HAWAII-NEXT:    s_cselect_b32 s8, 1, 0
+; HAWAII-NEXT:    s_cselect_b32 s9, 1, 0
 ; HAWAII-NEXT:    s_or_b64 s[4:5], vcc, vcc
 ; HAWAII-NEXT:    v_cmp_le_u64_e32 vcc, s[16:17], v[2:3]
-; HAWAII-NEXT:    s_cselect_b32 s9, 1, 0
+; HAWAII-NEXT:    s_cselect_b32 s10, 1, 0
 ; HAWAII-NEXT:    s_or_b64 s[4:5], vcc, vcc
 ; HAWAII-NEXT:    v_cmp_ge_u64_e32 vcc, s[16:17], v[2:3]
-; HAWAII-NEXT:    s_cselect_b32 s10, 1, 0
+; HAWAII-NEXT:    s_cselect_b32 s11, 1, 0
 ; HAWAII-NEXT:    s_or_b64 s[4:5], vcc, vcc
 ; HAWAII-NEXT:    s_cselect_b32 s4, 1, 0
-; HAWAII-NEXT:    s_and_b32 s5, s6, 1
-; HAWAII-NEXT:    s_cmp_lg_u32 s5, 0
-; HAWAII-NEXT:    s_cselect_b32 s5, 1, 0
-; HAWAII-NEXT:    s_and_b32 s7, s7, 1
 ; HAWAII-NEXT:    s_cmp_lg_u32 s7, 0
-; HAWAII-NEXT:    s_cselect_b32 s7, 1, 0
-; HAWAII-NEXT:    s_and_b32 s8, s8, 1
+; HAWAII-NEXT:    s_cselect_b32 s5, 1, 0
 ; HAWAII-NEXT:    s_cmp_lg_u32 s8, 0
-; HAWAII-NEXT:    s_cselect_b32 s8, 1, 0
-; HAWAII-NEXT:    s_and_b32 s9, s9, 1
+; HAWAII-NEXT:    s_cselect_b32 s7, 1, 0
 ; HAWAII-NEXT:    s_cmp_lg_u32 s9, 0
-; HAWAII-NEXT:    s_cselect_b32 s9, 1, 0
-; HAWAII-NEXT:    s_and_b32 s10, s10, 1
+; HAWAII-NEXT:    s_cselect_b32 s8, 1, 0
 ; HAWAII-NEXT:    s_cmp_lg_u32 s10, 0
+; HAWAII-NEXT:    s_cselect_b32 s9, 1, 0
+; HAWAII-NEXT:    s_cmp_lg_u32 s11, 0
 ; HAWAII-NEXT:    s_cselect_b32 s10, 1, 0
-; HAWAII-NEXT:    s_and_b32 s4, s4, 1
 ; HAWAII-NEXT:    s_cmp_lg_u32 s4, 0
 ; HAWAII-NEXT:    s_cselect_b32 s4, 1, 0
 ; HAWAII-NEXT:    s_add_i32 s5, s5, s7
@@ -1348,7 +1330,6 @@ define void @icmp_p1_uniform(ptr addrspace(1) inreg %a, ptr addrspace(1) inreg %
 ; HAWAII-NEXT:    s_add_i32 s5, s5, s9
 ; HAWAII-NEXT:    s_add_i32 s5, s5, s10
 ; HAWAII-NEXT:    s_add_i32 s4, s5, s4
-; HAWAII-NEXT:    s_mov_b32 s6, 0
 ; HAWAII-NEXT:    v_mov_b32_e32 v2, s4
 ; HAWAII-NEXT:    s_mov_b32 s7, 0xf000
 ; HAWAII-NEXT:    s_mov_b64 s[4:5], 0
@@ -1384,22 +1365,15 @@ define void @icmp_p1_uniform(ptr addrspace(1) inreg %a, ptr addrspace(1) inreg %
 ; GFX12-NEXT:    s_cselect_b32 s2, 1, 0
 ; GFX12-NEXT:    s_cmp_lg_u32 s6, 0
 ; GFX12-NEXT:    s_cselect_b32 s3, 1, 0
-; GFX12-NEXT:    s_and_b32 s4, s5, 1
-; GFX12-NEXT:    s_wait_alu depctr_sa_sdst(0)
-; GFX12-NEXT:    s_cmp_lg_u32 s4, 0
-; GFX12-NEXT:    s_cselect_b32 s4, 1, 0
-; GFX12-NEXT:    s_and_b32 s5, s7, 1
-; GFX12-NEXT:    s_wait_alu depctr_sa_sdst(0)
 ; GFX12-NEXT:    s_cmp_lg_u32 s5, 0
+; GFX12-NEXT:    s_cselect_b32 s4, 1, 0
+; GFX12-NEXT:    s_cmp_lg_u32 s7, 0
 ; GFX12-NEXT:    s_cselect_b32 s5, 1, 0
-; GFX12-NEXT:    s_and_b32 s1, s1, 1
-; GFX12-NEXT:    s_wait_alu depctr_sa_sdst(0)
 ; GFX12-NEXT:    s_cmp_lg_u32 s1, 0
 ; GFX12-NEXT:    s_cselect_b32 s1, 1, 0
-; GFX12-NEXT:    s_and_b32 s0, s0, 1
-; GFX12-NEXT:    s_wait_alu depctr_sa_sdst(0)
 ; GFX12-NEXT:    s_cmp_lg_u32 s0, 0
 ; GFX12-NEXT:    s_cselect_b32 s0, 1, 0
+; GFX12-NEXT:    s_wait_alu depctr_sa_sdst(0)
 ; GFX12-NEXT:    s_add_co_i32 s2, s2, s3
 ; GFX12-NEXT:    s_wait_alu depctr_sa_sdst(0)
 ; GFX12-NEXT:    s_add_co_i32 s2, s2, s4
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-scc-vcc.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-scc-vcc.ll
index 588802cbd56c7..315b02edea075 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-scc-vcc.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-scc-vcc.ll
@@ -13,9 +13,6 @@ define amdgpu_kernel void @fcmp_uniform_select(float %a, i32 %b, i32 %c, ptr add
 ; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX7-NEXT:    v_cmp_eq_f32_e64 s[4:5], s6, 0
 ; GFX7-NEXT:    s_or_b64 s[4:5], s[4:5], s[4:5]
-; GFX7-NEXT:    s_cselect_b32 s4, 1, 0
-; GFX7-NEXT:    s_and_b32 s4, s4, 1
-; GFX7-NEXT:    s_cmp_lg_u32 s4, 0
 ; GFX7-NEXT:    s_cselect_b32 s3, s7, s3
 ; GFX7-NEXT:    v_mov_b32_e32 v0, s3
 ; GFX7-NEXT:    s_mov_b32 s3, 0xf000
@@ -30,9 +27,6 @@ define amdgpu_kernel void @fcmp_uniform_select(float %a, i32 %b, i32 %c, ptr add
 ; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX8-NEXT:    v_cmp_eq_f32_e64 s[4:5], s0, 0
 ; GFX8-NEXT:    s_cmp_lg_u64 s[4:5], 0
-; GFX8-NEXT:    s_cselect_b32 s0, 1, 0
-; GFX8-NEXT:    s_and_b32 s0, s0, 1
-; GFX8-NEXT:    s_cmp_lg_u32 s0, 0
 ; GFX8-NEXT:    s_cselect_b32 s0, s1, s6
 ; GFX8-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX8-NEXT:    v_mov_b32_e32 v2, s0
@@ -50,10 +44,6 @@ define amdgpu_kernel void @fcmp_uniform_select(float %a, i32 %b, i32 %c, ptr add
 ; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX11-NEXT:    v_cmp_eq_f32_e64 s0, s0, 0
 ; GFX11-NEXT:    s_cmp_lg_u32 s0, 0
-; GFX11-NEXT:    s_cselect_b32 s0, 1, 0
-; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX11-NEXT:    s_and_b32 s0, s0, 1
-; GFX11-NEXT:    s_cmp_lg_u32 s0, 0
 ; GFX11-NEXT:    s_cselect_b32 s0, s1, s6
 ; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
 ; GFX11-NEXT:    v_mov_b32_e32 v0, s0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui.ll
index e450da73ab47d..fa280a852383b 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui.ll
@@ -211,9 +211,6 @@ define amdgpu_ps void @vcc_to_scc(float inreg %a, i32 inreg %b, i32 inreg %c, pt
 ; NEW_RBS:       ; %bb.0:
 ; NEW_RBS-NEXT:    v_cmp_eq_f32_e64 s0, s0, 0
 ; NEW_RBS-NEXT:    s_cmp_lg_u32 s0, 0
-; NEW_RBS-NEXT:    s_cselect_b32 s0, 1, 0
-; NEW_RBS-NEXT:    s_and_b32 s0, s0, 1
-; NEW_RBS-NEXT:    s_cmp_lg_u32 s0, 0
 ; NEW_RBS-NEXT:    s_cselect_b32 s0, s1, s2
 ; NEW_RBS-NEXT:    v_mov_b32_e32 v2, s0
 ; NEW_RBS-NEXT:    global_store_dword v[0:1], v2, off
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll
index 3b7a6e12a1b7d..6b0fa2f947e33 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll
@@ -4269,11 +4269,10 @@ define amdgpu_ps i48 @s_saddsat_i48(i48 inreg %lhs, i48 inreg %rhs) {
 ; GFX6-NEXT:    v_cmp_lt_i64_e64 s[0:1], s[0:1], 0
 ; GFX6-NEXT:    s_or_b64 s[0:1], s[0:1], s[0:1]
 ; GFX6-NEXT:    s_cselect_b32 s0, 1, 0
-; GFX6-NEXT:    s_xor_b32 s2, s0, s3
 ; GFX6-NEXT:    s_ashr_i32 s1, s7, 31
+; GFX6-NEXT:    s_xor_b32 s2, s0, s3
 ; GFX6-NEXT:    s_ashr_i32 s0, s7, 15
 ; GFX6-NEXT:    s_addk_i32 s1, 0x8000
-; GFX6-NEXT:    s_and_b32 s2, s2, 1
 ; GFX6-NEXT:    s_cmp_lg_u32 s2, 0
 ; GFX6-NEXT:    s_cselect_b64 s[0:1], s[0:1], s[4:5]
 ; GFX6-NEXT:    ; return to shader part epilog
@@ -4293,11 +4292,10 @@ define amdgpu_ps i48 @s_saddsat_i48(i48 inreg %lhs, i48 inreg %rhs) {
 ; GFX8-NEXT:    v_cmp_lt_i64_e64 s[0:1], s[0:1], 0
 ; GFX8-NEXT:    s_cmp_lg_u64 s[0:1], 0
 ; GFX8-NEXT:    s_cselect_b32 s0, 1, 0
-; GFX8-NEXT:    s_xor_b32 s2, s0, s3
 ; GFX8-NEXT:    s_ashr_i32 s1, s7, 31
+; GFX8-NEXT:    s_xor_b32 s2, s0, s3
 ; GFX8-NEXT:    s_ashr_i32 s0, s7, 15
 ; GFX8-NEXT:    s_addk_i32 s1, 0x8000
-; GFX8-NEXT:    s_and_b32 s2, s2, 1
 ; GFX8-NEXT:    s_cmp_lg_u32 s2, 0
 ; GFX8-NEXT:    s_cselect_b64 s[0:1], s[0:1], s[4:5]
 ; GFX8-NEXT:    ; return to shader part epilog
@@ -4319,7 +4317,6 @@ define amdgpu_ps i48 @s_saddsat_i48(i48 inreg %lhs, i48 inreg %rhs) {
 ; GFX9-NEXT:    s_xor_b32 s2, s0, s6
 ; GFX9-NEXT:    s_ashr_i32 s0, s5, 31
 ; GFX9-NEXT:    s_add_i32 s1, s0, 0x80000000
-; GFX9-NEXT:    s_and_b32 s2, s2, 1
 ; GFX9-NEXT:    s_cmp_lg_u32 s2, 0
 ; GFX9-NEXT:    s_cselect_b64 s[0:1], s[0:1], s[4:5]
 ; GFX9-NEXT:    s_ashr_i64 s[0:1], s[0:1], 16
@@ -4334,12 +4331,11 @@ define amdgpu_ps i48 @s_saddsat_i48(i48 inreg %lhs, i48 inreg %rhs) {
 ; GFX10PLUS-NEXT:    v_cmp_lt_i64_e64 s0, s[4:5], s[0:1]
 ; GFX10PLUS-NEXT:    v_cmp_lt_i64_e64 s1, s[2:3], 0
 ; GFX10PLUS-NEXT:    s_cmp_lg_u32 s0, 0
-; GFX10PLUS-NEXT:    s_cselect_b32 s0, 1, 0
+; GFX10PLUS-NEXT:    s_cselect_b32 s2, 1, 0
 ; GFX10PLUS-NEXT:    s_cmp_lg_u32 s1, 0
 ; GFX10PLUS-NEXT:    s_cselect_b32 s1, 1, 0
-; GFX10PLUS-NEXT:    s_xor_b32 s1, s1, s0
 ; GFX10PLUS-NEXT:    s_ashr_i32 s0, s5, 31
-; GFX10PLUS-NEXT:    s_and_b32 s2, s1, 1
+; GFX10PLUS-NEXT:    s_xor_b32 s2, s1, s2
 ; GFX10PLUS-NEXT:    s_add_i32 s1, s0, 0x80000000
 ; GFX10PLUS-NEXT:    s_cmp_lg_u32 s2, 0
 ; GFX10PLUS-NEXT:    s_cselect_b64 s[0:1], s[0:1], s[4:5]
@@ -4638,7 +4634,6 @@ define amdgpu_ps i64 @s_saddsat_i64(i64 inreg %lhs, i64 inreg %rhs) {
 ; GFX6-NEXT:    s_xor_b32 s2, s0, s6
 ; GFX6-NEXT:    s_ashr_i32 s0, s5, 31
 ; GFX6-NEXT:    s_add_i32 s1, s0, 0x80000000
-; GFX6-NEXT:    s_and_b32 s2, s2, 1
 ; GFX6-NEXT:    s_cmp_lg_u32 s2, 0
 ; GFX6-NEXT:    s_cselect_b64 s[0:1], s[0:1], s[4:5]
 ; GFX6-NEXT:    ; return to shader part epilog
@@ -4658,7 +4653,6 @@ define amdgpu_ps i64 @s_saddsat_i64(i64 inreg %lhs, i64 inreg %rhs) {
 ; GFX8-NEXT:    s_xor_b32 s2, s0, s6
 ; GFX8-NEXT:    s_ashr_i32 s0, s5, 31
 ; GFX8-NEXT:    s_add_i32 s1, s0, 0x80000000
-; GFX8-NEXT:    s_and_b32 s2, s2, 1
 ; GFX8-NEXT:    s_cmp_lg_u32 s2, 0
 ; GFX8-NEXT:    s_cselect_b64 s[0:1], s[0:1], s[4:5]
 ; GFX8-NEXT:    ; return to shader part epilog
@@ -4678,7 +4672,6 @@ define amdgpu_ps i64 @s_saddsat_i64(i64 inreg %lhs, i64 inreg %rhs) {
 ; GFX9-NEXT:    s_xor_b32 s2, s0, s6
 ; GFX9-NEXT:    s_ashr_i32 s0, s5, 31
 ; GFX9-NEXT:    s_add_i32 s1, s0, 0x80000000
-; GFX9-NEXT:    s_and_b32 s2, s2, 1
 ; GFX9-NEXT:    s_cmp_lg_u32 s2, 0
 ; GFX9-NEXT:    s_cselect_b64 s[0:1], s[0:1], s[4:5]
 ; GFX9-NEXT:    ; return to shader part epilog
@@ -4690,12 +4683,11 @@ define amdgpu_ps i64 @s_saddsat_i64(i64 inreg %lhs, i64 inreg %rhs) {
 ; GFX10PLUS-NEXT:    v_cmp_lt_i64_e64 s0, s[4:5], s[0:1]
 ; GFX10PLUS-NEXT:    v_cmp_lt_i64_e64 s1, s[2:3], 0
 ; GFX10PLUS-NEXT:    s_cmp_lg_u32 s0, 0
-; GFX10PLUS-NEXT:    s_cselect_b32 s0, 1, 0
+; GFX10PLUS-NEXT:    s_cselect_b32 s2, 1, 0
 ; GFX10PLUS-NEXT:    s_cmp_lg_u32 s1, 0
 ; GFX10PLUS-NEXT:    s_cselect_b32 s1, 1, 0
-; GFX10PLUS-NEXT:    s_xor_b32 s1, s1, s0
 ; GFX10PLUS-NEXT:    s_ashr_i32 s0, s5, 31
-; GFX10PLUS-NEXT:    s_and_b32 s2, s1, 1
+; GFX10PLUS-NEXT:    s_xor_b32 s2, s1, s2
 ; GFX10PLUS-NEXT:    s_add_i32 s1, s0, 0x80000000
 ; GFX10PLUS-NEXT:    s_cmp_lg_u32 s2, 0
 ; GFX10PLUS-NEXT:    s_cselect_b64 s[0:1], s[0:1], s[4:5]
@@ -4994,7 +4986,6 @@ define amdgpu_ps <2 x i64> @s_saddsat_v2i64(<2 x i64> inreg %lhs, <2 x i64> inre
 ; GFX6-NEXT:    s_xor_b32 s4, s0, s10
 ; GFX6-NEXT:    s_ashr_i32 s0, s9, 31
 ; GFX6-NEXT:    s_add_i32 s1, s0, 0x80000000
-; GFX6-NEXT:    s_and_b32 s4, s4, 1
 ; GFX6-NEXT:    s_cmp_lg_u32 s4, 0
 ; GFX6-NEXT:    s_cselect_b64 s[0:1], s[0:1], s[8:9]
 ; GFX6-NEXT:    s_add_u32 s4, s2, s6
@@ -5009,7 +5000,6 @@ define amdgpu_ps <2 x i64> @s_saddsat_v2i64(<2 x i64> inreg %lhs, <2 x i64> inre
 ; GFX6-NEXT:    s_xor_b32 s6, s2, s8
 ; GFX6-NEXT:    s_ashr_i32 s2, s5, 31
 ; GFX6-NEXT:    s_add_i32 s3, s2, 0x80000000
-; GFX6-NEXT:    s_and_b32 s6, s6, 1
 ; GFX6-NEXT:    s_cmp_lg_u32 s6, 0
 ; GFX6-NEXT:    s_cselect_b64 s[2:3], s[2:3], s[4:5]
 ; GFX6-NEXT:    ; return to shader part epilog
@@ -5029,7 +5019,6 @@ define amdgpu_ps <2 x i64> @s_saddsat_v2i64(<2 x i64> inreg %lhs, <2 x i64> inre
 ; GFX8-NEXT:    s_xor_b32 s4, s0, s10
 ; GFX8-NEXT:    s_ashr_i32 s0, s9, 31
 ; GFX8-NEXT:    s_add_i32 s1, s0, 0x80000000
-; GFX8-NEXT:    s_and_b32 s4, s4, 1
 ; GFX8-NEXT:    s_cmp_lg_u32 s4, 0
 ; GFX8-NEXT:    s_cselect_b64 s[0:1], s[0:1], s[8:9]
 ; GFX8-NEXT:    s_add_u32 s4, s2, s6
@@ -5045,7 +5034,6 @@ define amdgpu_ps <2 x i64> @s_saddsat_v2i64(<2 x i64> inreg %lhs, <2 x i64> inre
 ; GFX8-NEXT:    s_xor_b32 s6, s2, s8
 ; GFX8-NEXT:    s_ashr_i32 s2, s5, 31
 ; GFX8-NEXT:    s_add_i32 s3, s2, 0x80000000
-; GFX8-NEXT:    s_and_b32 s6, s6, 1
 ; GFX8-NEXT:    s_cmp_lg_u32 s6, 0
 ; GFX8-NEXT:    s_cselect_b64 s[2:3], s[2:3], s[4:5]
 ; GFX8-NEXT:    ; return to shader part epilog
@@ -5065,7 +5053,6 @@ define amdgpu_ps <2 x i64> @s_saddsat_v2i64(<2 x i64> inreg %lhs, <2 x i64> inre
 ; GFX9-NEXT:    s_xor_b32 s4, s0, s10
 ; GFX9-NEXT:    s_ashr_i32 s0, s9, 31
 ; GFX9-NEXT:    s_add_i32 s1, s0, 0x80000000
-; GFX9-NEXT:    s_and_b32 s4, s4, 1
 ; GFX9-NEXT:    s_cmp_lg_u32 s4, 0
 ; GFX9-NEXT:    s_cselect_b64 s[0:1], s[0:1], s[8:9]
 ; GFX9-NEXT:    s_add_u32 s4, s2, s6
@@ -5081,7 +5068,6 @@ define amdgpu_ps <2 x i64> @s_saddsat_v2i64(<2 x i64> inreg %lhs, <2 x i64> inre
 ; GFX9-NEXT:    s_xor_b32 s6, s2, s8
 ; GFX9-NEXT:    s_ashr_i32 s2, s5, 31
 ; GFX9-NEXT:    s_add_i32 s3, s2, 0x80000000
-; GFX9-NEXT:    s_and_b32 s6, s6, 1
 ; GFX9-NEXT:    s_cmp_lg_u32 s6, 0
 ; GFX9-NEXT:    s_cselect_b64 s[2:3], s[2:3], s[4:5]
 ; GFX9-NEXT:    ; return to shader part epilog
@@ -5093,12 +5079,11 @@ define amdgpu_ps <2 x i64> @s_saddsat_v2i64(<2 x i64> inreg %lhs, <2 x i64> inre
 ; GFX10PLUS-NEXT:    v_cmp_lt_i64_e64 s0, s[8:9], s[0:1]
 ; GFX10PLUS-NEXT:    v_cmp_lt_i64_e64 s1, s[4:5], 0
 ; GFX10PLUS-NEXT:    s_cmp_lg_u32 s0, 0
-; GFX10PLUS-NEXT:    s_cselect_b32 s0, 1, 0
+; GFX10PLUS-NEXT:    s_cselect_b32 s4, 1, 0
 ; GFX10PLUS-NEXT:    s_cmp_lg_u32 s1, 0
 ; GFX10PLUS-NEXT:    s_cselect_b32 s1, 1, 0
-; GFX10PLUS-NEXT:    s_xor_b32 s1, s1, s0
 ; GFX10PLUS-NEXT:    s_ashr_i32 s0, s9, 31
-; GFX10PLUS-NEXT:    s_and_b32 s4, s1, 1
+; GFX10PLUS-NEXT:    s_xor_b32 s4, s1, s4
 ; GFX10PLUS-NEXT:    s_add_i32 s1, s0, 0x80000000
 ; GFX10PLUS-NEXT:    s_cmp_lg_u32 s4, 0
 ; GFX10PLUS-NEXT:    s_cselect_b64 s[0:1], s[0:1], s[8:9]
@@ -5107,12 +5092,11 @@ define amdgpu_ps <2 x i64> @s_saddsat_v2i64(<2 x i64> inreg %lhs, <2 x i64> inre
 ; GFX10PLUS-NEXT:    v_cmp_lt_i64_e64 s2, s[4:5], s[2:3]
 ; GFX10PLUS-NEXT:    v_cmp_lt_i64_e64 s3, s[6:7], 0
 ; GFX10PLUS-NEXT:    s_cmp_lg_u32 s2, 0
-; GFX10PLUS-NEXT:    s_cselect_b32 s2, 1, 0
+; GFX10PLUS-NEXT:    s_cselect_b32 s6, 1, 0
 ; GFX10PLUS-NEXT:    s_cmp_lg_u32 s3, 0
 ; GFX10PLUS-NEXT:    s_cselect_b32 s3, 1, 0
-; GFX10PLUS-NEXT:    s_xor_b32 s3, s3, s2
 ; GFX10PLUS-NEXT:    s_ashr_i32 s2, s5, 31
-; GFX10PLUS-NEXT:    s_and_b32 s6, s3, 1
+; GFX10PLUS-NEXT:    s_xor_b32 s6, s3, s6
 ; GFX10PLUS-NEXT:    s_add_i32 s3, s2, 0x80000000
 ; GFX10PLUS-NEXT:    s_cmp_lg_u32 s6, 0
 ; GFX10PLUS-NEXT:    s_cselect_b64 s[2:3], s[2:3], s[4:5]
@@ -5140,18 +5124,12 @@ define amdgpu_ps i128 @s_saddsat_i128(i128 inreg %lhs, i128 inreg %rhs) {
 ; GFX6-NEXT:    v_cmp_eq_u64_e32 vcc, s[8:9], v[0:1]
 ; GFX6-NEXT:    s_cselect_b32 s2, 1, 0
 ; GFX6-NEXT:    s_or_b64 s[0:1], vcc, vcc
-; GFX6-NEXT:    s_cselect_b32 s0, 1, 0
-; GFX6-NEXT:    s_and_b32 s3, s0, 1
 ; GFX6-NEXT:    v_cmp_lt_i64_e64 s[0:1], s[6:7], 0
-; GFX6-NEXT:    s_cmp_lg_u32 s3, 0
 ; GFX6-NEXT:    s_cselect_b32 s2, s10, s2
 ; GFX6-NEXT:    s_or_b64 s[0:1], s[0:1], s[0:1]
 ; GFX6-NEXT:    v_cmp_eq_u64_e64 s[0:1], s[6:7], 0
 ; GFX6-NEXT:    s_cselect_b32 s3, 1, 0
 ; GFX6-NEXT:    s_or_b64 s[0:1], s[0:1], s[0:1]
-; GFX6-NEXT:    s_cselect_b32 s0, 1, 0
-; GFX6-NEXT:    s_and_b32 s0, s0, 1
-; GFX6-NEXT:    s_cmp_lg_u32 s0, 0
 ; GFX6-NEXT:    s_cselect_b32 s0, 0, s3
 ; GFX6-NEXT:    s_xor_b32 s6, s0, s2
 ; GFX6-NEXT:    s_ashr_i32 s0, s9, 31
@@ -5416,22 +5394,19 @@ define amdgpu_ps <4 x float> @saddsat_i128_vs(i128 %lhs, i128 inreg %rhs) {
 ; GFX6-NEXT:    v_add_i32_e32 v4, vcc, s0, v0
 ; GFX6-NEXT:    v_addc_u32_e32 v5, vcc, v1, v5, vcc
 ; GFX6-NEXT:    v_mov_b32_e32 v6, s2
-; GFX6-NEXT:    v_cmp_lt_i64_e64 s[0:1], s[2:3], 0
 ; GFX6-NEXT:    v_mov_b32_e32 v7, s3
 ; GFX6-NEXT:    v_addc_u32_e32 v6, vcc, v2, v6, vcc
 ; GFX6-NEXT:    v_addc_u32_e32 v7, vcc, v3, v7, vcc
-; GFX6-NEXT:    s_or_b64 s[0:1], s[0:1], s[0:1]
 ; GFX6-NEXT:    v_cmp_lt_u64_e32 vcc, v[4:5], v[0:1]
-; GFX6-NEXT:    v_cmp_eq_u64_e64 s[0:1], s[2:3], 0
+; GFX6-NEXT:    v_cmp_lt_i64_e64 s[0:1], s[2:3], 0
 ; GFX6-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
 ; GFX6-NEXT:    v_cmp_lt_i64_e32 vcc, v[6:7], v[2:3]
-; GFX6-NEXT:    s_cselect_b32 s4, 1, 0
 ; GFX6-NEXT:    s_or_b64 s[0:1], s[0:1], s[0:1]
-; GFX6-NEXT:    s_cselect_b32 s0, 1, 0
+; GFX6-NEXT:    v_cmp_eq_u64_e64 s[0:1], s[2:3], 0
 ; GFX6-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
 ; GFX6-NEXT:    v_cmp_eq_u64_e32 vcc, v[6:7], v[2:3]
-; GFX6-NEXT:    s_and_b32 s0, s0, 1
-; GFX6-NEXT:    s_cmp_lg_u32 s0, 0
+; GFX6-NEXT:    s_cselect_b32 s4, 1, 0
+; GFX6-NEXT:    s_or_b64 s[0:1], s[0:1], s[0:1]
 ; GFX6-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
 ; GFX6-NEXT:    s_cselect_b32 s0, 0, s4
 ; GFX6-NEXT:    v_xor_b32_e32 v0, s0, v0
@@ -5465,9 +5440,8 @@ define amdgpu_ps <4 x float> @saddsat_i128_vs(i128 %lhs, i128 inreg %rhs) {
 ; GFX8-NEXT:    s_cmp_eq_u64 s[2:3], 0
 ; GFX8-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
 ; GFX8-NEXT:    s_cselect_b32 s0, 0, s0
-; GFX8-NEXT:    v_xor_b32_e32 v0, s0, v0
 ; GFX8-NEXT:    v_ashrrev_i32_e32 v2, 31, v7
-; GFX8-NEXT:    v_and_b32_e32 v0, 1, v0
+; GFX8-NEXT:    v_xor_b32_e32 v0, s0, v0
 ; GFX8-NEXT:    v_add_u32_e32 v3, vcc, 0x80000000, v2
 ; GFX8-NEXT:    v_cmp_ne_u16_e32 vcc, 0, v0
 ; GFX8-NEXT:    v_cndmask_b32_e32 v0, v4, v2, vcc
@@ -5497,7 +5471,6 @@ define amdgpu_ps <4 x float> @saddsat_i128_vs(i128 %lhs, i128 inreg %rhs) {
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
 ; GFX9-NEXT:    s_cselect_b32 s0, 0, s0
 ; GFX9-NEXT:    v_xor_b32_e32 v0, s0, v0
-; GFX9-NEXT:    v_and_b32_e32 v0, 1, v0
 ; GFX9-NEXT:    v_ashrrev_i32_e32 v2, 31, v7
 ; GFX9-NEXT:    v_add_u32_e32 v3, 0x80000000, v2
 ; GFX9-NEXT:    v_cmp_ne_u16_e32 vcc, 0, v0
@@ -5527,7 +5500,6 @@ define amdgpu_ps <4 x float> @saddsat_i128_vs(i128 %lhs, i128 inreg %rhs) {
 ; GFX10-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc_lo
 ; GFX10-NEXT:    v_add_nc_u32_e32 v3, 0x80000000, v2
 ; GFX10-NEXT:    v_xor_b32_e32 v0, s0, v0
-; GFX10-NEXT:    v_and_b32_e32 v0, 1, v0
 ; GFX10-NEXT:    v_cmp_ne_u16_e32 vcc_lo, 0, v0
 ; GFX10-NEXT:    v_cndmask_b32_e32 v0, v4, v2, vcc_lo
 ; GFX10-NEXT:    v_cndmask_b32_e32 v1, v5, v2, vcc_lo
@@ -5554,7 +5526,6 @@ define amdgpu_ps <4 x float> @saddsat_i128_vs(i128 %lhs, i128 inreg %rhs) {
 ; GFX11-NEXT:    s_cselect_b32 s0, 0, s0
 ; GFX11-NEXT:    v_dual_cndmask_b32 v0, v1, v0 :: v_dual_add_nc_u32 v3, 0x80000000, v2
 ; GFX11-NEXT:    v_xor_b32_e32 v0, s0, v0
-; GFX11-NEXT:    v_and_b32_e32 v0, 1, v0
 ; GFX11-NEXT:    v_cmp_ne_u16_e32 vcc_lo, 0, v0
 ; GFX11-NEXT:    v_dual_cndmask_b32 v0, v4, v2 :: v_dual_cndmask_b32 v3, v7, v3
 ; GFX11-NEXT:    v_cndmask_b32_e32 v1, v5, v2, vcc_lo
@@ -5836,18 +5807,12 @@ define amdgpu_ps <2 x i128> @s_saddsat_v2i128(<2 x i128> inreg %lhs, <2 x i128>
 ; GFX6-NEXT:    v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
 ; GFX6-NEXT:    s_cselect_b32 s2, 1, 0
 ; GFX6-NEXT:    s_or_b64 s[0:1], vcc, vcc
-; GFX6-NEXT:    s_cselect_b32 s0, 1, 0
-; GFX6-NEXT:    s_and_b32 s3, s0, 1
 ; GFX6-NEXT:    v_cmp_lt_i64_e64 s[0:1], s[10:11], 0
-; GFX6-NEXT:    s_cmp_lg_u32 s3, 0
 ; GFX6-NEXT:    s_cselect_b32 s2, s18, s2
 ; GFX6-NEXT:    s_or_b64 s[0:1], s[0:1], s[0:1]
 ; GFX6-NEXT:    v_cmp_eq_u64_e64 s[0:1], s[10:11], 0
 ; GFX6-NEXT:    s_cselect_b32 s3, 1, 0
 ; GFX6-NEXT:    s_or_b64 s[0:1], s[0:1], s[0:1]
-; GFX6-NEXT:    s_cselect_b32 s0, 1, 0
-; GFX6-NEXT:    s_and_b32 s0, s0, 1
-; GFX6-NEXT:    s_cmp_lg_u32 s0, 0
 ; GFX6-NEXT:    s_cselect_b32 s0, 0, s3
 ; GFX6-NEXT:    s_xor_b32 s10, s0, s2
 ; GFX6-NEXT:    s_ashr_i32 s0, s17, 31
@@ -5874,18 +5839,12 @@ define amdgpu_ps <2 x i128> @s_saddsat_v2i128(<2 x i128> inreg %lhs, <2 x i128>
 ; GFX6-NEXT:    v_cmp_eq_u64_e32 vcc, s[10:11], v[0:1]
 ; GFX6-NEXT:    s_cselect_b32 s6, 1, 0
 ; GFX6-NEXT:    s_or_b64 s[4:5], vcc, vcc
-; GFX6-NEXT:    s_cselect_b32 s4, 1, 0
-; GFX6-NEXT:    s_and_b32 s7, s4, 1
 ; GFX6-NEXT:    v_cmp_lt_i64_e64 s[4:5], s[14:15], 0
-; GFX6-NEXT:    s_cmp_lg_u32 s7, 0
 ; GFX6-NEXT:    s_cselect_b32 s6, s12, s6
 ; GFX6-NEXT:    s_or_b64 s[4:5], s[4:5], s[4:5]
 ; GFX6-NEXT:    v_cmp_eq_u64_e64 s[4:5], s[14:15], 0
 ; GFX6-NEXT:    s_cselect_b32 s7, 1, 0
 ; GFX6-NEXT:    s_or_b64 s[4:5], s[4:5], s[4:5]
-; GFX6-NEXT:    s_cselect_b32 s4, 1, 0
-; GFX6-NEXT:    s_and_b32 s4, s4, 1
-; GFX6-NEXT:    s_cmp_lg_u32 s4, 0
 ; GFX6-NEXT:    s_cselect_b32 s4, 0, s7
 ; GFX6-NEXT:    s_xor_b32 s12, s4, s6
 ; GFX6-NEXT:    s_ashr_i32 s4, s11, 31
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll
index 677baf991fd1d..39960760a5961 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll
@@ -4273,11 +4273,10 @@ define amdgpu_ps i48 @s_ssubsat_i48(i48 inreg %lhs, i48 inreg %rhs) {
 ; GFX6-NEXT:    v_cmp_gt_i64_e64 s[0:1], s[0:1], 0
 ; GFX6-NEXT:    s_or_b64 s[0:1], s[0:1], s[0:1]
 ; GFX6-NEXT:    s_cselect_b32 s0, 1, 0
-; GFX6-NEXT:    s_xor_b32 s2, s0, s3
 ; GFX6-NEXT:    s_ashr_i32 s1, s7, 31
+; GFX6-NEXT:    s_xor_b32 s2, s0, s3
 ; GFX6-NEXT:    s_ashr_i32 s0, s7, 15
 ; GFX6-NEXT:    s_addk_i32 s1, 0x8000
-; GFX6-NEXT:    s_and_b32 s2, s2, 1
 ; GFX6-NEXT:    s_cmp_lg_u32 s2, 0
 ; GFX6-NEXT:    s_cselect_b64 s[0:1], s[0:1], s[4:5]
 ; GFX6-NEXT:    ; return to shader part epilog
@@ -4297,11 +4296,10 @@ define amdgpu_ps i48 @s_ssubsat_i48(i48 inreg %lhs, i48 inreg %rhs) {
 ; GFX8-NEXT:    v_cmp_gt_i64_e64 s[0:1], s[0:1], 0
 ; GFX8-NEXT:    s_cmp_lg_u64 s[0:1], 0
 ; GFX8-NEXT:    s_cselect_b32 s0, 1, 0
-; GFX8-NEXT:    s_xor_b32 s2, s0, s3
 ; GFX8-NEXT:    s_ashr_i32 s1, s7, 31
+; GFX8-NEXT:    s_xor_b32 s2, s0, s3
 ; GFX8-NEXT:    s_ashr_i32 s0, s7, 15
 ; GFX8-NEXT:    s_addk_i32 s1, 0x8000
-; GFX8-NEXT:    s_and_b32 s2, s2, 1
 ; GFX8-NEXT:    s_cmp_lg_u32 s2, 0
 ; GFX8-NEXT:    s_cselect_b64 s[0:1], s[0:1], s[4:5]
 ; GFX8-NEXT:    ; return to shader part epilog
@@ -4323,7 +4321,6 @@ define amdgpu_ps i48 @s_ssubsat_i48(i48 inreg %lhs, i48 inreg %rhs) {
 ; GFX9-NEXT:    s_xor_b32 s2, s0, s6
 ; GFX9-NEXT:    s_ashr_i32 s0, s5, 31
 ; GFX9-NEXT:    s_add_i32 s1, s0, 0x80000000
-; GFX9-NEXT:    s_and_b32 s2, s2, 1
 ; GFX9-NEXT:    s_cmp_lg_u32 s2, 0
 ; GFX9-NEXT:    s_cselect_b64 s[0:1], s[0:1], s[4:5]
 ; GFX9-NEXT:    s_ashr_i64 s[0:1], s[0:1], 16
@@ -4338,12 +4335,11 @@ define amdgpu_ps i48 @s_ssubsat_i48(i48 inreg %lhs, i48 inreg %rhs) {
 ; GFX10PLUS-NEXT:    v_cmp_lt_i64_e64 s0, s[4:5], s[0:1]
 ; GFX10PLUS-NEXT:    v_cmp_gt_i64_e64 s1, s[2:3], 0
 ; GFX10PLUS-NEXT:    s_cmp_lg_u32 s0, 0
-; GFX10PLUS-NEXT:    s_cselect_b32 s0, 1, 0
+; GFX10PLUS-NEXT:    s_cselect_b32 s2, 1, 0
 ; GFX10PLUS-NEXT:    s_cmp_lg_u32 s1, 0
 ; GFX10PLUS-NEXT:    s_cselect_b32 s1, 1, 0
-; GFX10PLUS-NEXT:    s_xor_b32 s1, s1, s0
 ; GFX10PLUS-NEXT:    s_ashr_i32 s0, s5, 31
-; GFX10PLUS-NEXT:    s_and_b32 s2, s1, 1
+; GFX10PLUS-NEXT:    s_xor_b32 s2, s1, s2
 ; GFX10PLUS-NEXT:    s_add_i32 s1, s0, 0x80000000
 ; GFX10PLUS-NEXT:    s_cmp_lg_u32 s2, 0
 ; GFX10PLUS-NEXT:    s_cselect_b64 s[0:1], s[0:1], s[4:5]
@@ -4642,7 +4638,6 @@ define amdgpu_ps i64 @s_ssubsat_i64(i64 inreg %lhs, i64 inreg %rhs) {
 ; GFX6-NEXT:    s_xor_b32 s2, s0, s6
 ; GFX6-NEXT:    s_ashr_i32 s0, s5, 31
 ; GFX6-NEXT:    s_add_i32 s1, s0, 0x80000000
-; GFX6-NEXT:    s_and_b32 s2, s2, 1
 ; GFX6-NEXT:    s_cmp_lg_u32 s2, 0
 ; GFX6-NEXT:    s_cselect_b64 s[0:1], s[0:1], s[4:5]
 ; GFX6-NEXT:    ; return to shader part epilog
@@ -4662,7 +4657,6 @@ define amdgpu_ps i64 @s_ssubsat_i64(i64 inreg %lhs, i64 inreg %rhs) {
 ; GFX8-NEXT:    s_xor_b32 s2, s0, s6
 ; GFX8-NEXT:    s_ashr_i32 s0, s5, 31
 ; GFX8-NEXT:    s_add_i32 s1, s0, 0x80000000
-; GFX8-NEXT:    s_and_b32 s2, s2, 1
 ; GFX8-NEXT:    s_cmp_lg_u32 s2, 0
 ; GFX8-NEXT:    s_cselect_b64 s[0:1], s[0:1], s[4:5]
 ; GFX8-NEXT:    ; return to shader part epilog
@@ -4682,7 +4676,6 @@ define amdgpu_ps i64 @s_ssubsat_i64(i64 inreg %lhs, i64 inreg %rhs) {
 ; GFX9-NEXT:    s_xor_b32 s2, s0, s6
 ; GFX9-NEXT:    s_ashr_i32 s0, s5, 31
 ; GFX9-NEXT:    s_add_i32 s1, s0, 0x80000000
-; GFX9-NEXT:    s_and_b32 s2, s2, 1
 ; GFX9-NEXT:    s_cmp_lg_u32 s2, 0
 ; GFX9-NEXT:    s_cselect_b64 s[0:1], s[0:1], s[4:5]
 ; GFX9-NEXT:    ; return to shader part epilog
@@ -4694,12 +4687,11 @@ define amdgpu_ps i64 @s_ssubsat_i64(i64 inreg %lhs, i64 inreg %rhs) {
 ; GFX10PLUS-NEXT:    v_cmp_lt_i64_e64 s0, s[4:5], s[0:1]
 ; GFX10PLUS-NEXT:    v_cmp_gt_i64_e64 s1, s[2:3], 0
 ; GFX10PLUS-NEXT:    s_cmp_lg_u32 s0, 0
-; GFX10PLUS-NEXT:    s_cselect_b32 s0, 1, 0
+; GFX10PLUS-NEXT:    s_cselect_b32 s2, 1, 0
 ; GFX10PLUS-NEXT:    s_cmp_lg_u32 s1, 0
 ; GFX10PLUS-NEXT:    s_cselect_b32 s1, 1, 0
-; GFX10PLUS-NEXT:    s_xor_b32 s1, s1, s0
 ; GFX10PLUS-NEXT:    s_ashr_i32 s0, s5, 31
-; GFX10PLUS-NEXT:    s_and_b32 s2, s1, 1
+; GFX10PLUS-NEXT:    s_xor_b32 s2, s1, s2
 ; GFX10PLUS-NEXT:    s_add_i32 s1, s0, 0x80000000
 ; GFX10PLUS-NEXT:    s_cmp_lg_u32 s2, 0
 ; GFX10PLUS-NEXT:    s_cselect_b64 s[0:1], s[0:1], s[4:5]
@@ -4998,7 +4990,6 @@ define amdgpu_ps <2 x i64> @s_ssubsat_v2i64(<2 x i64> inreg %lhs, <2 x i64> inre
 ; GFX6-NEXT:    s_xor_b32 s4, s0, s10
 ; GFX6-NEXT:    s_ashr_i32 s0, s9, 31
 ; GFX6-NEXT:    s_add_i32 s1, s0, 0x80000000
-; GFX6-NEXT:    s_and_b32 s4, s4, 1
 ; GFX6-NEXT:    s_cmp_lg_u32 s4, 0
 ; GFX6-NEXT:    s_cselect_b64 s[0:1], s[0:1], s[8:9]
 ; GFX6-NEXT:    s_sub_u32 s4, s2, s6
@@ -5013,7 +5004,6 @@ define amdgpu_ps <2 x i64> @s_ssubsat_v2i64(<2 x i64> inreg %lhs, <2 x i64> inre
 ; GFX6-NEXT:    s_xor_b32 s6, s2, s8
 ; GFX6-NEXT:    s_ashr_i32 s2, s5, 31
 ; GFX6-NEXT:    s_add_i32 s3, s2, 0x80000000
-; GFX6-NEXT:    s_and_b32 s6, s6, 1
 ; GFX6-NEXT:    s_cmp_lg_u32 s6, 0
 ; GFX6-NEXT:    s_cselect_b64 s[2:3], s[2:3], s[4:5]
 ; GFX6-NEXT:    ; return to shader part epilog
@@ -5033,7 +5023,6 @@ define amdgpu_ps <2 x i64> @s_ssubsat_v2i64(<2 x i64> inreg %lhs, <2 x i64> inre
 ; GFX8-NEXT:    s_xor_b32 s4, s0, s10
 ; GFX8-NEXT:    s_ashr_i32 s0, s9, 31
 ; GFX8-NEXT:    s_add_i32 s1, s0, 0x80000000
-; GFX8-NEXT:    s_and_b32 s4, s4, 1
 ; GFX8-NEXT:    s_cmp_lg_u32 s4, 0
 ; GFX8-NEXT:    s_cselect_b64 s[0:1], s[0:1], s[8:9]
 ; GFX8-NEXT:    s_sub_u32 s4, s2, s6
@@ -5049,7 +5038,6 @@ define amdgpu_ps <2 x i64> @s_ssubsat_v2i64(<2 x i64> inreg %lhs, <2 x i64> inre
 ; GFX8-NEXT:    s_xor_b32 s6, s2, s8
 ; GFX8-NEXT:    s_ashr_i32 s2, s5, 31
 ; GFX8-NEXT:    s_add_i32 s3, s2, 0x80000000
-; GFX8-NEXT:    s_and_b32 s6, s6, 1
 ; GFX8-NEXT:    s_cmp_lg_u32 s6, 0
 ; GFX8-NEXT:    s_cselect_b64 s[2:3], s[2:3], s[4:5]
 ; GFX8-NEXT:    ; return to shader part epilog
@@ -5069,7 +5057,6 @@ define amdgpu_ps <2 x i64> @s_ssubsat_v2i64(<2 x i64> inreg %lhs, <2 x i64> inre
 ; GFX9-NEXT:    s_xor_b32 s4, s0, s10
 ; GFX9-NEXT:    s_ashr_i32 s0, s9, 31
 ; GFX9-NEXT:    s_add_i32 s1, s0, 0x80000000
-; GFX9-NEXT:    s_and_b32 s4, s4, 1
 ; GFX9-NEXT:    s_cmp_lg_u32 s4, 0
 ; GFX9-NEXT:    s_cselect_b64 s[0:1], s[0:1], s[8:9]
 ; GFX9-NEXT:    s_sub_u32 s4, s2, s6
@@ -5085,7 +5072,6 @@ define amdgpu_ps <2 x i64> @s_ssubsat_v2i64(<2 x i64> inreg %lhs, <2 x i64> inre
 ; GFX9-NEXT:    s_xor_b32 s6, s2, s8
 ; GFX9-NEXT:    s_ashr_i32 s2, s5, 31
 ; GFX9-NEXT:    s_add_i32 s3, s2, 0x80000000
-; GFX9-NEXT:    s_and_b32 s6, s6, 1
 ; GFX9-NEXT:    s_cmp_lg_u32 s6, 0
 ; GFX9-NEXT:    s_cselect_b64 s[2:3], s[2:3], s[4:5]
 ; GFX9-NEXT:    ; return to shader part epilog
@@ -5097,12 +5083,11 @@ define amdgpu_ps <2 x i64> @s_ssubsat_v2i64(<2 x i64> inreg %lhs, <2 x i64> inre
 ; GFX10PLUS-NEXT:    v_cmp_lt_i64_e64 s0, s[8:9], s[0:1]
 ; GFX10PLUS-NEXT:    v_cmp_gt_i64_e64 s1, s[4:5], 0
 ; GFX10PLUS-NEXT:    s_cmp_lg_u32 s0, 0
-; GFX10PLUS-NEXT:    s_cselect_b32 s0, 1, 0
+; GFX10PLUS-NEXT:    s_cselect_b32 s4, 1, 0
 ; GFX10PLUS-NEXT:    s_cmp_lg_u32 s1, 0
 ; GFX10PLUS-NEXT:    s_cselect_b32 s1, 1, 0
-; GFX10PLUS-NEXT:    s_xor_b32 s1, s1, s0
 ; GFX10PLUS-NEXT:    s_ashr_i32 s0, s9, 31
-; GFX10PLUS-NEXT:    s_and_b32 s4, s1, 1
+; GFX10PLUS-NEXT:    s_xor_b32 s4, s1, s4
 ; GFX10PLUS-NEXT:    s_add_i32 s1, s0, 0x80000000
 ; GFX10PLUS-NEXT:    s_cmp_lg_u32 s4, 0
 ; GFX10PLUS-NEXT:    s_cselect_b64 s[0:1], s[0:1], s[8:9]
@@ -5111,12 +5096,11 @@ define amdgpu_ps <2 x i64> @s_ssubsat_v2i64(<2 x i64> inreg %lhs, <2 x i64> inre
 ; GFX10PLUS-NEXT:    v_cmp_lt_i64_e64 s2, s[4:5], s[2:3]
 ; GFX10PLUS-NEXT:    v_cmp_gt_i64_e64 s3, s[6:7], 0
 ; GFX10PLUS-NEXT:    s_cmp_lg_u32 s2, 0
-; GFX10PLUS-NEXT:    s_cselect_b32 s2, 1, 0
+; GFX10PLUS-NEXT:    s_cselect_b32 s6, 1, 0
 ; GFX10PLUS-NEXT:    s_cmp_lg_u32 s3, 0
 ; GFX10PLUS-NEXT:    s_cselect_b32 s3, 1, 0
-; GFX10PLUS-NEXT:    s_xor_b32 s3, s3, s2
 ; GFX10PLUS-NEXT:    s_ashr_i32 s2, s5, 31
-; GFX10PLUS-NEXT:    s_and_b32 s6, s3, 1
+; GFX10PLUS-NEXT:    s_xor_b32 s6, s3, s6
 ; GFX10PLUS-NEXT:    s_add_i32 s3, s2, 0x80000000
 ; GFX10PLUS-NEXT:    s_cmp_lg_u32 s6, 0
 ; GFX10PLUS-NEXT:    s_cselect_b64 s[2:3], s[2:3], s[4:5]
@@ -5144,10 +5128,7 @@ define amdgpu_ps i128 @s_ssubsat_i128(i128 inreg %lhs, i128 inreg %rhs) {
 ; GFX6-NEXT:    v_cmp_eq_u64_e32 vcc, s[10:11], v[0:1]
 ; GFX6-NEXT:    s_cselect_b32 s2, 1, 0
 ; GFX6-NEXT:    s_or_b64 s[0:1], vcc, vcc
-; GFX6-NEXT:    s_cselect_b32 s0, 1, 0
-; GFX6-NEXT:    s_and_b32 s3, s0, 1
 ; GFX6-NEXT:    v_cmp_gt_u64_e64 s[0:1], s[4:5], 0
-; GFX6-NEXT:    s_cmp_lg_u32 s3, 0
 ; GFX6-NEXT:    s_cselect_b32 s2, s12, s2
 ; GFX6-NEXT:    s_or_b64 s[0:1], s[0:1], s[0:1]
 ; GFX6-NEXT:    v_cmp_gt_i64_e64 s[0:1], s[6:7], 0
@@ -5156,9 +5137,6 @@ define amdgpu_ps i128 @s_ssubsat_i128(i128 inreg %lhs, i128 inreg %rhs) {
 ; GFX6-NEXT:    v_cmp_eq_u64_e64 s[0:1], s[6:7], 0
 ; GFX6-NEXT:    s_cselect_b32 s4, 1, 0
 ; GFX6-NEXT:    s_or_b64 s[0:1], s[0:1], s[0:1]
-; GFX6-NEXT:    s_cselect_b32 s0, 1, 0
-; GFX6-NEXT:    s_and_b32 s0, s0, 1
-; GFX6-NEXT:    s_cmp_lg_u32 s0, 0
 ; GFX6-NEXT:    s_cselect_b32 s0, s3, s4
 ; GFX6-NEXT:    s_xor_b32 s4, s0, s2
 ; GFX6-NEXT:    s_ashr_i32 s0, s11, 31
@@ -5439,27 +5417,24 @@ define amdgpu_ps <4 x float> @ssubsat_i128_vs(i128 %lhs, i128 inreg %rhs) {
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    v_mov_b32_e32 v5, s1
 ; GFX6-NEXT:    v_subrev_i32_e32 v4, vcc, s0, v0
-; GFX6-NEXT:    v_cmp_gt_u64_e64 s[0:1], s[0:1], 0
 ; GFX6-NEXT:    v_subb_u32_e32 v5, vcc, v1, v5, vcc
-; GFX6-NEXT:    s_or_b64 s[0:1], s[0:1], s[0:1]
 ; GFX6-NEXT:    v_mov_b32_e32 v6, s2
-; GFX6-NEXT:    v_cmp_gt_i64_e64 s[0:1], s[2:3], 0
+; GFX6-NEXT:    v_cmp_gt_u64_e64 s[0:1], s[0:1], 0
 ; GFX6-NEXT:    v_mov_b32_e32 v7, s3
 ; GFX6-NEXT:    v_subb_u32_e32 v6, vcc, v2, v6, vcc
 ; GFX6-NEXT:    v_subb_u32_e32 v7, vcc, v3, v7, vcc
-; GFX6-NEXT:    s_cselect_b32 s4, 1, 0
 ; GFX6-NEXT:    s_or_b64 s[0:1], s[0:1], s[0:1]
 ; GFX6-NEXT:    v_cmp_lt_u64_e32 vcc, v[4:5], v[0:1]
-; GFX6-NEXT:    v_cmp_eq_u64_e64 s[0:1], s[2:3], 0
+; GFX6-NEXT:    v_cmp_gt_i64_e64 s[0:1], s[2:3], 0
 ; GFX6-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
 ; GFX6-NEXT:    v_cmp_lt_i64_e32 vcc, v[6:7], v[2:3]
-; GFX6-NEXT:    s_cselect_b32 s5, 1, 0
+; GFX6-NEXT:    s_cselect_b32 s4, 1, 0
 ; GFX6-NEXT:    s_or_b64 s[0:1], s[0:1], s[0:1]
-; GFX6-NEXT:    s_cselect_b32 s0, 1, 0
+; GFX6-NEXT:    v_cmp_eq_u64_e64 s[0:1], s[2:3], 0
 ; GFX6-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
 ; GFX6-NEXT:    v_cmp_eq_u64_e32 vcc, v[6:7], v[2:3]
-; GFX6-NEXT:    s_and_b32 s0, s0, 1
-; GFX6-NEXT:    s_cmp_lg_u32 s0, 0
+; GFX6-NEXT:    s_cselect_b32 s5, 1, 0
+; GFX6-NEXT:    s_or_b64 s[0:1], s[0:1], s[0:1]
 ; GFX6-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
 ; GFX6-NEXT:    s_cselect_b32 s0, s4, s5
 ; GFX6-NEXT:    v_xor_b32_e32 v0, s0, v0
@@ -5496,9 +5471,8 @@ define amdgpu_ps <4 x float> @ssubsat_i128_vs(i128 %lhs, i128 inreg %rhs) {
 ; GFX8-NEXT:    s_cmp_eq_u64 s[2:3], 0
 ; GFX8-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
 ; GFX8-NEXT:    s_cselect_b32 s0, s4, s0
-; GFX8-NEXT:    v_xor_b32_e32 v0, s0, v0
 ; GFX8-NEXT:    v_ashrrev_i32_e32 v2, 31, v7
-; GFX8-NEXT:    v_and_b32_e32 v0, 1, v0
+; GFX8-NEXT:    v_xor_b32_e32 v0, s0, v0
 ; GFX8-NEXT:    v_add_u32_e32 v3, vcc, 0x80000000, v2
 ; GFX8-NEXT:    v_cmp_ne_u16_e32 vcc, 0, v0
 ; GFX8-NEXT:    v_cndmask_b32_e32 v0, v4, v2, vcc
@@ -5531,7 +5505,6 @@ define amdgpu_ps <4 x float> @ssubsat_i128_vs(i128 %lhs, i128 inreg %rhs) {
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
 ; GFX9-NEXT:    s_cselect_b32 s0, s4, s0
 ; GFX9-NEXT:    v_xor_b32_e32 v0, s0, v0
-; GFX9-NEXT:    v_and_b32_e32 v0, 1, v0
 ; GFX9-NEXT:    v_ashrrev_i32_e32 v2, 31, v7
 ; GFX9-NEXT:    v_add_u32_e32 v3, 0x80000000, v2
 ; GFX9-NEXT:    v_cmp_ne_u16_e32 vcc, 0, v0
@@ -5564,7 +5537,6 @@ define amdgpu_ps <4 x float> @ssubsat_i128_vs(i128 %lhs, i128 inreg %rhs) {
 ; GFX10-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc_lo
 ; GFX10-NEXT:    v_add_nc_u32_e32 v3, 0x80000000, v2
 ; GFX10-NEXT:    v_xor_b32_e32 v0, s0, v0
-; GFX10-NEXT:    v_and_b32_e32 v0, 1, v0
 ; GFX10-NEXT:    v_cmp_ne_u16_e32 vcc_lo, 0, v0
 ; GFX10-NEXT:    v_cndmask_b32_e32 v0, v4, v2, vcc_lo
 ; GFX10-NEXT:    v_cndmask_b32_e32 v1, v5, v2, vcc_lo
@@ -5594,7 +5566,6 @@ define amdgpu_ps <4 x float> @ssubsat_i128_vs(i128 %lhs, i128 inreg %rhs) {
 ; GFX11-NEXT:    s_cselect_b32 s0, s0, s1
 ; GFX11-NEXT:    v_dual_cndmask_b32 v0, v1, v0 :: v_dual_add_nc_u32 v3, 0x80000000, v2
 ; GFX11-NEXT:    v_xor_b32_e32 v0, s0, v0
-; GFX11-NEXT:    v_and_b32_e32 v0, 1, v0
 ; GFX11-NEXT:    v_cmp_ne_u16_e32 vcc_lo, 0, v0
 ; GFX11-NEXT:    v_dual_cndmask_b32 v0, v4, v2 :: v_dual_cndmask_b32 v3, v7, v3
 ; GFX11-NEXT:    v_cndmask_b32_e32 v1, v5, v2, vcc_lo
@@ -5895,10 +5866,7 @@ define amdgpu_ps <2 x i128> @s_ssubsat_v2i128(<2 x i128> inreg %lhs, <2 x i128>
 ; GFX6-NEXT:    v_cmp_eq_u64_e32 vcc, s[18:19], v[0:1]
 ; GFX6-NEXT:    s_cselect_b32 s2, 1, 0
 ; GFX6-NEXT:    s_or_b64 s[0:1], vcc, vcc
-; GFX6-NEXT:    s_cselect_b32 s0, 1, 0
-; GFX6-NEXT:    s_and_b32 s3, s0, 1
 ; GFX6-NEXT:    v_cmp_gt_u64_e64 s[0:1], s[8:9], 0
-; GFX6-NEXT:    s_cmp_lg_u32 s3, 0
 ; GFX6-NEXT:    s_cselect_b32 s2, s20, s2
 ; GFX6-NEXT:    s_or_b64 s[0:1], s[0:1], s[0:1]
 ; GFX6-NEXT:    v_cmp_gt_i64_e64 s[0:1], s[10:11], 0
@@ -5907,9 +5875,6 @@ define amdgpu_ps <2 x i128> @s_ssubsat_v2i128(<2 x i128> inreg %lhs, <2 x i128>
 ; GFX6-NEXT:    v_cmp_eq_u64_e64 s[0:1], s[10:11], 0
 ; GFX6-NEXT:    s_cselect_b32 s8, 1, 0
 ; GFX6-NEXT:    s_or_b64 s[0:1], s[0:1], s[0:1]
-; GFX6-NEXT:    s_cselect_b32 s0, 1, 0
-; GFX6-NEXT:    s_and_b32 s0, s0, 1
-; GFX6-NEXT:    s_cmp_lg_u32 s0, 0
 ; GFX6-NEXT:    s_cselect_b32 s0, s3, s8
 ; GFX6-NEXT:    s_xor_b32 s8, s0, s2
 ; GFX6-NEXT:    s_ashr_i32 s0, s19, 31
@@ -5936,10 +5901,7 @@ define amdgpu_ps <2 x i128> @s_ssubsat_v2i128(<2 x i128> inreg %lhs, <2 x i128>
 ; GFX6-NEXT:    v_cmp_eq_u64_e32 vcc, s[10:11], v[0:1]
 ; GFX6-NEXT:    s_cselect_b32 s6, 1, 0
 ; GFX6-NEXT:    s_or_b64 s[4:5], vcc, vcc
-; GFX6-NEXT:    s_cselect_b32 s4, 1, 0
-; GFX6-NEXT:    s_and_b32 s7, s4, 1
 ; GFX6-NEXT:    v_cmp_gt_u64_e64 s[4:5], s[12:13], 0
-; GFX6-NEXT:    s_cmp_lg_u32 s7, 0
 ; GFX6-NEXT:    s_cselect_b32 s6, s16, s6
 ; GFX6-NEXT:    s_or_b64 s[4:5], s[4:5], s[4:5]
 ; GFX6-NEXT:    v_cmp_gt_i64_e64 s[4:5], s[14:15], 0
@@ -5948,9 +5910,6 @@ define amdgpu_ps <2 x i128> @s_ssubsat_v2i128(<2 x i128> inreg %lhs, <2 x i128>
 ; GFX6-NEXT:    v_cmp_eq_u64_e64 s[4:5], s[14:15], 0
 ; GFX6-NEXT:    s_cselect_b32 s12, 1, 0
 ; GFX6-NEXT:    s_or_b64 s[4:5], s[4:5], s[4:5]
-; GFX6-NEXT:    s_cselect_b32 s4, 1, 0
-; GFX6-NEXT:    s_and_b32 s4, s4, 1
-; GFX6-NEXT:    s_cmp_lg_u32 s4, 0
 ; GFX6-NEXT:    s_cselect_b32 s4, s7, s12
 ; GFX6-NEXT:    s_xor_b32 s12, s4, s6
 ; GFX6-NEXT:    s_ashr_i32 s4, s11, 31
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/subo.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/subo.ll
index 904f33fbb924c..fa1f5472c3083 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/subo.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/subo.ll
@@ -704,7 +704,6 @@ define amdgpu_ps i64 @s_ssubo_i64(i64 inreg %a, i64 inreg %b) {
 ; GFX7-NEXT:    s_or_b64 s[0:1], s[0:1], s[0:1]
 ; GFX7-NEXT:    s_cselect_b32 s0, 1, 0
 ; GFX7-NEXT:    s_xor_b32 s0, s0, s6
-; GFX7-NEXT:    s_and_b32 s0, s0, 1
 ; GFX7-NEXT:    s_cmp_lg_u32 s0, 0
 ; GFX7-NEXT:    s_cselect_b32 s0, 1, 0
 ; GFX7-NEXT:    s_sub_u32 s0, s4, s0
@@ -724,7 +723,6 @@ define amdgpu_ps i64 @s_ssubo_i64(i64 inreg %a, i64 inreg %b) {
 ; GFX8-NEXT:    s_cmp_lg_u64 s[0:1], 0
 ; GFX8-NEXT:    s_cselect_b32 s0, 1, 0
 ; GFX8-NEXT:    s_xor_b32 s0, s0, s6
-; GFX8-NEXT:    s_and_b32 s0, s0, 1
 ; GFX8-NEXT:    s_cmp_lg_u32 s0, 0
 ; GFX8-NEXT:    s_cselect_b32 s0, 1, 0
 ; GFX8-NEXT:    s_sub_u32 s0, s4, s0
@@ -744,7 +742,6 @@ define amdgpu_ps i64 @s_ssubo_i64(i64 inreg %a, i64 inreg %b) {
 ; GFX9-NEXT:    s_cmp_lg_u64 s[0:1], 0
 ; GFX9-NEXT:    s_cselect_b32 s0, 1, 0
 ; GFX9-NEXT:    s_xor_b32 s0, s0, s6
-; GFX9-NEXT:    s_and_b32 s0, s0, 1
 ; GFX9-NEXT:    s_cmp_lg_u32 s0, 0
 ; GFX9-NEXT:    s_cselect_b32 s0, 1, 0
 ; GFX9-NEXT:    s_sub_u32 s0, s4, s0
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll
index bc01298205901..76958e63d36c9 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll
@@ -64,9 +64,6 @@ define amdgpu_kernel void @sgpr_isnan_f16(ptr addrspace(1) %out, half %x) {
 ; GFX8GLISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX8GLISEL-NEXT:    v_cmp_class_f16_e64 s[2:3], s2, 3
 ; GFX8GLISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX8GLISEL-NEXT:    s_cselect_b32 s2, 1, 0
-; GFX8GLISEL-NEXT:    s_and_b32 s2, s2, 1
-; GFX8GLISEL-NEXT:    s_cmp_lg_u32 s2, 0
 ; GFX8GLISEL-NEXT:    s_cselect_b32 s2, -1, 0
 ; GFX8GLISEL-NEXT:    v_mov_b32_e32 v0, s0
 ; GFX8GLISEL-NEXT:    v_mov_b32_e32 v2, s2
@@ -93,9 +90,6 @@ define amdgpu_kernel void @sgpr_isnan_f16(ptr addrspace(1) %out, half %x) {
 ; GFX9GLISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9GLISEL-NEXT:    v_cmp_class_f16_e64 s[2:3], s2, 3
 ; GFX9GLISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX9GLISEL-NEXT:    s_cselect_b32 s2, 1, 0
-; GFX9GLISEL-NEXT:    s_and_b32 s2, s2, 1
-; GFX9GLISEL-NEXT:    s_cmp_lg_u32 s2, 0
 ; GFX9GLISEL-NEXT:    s_cselect_b32 s2, -1, 0
 ; GFX9GLISEL-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX9GLISEL-NEXT:    global_store_dword v1, v0, s[0:1]
@@ -115,18 +109,15 @@ define amdgpu_kernel void @sgpr_isnan_f16(ptr addrspace(1) %out, half %x) {
 ;
 ; GFX10GLISEL-LABEL: sgpr_isnan_f16:
 ; GFX10GLISEL:       ; %bb.0:
-; GFX10GLISEL-NEXT:    s_load_dword s0, s[4:5], 0x2c
+; GFX10GLISEL-NEXT:    s_clause 0x1
+; GFX10GLISEL-NEXT:    s_load_dword s2, s[4:5], 0x2c
+; GFX10GLISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; GFX10GLISEL-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX10GLISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10GLISEL-NEXT:    v_cmp_class_f16_e64 s2, s0, 3
-; GFX10GLISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX10GLISEL-NEXT:    s_cmp_lg_u32 s2, 0
-; GFX10GLISEL-NEXT:    s_cselect_b32 s2, 1, 0
-; GFX10GLISEL-NEXT:    s_and_b32 s2, s2, 1
+; GFX10GLISEL-NEXT:    v_cmp_class_f16_e64 s2, s2, 3
 ; GFX10GLISEL-NEXT:    s_cmp_lg_u32 s2, 0
 ; GFX10GLISEL-NEXT:    s_cselect_b32 s2, -1, 0
 ; GFX10GLISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX10GLISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10GLISEL-NEXT:    global_store_dword v1, v0, s[0:1]
 ; GFX10GLISEL-NEXT:    s_endpgm
 ;
@@ -156,35 +147,29 @@ define amdgpu_kernel void @sgpr_isnan_f16(ptr addrspace(1) %out, half %x) {
 ;
 ; GFX11GLISEL-TRUE16-LABEL: sgpr_isnan_f16:
 ; GFX11GLISEL-TRUE16:       ; %bb.0:
-; GFX11GLISEL-TRUE16-NEXT:    s_load_b32 s0, s[4:5], 0x2c
+; GFX11GLISEL-TRUE16-NEXT:    s_clause 0x1
+; GFX11GLISEL-TRUE16-NEXT:    s_load_b32 s2, s[4:5], 0x2c
+; GFX11GLISEL-TRUE16-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
 ; GFX11GLISEL-TRUE16-NEXT:    v_dual_mov_b32 v0, 3 :: v_dual_mov_b32 v1, 0
 ; GFX11GLISEL-TRUE16-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11GLISEL-TRUE16-NEXT:    v_cmp_class_f16_e32 vcc_lo, s0, v0.l
-; GFX11GLISEL-TRUE16-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; GFX11GLISEL-TRUE16-NEXT:    v_cmp_class_f16_e32 vcc_lo, s2, v0.l
 ; GFX11GLISEL-TRUE16-NEXT:    s_cmp_lg_u32 vcc_lo, 0
-; GFX11GLISEL-TRUE16-NEXT:    s_cselect_b32 s2, 1, 0
-; GFX11GLISEL-TRUE16-NEXT:    s_and_b32 s2, s2, 1
-; GFX11GLISEL-TRUE16-NEXT:    s_cmp_lg_u32 s2, 0
 ; GFX11GLISEL-TRUE16-NEXT:    s_cselect_b32 s2, -1, 0
 ; GFX11GLISEL-TRUE16-NEXT:    v_mov_b32_e32 v0, s2
-; GFX11GLISEL-TRUE16-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX11GLISEL-TRUE16-NEXT:    global_store_b32 v1, v0, s[0:1]
 ; GFX11GLISEL-TRUE16-NEXT:    s_endpgm
 ;
 ; GFX11GLISEL-FAKE16-LABEL: sgpr_isnan_f16:
 ; GFX11GLISEL-FAKE16:       ; %bb.0:
-; GFX11GLISEL-FAKE16-NEXT:    s_load_b32 s0, s[4:5], 0x2c
+; GFX11GLISEL-FAKE16-NEXT:    s_clause 0x1
+; GFX11GLISEL-FAKE16-NEXT:    s_load_b32 s2, s[4:5], 0x2c
+; GFX11GLISEL-FAKE16-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
 ; GFX11GLISEL-FAKE16-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX11GLISEL-FAKE16-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11GLISEL-FAKE16-NEXT:    v_cmp_class_f16_e64 s2, s0, 3
-; GFX11GLISEL-FAKE16-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX11GLISEL-FAKE16-NEXT:    s_cmp_lg_u32 s2, 0
-; GFX11GLISEL-FAKE16-NEXT:    s_cselect_b32 s2, 1, 0
-; GFX11GLISEL-FAKE16-NEXT:    s_and_b32 s2, s2, 1
+; GFX11GLISEL-FAKE16-NEXT:    v_cmp_class_f16_e64 s2, s2, 3
 ; GFX11GLISEL-FAKE16-NEXT:    s_cmp_lg_u32 s2, 0
 ; GFX11GLISEL-FAKE16-NEXT:    s_cselect_b32 s2, -1, 0
 ; GFX11GLISEL-FAKE16-NEXT:    v_mov_b32_e32 v0, s2
-; GFX11GLISEL-FAKE16-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX11GLISEL-FAKE16-NEXT:    global_store_b32 v1, v0, s[0:1]
 ; GFX11GLISEL-FAKE16-NEXT:    s_endpgm
   %result = call i1 @llvm.is.fpclass.f16(half %x, i32 3)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.ll b/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.ll
index 0a9fe10874c38..fd7424ce05af4 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.ll
@@ -34,9 +34,6 @@ define amdgpu_kernel void @sgpr_isnan_f32(ptr addrspace(1) %out, float %x) {
 ; GFX7GLISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX7GLISEL-NEXT:    v_cmp_class_f32_e64 s[4:5], s3, 3
 ; GFX7GLISEL-NEXT:    s_or_b64 s[4:5], s[4:5], s[4:5]
-; GFX7GLISEL-NEXT:    s_cselect_b32 s3, 1, 0
-; GFX7GLISEL-NEXT:    s_and_b32 s3, s3, 1
-; GFX7GLISEL-NEXT:    s_cmp_lg_u32 s3, 0
 ; GFX7GLISEL-NEXT:    s_cselect_b32 s3, -1, 0
 ; GFX7GLISEL-NEXT:    v_mov_b32_e32 v0, s3
 ; GFX7GLISEL-NEXT:    s_mov_b32 s3, 0xf000
@@ -62,9 +59,6 @@ define amdgpu_kernel void @sgpr_isnan_f32(ptr addrspace(1) %out, float %x) {
 ; GFX8GLISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX8GLISEL-NEXT:    v_cmp_class_f32_e64 s[2:3], s2, 3
 ; GFX8GLISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX8GLISEL-NEXT:    s_cselect_b32 s2, 1, 0
-; GFX8GLISEL-NEXT:    s_and_b32 s2, s2, 1
-; GFX8GLISEL-NEXT:    s_cmp_lg_u32 s2, 0
 ; GFX8GLISEL-NEXT:    s_cselect_b32 s2, -1, 0
 ; GFX8GLISEL-NEXT:    v_mov_b32_e32 v0, s0
 ; GFX8GLISEL-NEXT:    v_mov_b32_e32 v2, s2
@@ -91,9 +85,6 @@ define amdgpu_kernel void @sgpr_isnan_f32(ptr addrspace(1) %out, float %x) {
 ; GFX9GLISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9GLISEL-NEXT:    v_cmp_class_f32_e64 s[2:3], s2, 3
 ; GFX9GLISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX9GLISEL-NEXT:    s_cselect_b32 s2, 1, 0
-; GFX9GLISEL-NEXT:    s_and_b32 s2, s2, 1
-; GFX9GLISEL-NEXT:    s_cmp_lg_u32 s2, 0
 ; GFX9GLISEL-NEXT:    s_cselect_b32 s2, -1, 0
 ; GFX9GLISEL-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX9GLISEL-NEXT:    global_store_dword v1, v0, s[0:1]
@@ -113,18 +104,15 @@ define amdgpu_kernel void @sgpr_isnan_f32(ptr addrspace(1) %out, float %x) {
 ;
 ; GFX10GLISEL-LABEL: sgpr_isnan_f32:
 ; GFX10GLISEL:       ; %bb.0:
-; GFX10GLISEL-NEXT:    s_load_dword s0, s[4:5], 0x2c
+; GFX10GLISEL-NEXT:    s_clause 0x1
+; GFX10GLISEL-NEXT:    s_load_dword s2, s[4:5], 0x2c
+; GFX10GLISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; GFX10GLISEL-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX10GLISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10GLISEL-NEXT:    v_cmp_class_f32_e64 s2, s0, 3
-; GFX10GLISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX10GLISEL-NEXT:    s_cmp_lg_u32 s2, 0
-; GFX10GLISEL-NEXT:    s_cselect_b32 s2, 1, 0
-; GFX10GLISEL-NEXT:    s_and_b32 s2, s2, 1
+; GFX10GLISEL-NEXT:    v_cmp_class_f32_e64 s2, s2, 3
 ; GFX10GLISEL-NEXT:    s_cmp_lg_u32 s2, 0
 ; GFX10GLISEL-NEXT:    s_cselect_b32 s2, -1, 0
 ; GFX10GLISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX10GLISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10GLISEL-NEXT:    global_store_dword v1, v0, s[0:1]
 ; GFX10GLISEL-NEXT:    s_endpgm
 ;
@@ -143,20 +131,16 @@ define amdgpu_kernel void @sgpr_isnan_f32(ptr addrspace(1) %out, float %x) {
 ;
 ; GFX11GLISEL-LABEL: sgpr_isnan_f32:
 ; GFX11GLISEL:       ; %bb.0:
-; GFX11GLISEL-NEXT:    s_load_b32 s0, s[4:5], 0x2c
+; GFX11GLISEL-NEXT:    s_clause 0x1
+; GFX11GLISEL-NEXT:    s_load_b32 s2, s[4:5], 0x2c
+; GFX11GLISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
 ; GFX11GLISEL-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX11GLISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11GLISEL-NEXT:    v_cmp_class_f32_e64 s2, s0, 3
-; GFX11GLISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX11GLISEL-NEXT:    s_cmp_lg_u32 s2, 0
-; GFX11GLISEL-NEXT:    s_cselect_b32 s2, 1, 0
-; GFX11GLISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX11GLISEL-NEXT:    s_and_b32 s2, s2, 1
+; GFX11GLISEL-NEXT:    v_cmp_class_f32_e64 s2, s2, 3
 ; GFX11GLISEL-NEXT:    s_cmp_lg_u32 s2, 0
 ; GFX11GLISEL-NEXT:    s_cselect_b32 s2, -1, 0
 ; GFX11GLISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
 ; GFX11GLISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX11GLISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX11GLISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
 ; GFX11GLISEL-NEXT:    s_endpgm
   %result = call i1 @llvm.is.fpclass.f32(float %x, i32 3)  ; nan
@@ -186,9 +170,6 @@ define amdgpu_kernel void @sgpr_isnan_f64(ptr addrspace(1) %out, double %x) {
 ; GFX7GLISEL-NEXT:    v_cmp_class_f64_e64 s[4:5], s[2:3], 3
 ; GFX7GLISEL-NEXT:    s_mov_b32 s2, -1
 ; GFX7GLISEL-NEXT:    s_or_b64 s[4:5], s[4:5], s[4:5]
-; GFX7GLISEL-NEXT:    s_cselect_b32 s3, 1, 0
-; GFX7GLISEL-NEXT:    s_and_b32 s3, s3, 1
-; GFX7GLISEL-NEXT:    s_cmp_lg_u32 s3, 0
 ; GFX7GLISEL-NEXT:    s_cselect_b32 s3, -1, 0
 ; GFX7GLISEL-NEXT:    v_mov_b32_e32 v0, s3
 ; GFX7GLISEL-NEXT:    s_mov_b32 s3, 0xf000
@@ -214,9 +195,6 @@ define amdgpu_kernel void @sgpr_isnan_f64(ptr addrspace(1) %out, double %x) {
 ; GFX8GLISEL-NEXT:    v_mov_b32_e32 v0, s0
 ; GFX8GLISEL-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX8GLISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX8GLISEL-NEXT:    s_cselect_b32 s2, 1, 0
-; GFX8GLISEL-NEXT:    s_and_b32 s2, s2, 1
-; GFX8GLISEL-NEXT:    s_cmp_lg_u32 s2, 0
 ; GFX8GLISEL-NEXT:    s_cselect_b32 s2, -1, 0
 ; GFX8GLISEL-NEXT:    v_mov_b32_e32 v2, s2
 ; GFX8GLISEL-NEXT:    flat_store_dword v[0:1], v2
@@ -239,9 +217,6 @@ define amdgpu_kernel void @sgpr_isnan_f64(ptr addrspace(1) %out, double %x) {
 ; GFX9GLISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9GLISEL-NEXT:    v_cmp_class_f64_e64 s[2:3], s[2:3], 3
 ; GFX9GLISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX9GLISEL-NEXT:    s_cselect_b32 s2, 1, 0
-; GFX9GLISEL-NEXT:    s_and_b32 s2, s2, 1
-; GFX9GLISEL-NEXT:    s_cmp_lg_u32 s2, 0
 ; GFX9GLISEL-NEXT:    s_cselect_b32 s2, -1, 0
 ; GFX9GLISEL-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX9GLISEL-NEXT:    global_store_dword v1, v0, s[0:1]
@@ -264,9 +239,6 @@ define amdgpu_kernel void @sgpr_isnan_f64(ptr addrspace(1) %out, double %x) {
 ; GFX10GLISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10GLISEL-NEXT:    v_cmp_class_f64_e64 s2, s[2:3], 3
 ; GFX10GLISEL-NEXT:    s_cmp_lg_u32 s2, 0
-; GFX10GLISEL-NEXT:    s_cselect_b32 s2, 1, 0
-; GFX10GLISEL-NEXT:    s_and_b32 s2, s2, 1
-; GFX10GLISEL-NEXT:    s_cmp_lg_u32 s2, 0
 ; GFX10GLISEL-NEXT:    s_cselect_b32 s2, -1, 0
 ; GFX10GLISEL-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX10GLISEL-NEXT:    global_store_dword v1, v0, s[0:1]
@@ -290,10 +262,6 @@ define amdgpu_kernel void @sgpr_isnan_f64(ptr addrspace(1) %out, double %x) {
 ; GFX11GLISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX11GLISEL-NEXT:    v_cmp_class_f64_e64 s2, s[2:3], 3
 ; GFX11GLISEL-NEXT:    s_cmp_lg_u32 s2, 0
-; GFX11GLISEL-NEXT:    s_cselect_b32 s2, 1, 0
-; GFX11GLISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX11GLISEL-NEXT:    s_and_b32 s2, s2, 1
-; GFX11GLISEL-NEXT:    s_cmp_lg_u32 s2, 0
 ; GFX11GLISEL-NEXT:    s_cselect_b32 s2, -1, 0
 ; GFX11GLISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
 ; GFX11GLISEL-NEXT:    v_mov_b32_e32 v0, s2



More information about the llvm-commits mailing list