[llvm-branch-commits] [llvm] [AMDGPU][GlobalISel] Add COPY_SCC_VCC combine for VCC-SGPR-VGPR pattern (PR #179352)

via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Sat Feb 7 01:32:44 PST 2026


https://github.com/vangthao95 updated https://github.com/llvm/llvm-project/pull/179352

>From 182b5a309a691b81bdc26679f8831ada9e7b7b0b Mon Sep 17 00:00:00 2001
From: Vang Thao <vang.thao at amd.com>
Date: Mon, 2 Feb 2026 14:15:20 -0800
Subject: [PATCH 1/3] [AMDGPU][GlobalISel] Add COPY_SCC_VCC combine for
 VCC-SGPR-VGPR pattern

Eliminate VCC->SGPR->VGPR bounce created by UniInVcc when the uniform boolean
result is consumed by a VALU instruction that requires the input in VGPRs.
---
 .../Target/AMDGPU/AMDGPURegBankLegalize.cpp   |  60 ++++
 ...regbanklegalize-eliminate-copy-scc-vcc.mir | 313 ++++++++++++++++++
 .../CodeGen/AMDGPU/llvm.is.fpclass.f16.ll     | 150 +++------
 llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.ll   | 300 +++++------------
 4 files changed, 506 insertions(+), 317 deletions(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/regbanklegalize-eliminate-copy-scc-vcc.mir

diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp
index 2e009cdec06ac..eb6a5b0c9edf1 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp
@@ -46,6 +46,12 @@ m_GAMDGPUReadAnyLane(const SrcTy &Src) {
   return UnaryOp_match<SrcTy, AMDGPU::G_AMDGPU_READANYLANE>(Src);
 }
 
+template <typename SrcTy>
+inline UnaryOp_match<SrcTy, AMDGPU::G_AMDGPU_COPY_SCC_VCC>
+m_GAMDGPUCopySccVcc(const SrcTy &Src) {
+  return UnaryOp_match<SrcTy, AMDGPU::G_AMDGPU_COPY_SCC_VCC>(Src);
+}
+
 class AMDGPURegBankLegalize : public MachineFunctionPass {
 public:
   static char ID;
@@ -136,6 +142,7 @@ class AMDGPURegBankLegalizeCombiner {
   bool tryEliminateReadAnyLane(MachineInstr &Copy);
   void tryCombineCopy(MachineInstr &MI);
   void tryCombineS1AnyExt(MachineInstr &MI);
+  bool tryEliminateCopySccVcc(MachineInstr &MI);
 };
 
 bool AMDGPURegBankLegalizeCombiner::isLaneMask(Register Reg) {
@@ -396,6 +403,57 @@ void AMDGPURegBankLegalizeCombiner::tryCombineS1AnyExt(MachineInstr &MI) {
   llvm_unreachable("missing anyext + trunc combine");
 }
 
+bool AMDGPURegBankLegalizeCombiner::tryEliminateCopySccVcc(MachineInstr &MI) {
+  // Eliminate VCC->SGPR->VGPR bounce for uniform boolean extensions. This is
+// caused by UniInVcc, which creates G_AMDGPU_COPY_SCC_VCC forcing the result
+// to SGPR, which in turn is forced back to VGPR by a subsequent instruction.
+  //
+  // %vcc:vcc(s1) = ...
+  // %sgpr:sgpr(s32) = G_AMDGPU_COPY_SCC_VCC %vcc
+  // %and:sgpr(s32) = G_AND %sgpr, 1
+  // %sel:sgpr(s32) = G_SELECT %and, {-1|1}, 0
+  // %vgpr:vgpr(s32) = COPY %sel
+  // ->
+  // %vgpr:vgpr(s32) = G_SELECT %vcc, {-1|1}, 0
+
+  Register VgprDst = MI.getOperand(0).getReg();
+  Register SgprSrc = MI.getOperand(1).getReg();
+
+  if (!VgprDst.isVirtual() || !SgprSrc.isVirtual())
+    return false;
+
+  if (MRI.getRegBankOrNull(VgprDst) != VgprRB ||
+      MRI.getRegBankOrNull(SgprSrc) != SgprRB)
+    return false;
+
+  // Match: G_SELECT (G_AND (G_AMDGPU_COPY_SCC_VCC %vcc), 1), SelTrueReg, 0
+  Register VccReg, SelTrueReg;
+  if (!mi_match(SgprSrc, MRI,
+                m_GISelect(m_GAnd(m_GAMDGPUCopySccVcc(m_Reg(VccReg)),
+                                  m_SpecificICst(1)),
+                           m_Reg(SelTrueReg), m_ZeroInt())))
+    return false;
+
+  if (MRI.getRegBankOrNull(VccReg) != VccRB)
+    return false;
+
+  // SelTrueReg must be constant -1 (SEXT) or 1 (ZEXT).
+  auto SelTrueConst = getIConstantVRegValWithLookThrough(SelTrueReg, MRI);
+  if (!SelTrueConst)
+    return false;
+  int64_t SelTrueVal = SelTrueConst->Value.getSExtValue();
+  if (SelTrueVal != -1 && SelTrueVal != 1)
+    return false;
+
+  B.setInstrAndDebugLoc(MI);
+  LLT Ty = MRI.getType(VgprDst);
+  B.buildSelect(VgprDst, VccReg, B.buildConstant({VgprRB, Ty}, SelTrueVal),
+                B.buildConstant({VgprRB, Ty}, 0));
+
+  eraseInstr(MI, MRI);
+  return true;
+}
+
 // Search through MRI for virtual registers with sgpr register bank and S1 LLT.
 [[maybe_unused]] static Register getAnySgprS1(const MachineRegisterInfo &MRI) {
   const LLT S1 = LLT::scalar(1);
@@ -525,6 +583,8 @@ bool AMDGPURegBankLegalize::runOnMachineFunction(MachineFunction &MF) {
   for (MachineBasicBlock &MBB : MF) {
     for (MachineInstr &MI : make_early_inc_range(MBB)) {
       if (MI.getOpcode() == AMDGPU::COPY) {
+        if (Combiner.tryEliminateCopySccVcc(MI))
+          continue;
         Combiner.tryCombineCopy(MI);
         continue;
       }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbanklegalize-eliminate-copy-scc-vcc.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbanklegalize-eliminate-copy-scc-vcc.mir
new file mode 100644
index 0000000000000..ad89b8ba603c8
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbanklegalize-eliminate-copy-scc-vcc.mir
@@ -0,0 +1,313 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass=amdgpu-regbanklegalize %s -verify-machineinstrs -o - | FileCheck %s
+
+---
+name: test_eliminate_sext_to_vgpr
+legalized: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0, $sgpr2_sgpr3
+    ; CHECK-LABEL: name: test_eliminate_sext_to_vgpr
+    ; CHECK: liveins: $sgpr0, $sgpr2_sgpr3
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(p1) = COPY $sgpr2_sgpr3
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
+    ; CHECK-NEXT: [[IS_FPCLASS:%[0-9]+]]:vcc(s1) = G_IS_FPCLASS [[COPY2]](s32), 3
+    ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 -1
+    ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[IS_FPCLASS]](s1), [[C1]], [[C]]
+    ; CHECK-NEXT: G_STORE [[SELECT]](s32), [[COPY1]](p1) :: (store (s32), addrspace 1)
+    ; CHECK-NEXT: S_ENDPGM 0
+    %0:sgpr(s32) = COPY $sgpr0
+    %1:sgpr(p1) = COPY $sgpr2_sgpr3
+    %2:sgpr(s1) = G_IS_FPCLASS %0, 3
+    %3:sgpr(s32) = G_SEXT %2
+    G_STORE %3, %1 :: (store (s32), addrspace 1)
+    S_ENDPGM 0
+...
+
+---
+name: test_eliminate_zext_to_vgpr
+legalized: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0, $sgpr2_sgpr3
+    ; CHECK-LABEL: name: test_eliminate_zext_to_vgpr
+    ; CHECK: liveins: $sgpr0, $sgpr2_sgpr3
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(p1) = COPY $sgpr2_sgpr3
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
+    ; CHECK-NEXT: [[IS_FPCLASS:%[0-9]+]]:vcc(s1) = G_IS_FPCLASS [[COPY2]](s32), 3
+    ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1
+    ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[IS_FPCLASS]](s1), [[C1]], [[C]]
+    ; CHECK-NEXT: G_STORE [[SELECT]](s32), [[COPY1]](p1) :: (store (s32), addrspace 1)
+    ; CHECK-NEXT: S_ENDPGM 0
+    %0:sgpr(s32) = COPY $sgpr0
+    %1:sgpr(p1) = COPY $sgpr2_sgpr3
+    %2:sgpr(s1) = G_IS_FPCLASS %0, 3
+    %3:sgpr(s32) = G_ZEXT %2
+    G_STORE %3, %1 :: (store (s32), addrspace 1)
+    S_ENDPGM 0
+...
+
+---
+name: test_eliminate_sext_to_s64_vgpr
+legalized: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0, $sgpr2_sgpr3
+    ; CHECK-LABEL: name: test_eliminate_sext_to_s64_vgpr
+    ; CHECK: liveins: $sgpr0, $sgpr2_sgpr3
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(p1) = COPY $sgpr2_sgpr3
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
+    ; CHECK-NEXT: [[IS_FPCLASS:%[0-9]+]]:vcc(s1) = G_IS_FPCLASS [[COPY2]](s32), 3
+    ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 -1
+    ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s64) = G_SELECT [[IS_FPCLASS]](s1), [[C1]], [[C]]
+    ; CHECK-NEXT: G_STORE [[SELECT]](s64), [[COPY1]](p1) :: (store (s64), addrspace 1)
+    ; CHECK-NEXT: S_ENDPGM 0
+    %0:sgpr(s32) = COPY $sgpr0
+    %1:sgpr(p1) = COPY $sgpr2_sgpr3
+    %2:sgpr(s1) = G_IS_FPCLASS %0, 3
+    %3:sgpr(s64) = G_SEXT %2
+    G_STORE %3, %1 :: (store (s64), addrspace 1)
+    S_ENDPGM 0
+...
+
+---
+name: test_eliminate_zext_to_s64_vgpr
+legalized: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0, $sgpr2_sgpr3
+    ; CHECK-LABEL: name: test_eliminate_zext_to_s64_vgpr
+    ; CHECK: liveins: $sgpr0, $sgpr2_sgpr3
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(p1) = COPY $sgpr2_sgpr3
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
+    ; CHECK-NEXT: [[IS_FPCLASS:%[0-9]+]]:vcc(s1) = G_IS_FPCLASS [[COPY2]](s32), 3
+    ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 1
+    ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s64) = G_SELECT [[IS_FPCLASS]](s1), [[C1]], [[C]]
+    ; CHECK-NEXT: G_STORE [[SELECT]](s64), [[COPY1]](p1) :: (store (s64), addrspace 1)
+    ; CHECK-NEXT: S_ENDPGM 0
+    %0:sgpr(s32) = COPY $sgpr0
+    %1:sgpr(p1) = COPY $sgpr2_sgpr3
+    %2:sgpr(s1) = G_IS_FPCLASS %0, 3
+    %3:sgpr(s64) = G_ZEXT %2
+    G_STORE %3, %1 :: (store (s64), addrspace 1)
+    S_ENDPGM 0
+...
+
+---
+name: test_eliminate_anyext_to_vgpr
+legalized: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0, $sgpr2_sgpr3
+    ; CHECK-LABEL: name: test_eliminate_anyext_to_vgpr
+    ; CHECK: liveins: $sgpr0, $sgpr2_sgpr3
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(p1) = COPY $sgpr2_sgpr3
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
+    ; CHECK-NEXT: [[IS_FPCLASS:%[0-9]+]]:vcc(s1) = G_IS_FPCLASS [[COPY2]](s32), 3
+    ; CHECK-NEXT: [[AMDGPU_COPY_SCC_VCC:%[0-9]+]]:sgpr(s32) = G_AMDGPU_COPY_SCC_VCC [[IS_FPCLASS]](s1)
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[AMDGPU_COPY_SCC_VCC]](s32)
+    ; CHECK-NEXT: G_STORE [[COPY3]](s32), [[COPY1]](p1) :: (store (s32), addrspace 1)
+    ; CHECK-NEXT: S_ENDPGM 0
+    %0:sgpr(s32) = COPY $sgpr0
+    %1:sgpr(p1) = COPY $sgpr2_sgpr3
+    %2:sgpr(s1) = G_IS_FPCLASS %0, 3
+    %3:sgpr(s32) = G_ANYEXT %2
+    G_STORE %3, %1 :: (store (s32), addrspace 1)
+    S_ENDPGM 0
+...
+
+---
+name: test_eliminate_fcmp_source
+legalized: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0, $sgpr1, $sgpr2_sgpr3
+    ; CHECK-LABEL: name: test_eliminate_fcmp_source
+    ; CHECK: liveins: $sgpr0, $sgpr1, $sgpr2_sgpr3
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(p1) = COPY $sgpr2_sgpr3
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
+    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
+    ; CHECK-NEXT: [[FCMP:%[0-9]+]]:vcc(s1) = G_FCMP floatpred(oeq), [[COPY3]](s32), [[COPY4]]
+    ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1
+    ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[FCMP]](s1), [[C1]], [[C]]
+    ; CHECK-NEXT: G_STORE [[SELECT]](s32), [[COPY2]](p1) :: (store (s32), addrspace 1)
+    ; CHECK-NEXT: S_ENDPGM 0
+    %0:sgpr(s32) = COPY $sgpr0
+    %1:sgpr(s32) = COPY $sgpr1
+    %2:sgpr(p1) = COPY $sgpr2_sgpr3
+    %3:sgpr(s1) = G_FCMP floatpred(oeq), %0, %1
+    %4:sgpr(s32) = G_ZEXT %3
+    G_STORE %4, %2 :: (store (s32), addrspace 1)
+    S_ENDPGM 0
+...
+
+---
+name: test_eliminate_multiple_vgpr_copies
+legalized: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0, $sgpr2_sgpr3, $sgpr4_sgpr5
+    ; CHECK-LABEL: name: test_eliminate_multiple_vgpr_copies
+    ; CHECK: liveins: $sgpr0, $sgpr2_sgpr3, $sgpr4_sgpr5
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(p1) = COPY $sgpr2_sgpr3
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(p1) = COPY $sgpr4_sgpr5
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
+    ; CHECK-NEXT: [[IS_FPCLASS:%[0-9]+]]:vcc(s1) = G_IS_FPCLASS [[COPY3]](s32), 3
+    ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 -1
+    ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[IS_FPCLASS]](s1), [[C1]], [[C]]
+    ; CHECK-NEXT: G_STORE [[SELECT]](s32), [[COPY1]](p1) :: (store (s32), addrspace 1)
+    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[SELECT]](s32)
+    ; CHECK-NEXT: G_STORE [[COPY4]](s32), [[COPY2]](p1) :: (store (s32), addrspace 1)
+    ; CHECK-NEXT: S_ENDPGM 0
+    %0:sgpr(s32) = COPY $sgpr0
+    %1:sgpr(p1) = COPY $sgpr2_sgpr3
+    %2:sgpr(p1) = COPY $sgpr4_sgpr5
+    %3:sgpr(s1) = G_IS_FPCLASS %0, 3
+    %4:sgpr(s32) = G_SEXT %3
+    G_STORE %4, %1 :: (store (s32), addrspace 1)
+    G_STORE %4, %2 :: (store (s32), addrspace 1)
+    S_ENDPGM 0
+...
+
+---
+name: test_mixed_uses
+legalized: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0, $sgpr1, $sgpr2_sgpr3
+    ; CHECK-LABEL: name: test_mixed_uses
+    ; CHECK: liveins: $sgpr0, $sgpr1, $sgpr2_sgpr3
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(p1) = COPY $sgpr2_sgpr3
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
+    ; CHECK-NEXT: [[IS_FPCLASS:%[0-9]+]]:vcc(s1) = G_IS_FPCLASS [[COPY3]](s32), 3
+    ; CHECK-NEXT: [[AMDGPU_COPY_SCC_VCC:%[0-9]+]]:sgpr(s32) = G_AMDGPU_COPY_SCC_VCC [[IS_FPCLASS]](s1)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[AMDGPU_COPY_SCC_VCC]], [[C]]
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -1
+    ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[AND]](s32), [[C1]], [[C2]]
+    ; CHECK-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[SELECT]], [[COPY1]]
+    ; CHECK-NEXT: [[C3:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[C4:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 -1
+    ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[IS_FPCLASS]](s1), [[C4]], [[C3]]
+    ; CHECK-NEXT: G_STORE [[SELECT1]](s32), [[COPY2]](p1) :: (store (s32), addrspace 1)
+    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[ADD]](s32)
+    ; CHECK-NEXT: G_STORE [[COPY4]](s32), [[COPY2]](p1) :: (store (s32), addrspace 1)
+    ; CHECK-NEXT: S_ENDPGM 0
+    %0:sgpr(s32) = COPY $sgpr0
+    %1:sgpr(s32) = COPY $sgpr1
+    %2:sgpr(p1) = COPY $sgpr2_sgpr3
+    %3:sgpr(s1) = G_IS_FPCLASS %0, 3
+    %4:sgpr(s32) = G_SEXT %3
+    %5:sgpr(s32) = G_ADD %4, %1
+    G_STORE %4, %2 :: (store (s32), addrspace 1)
+    G_STORE %5, %2 :: (store (s32), addrspace 1)
+    S_ENDPGM 0
+...
+
+---
+name: test_no_eliminate_branch
+legalized: true
+tracksRegLiveness: true
+
+body: |
+  ; CHECK-LABEL: name: test_no_eliminate_branch
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT:   liveins: $sgpr0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[IS_FPCLASS:%[0-9]+]]:vcc(s1) = G_IS_FPCLASS [[COPY1]](s32), 3
+  ; CHECK-NEXT:   [[AMDGPU_COPY_SCC_VCC:%[0-9]+]]:sgpr(s32) = G_AMDGPU_COPY_SCC_VCC [[IS_FPCLASS]](s1)
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1
+  ; CHECK-NEXT:   [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[AMDGPU_COPY_SCC_VCC]], [[C]]
+  ; CHECK-NEXT:   G_BRCOND [[AND]](s32), %bb.1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   S_ENDPGM 0
+  bb.0:
+    liveins: $sgpr0
+    %0:sgpr(s32) = COPY $sgpr0
+    %1:sgpr(s1) = G_IS_FPCLASS %0, 3
+    G_BRCOND %1, %bb.1
+
+  bb.1:
+    S_ENDPGM 0
+...
+
+---
+name: test_no_eliminate_scalar_arithmetic_use
+legalized: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0, $sgpr1, $sgpr2_sgpr3
+    ; CHECK-LABEL: name: test_no_eliminate_scalar_arithmetic_use
+    ; CHECK: liveins: $sgpr0, $sgpr1, $sgpr2_sgpr3
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(p1) = COPY $sgpr2_sgpr3
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
+    ; CHECK-NEXT: [[IS_FPCLASS:%[0-9]+]]:vcc(s1) = G_IS_FPCLASS [[COPY3]](s32), 3
+    ; CHECK-NEXT: [[AMDGPU_COPY_SCC_VCC:%[0-9]+]]:sgpr(s32) = G_AMDGPU_COPY_SCC_VCC [[IS_FPCLASS]](s1)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[AMDGPU_COPY_SCC_VCC]], [[C]]
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -1
+    ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[AND]](s32), [[C1]], [[C2]]
+    ; CHECK-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[SELECT]], [[COPY1]]
+    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[ADD]](s32)
+    ; CHECK-NEXT: G_STORE [[COPY4]](s32), [[COPY2]](p1) :: (store (s32), addrspace 1)
+    ; CHECK-NEXT: S_ENDPGM 0
+    %0:sgpr(s32) = COPY $sgpr0
+    %1:sgpr(s32) = COPY $sgpr1
+    %2:sgpr(p1) = COPY $sgpr2_sgpr3
+    %3:sgpr(s1) = G_IS_FPCLASS %0, 3
+    %4:sgpr(s32) = G_SEXT %3
+    %5:sgpr(s32) = G_ADD %4, %1
+    G_STORE %5, %2 :: (store (s32), addrspace 1)
+    S_ENDPGM 0
+...
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll
index cc5f6f842625c..8dd57a6c04386 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll
@@ -45,90 +45,40 @@ define amdgpu_kernel void @sgpr_isnan_f16(ptr addrspace(1) %out, half %x) {
 ; GFX7GLISEL-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX7GLISEL-NEXT:    s_endpgm
 ;
-; GFX8SELDAG-LABEL: sgpr_isnan_f16:
-; GFX8SELDAG:       ; %bb.0:
-; GFX8SELDAG-NEXT:    s_load_dword s2, s[4:5], 0x2c
-; GFX8SELDAG-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8SELDAG-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8SELDAG-NEXT:    v_cmp_class_f16_e64 s[2:3], s2, 3
-; GFX8SELDAG-NEXT:    v_mov_b32_e32 v0, s0
-; GFX8SELDAG-NEXT:    v_cndmask_b32_e64 v2, 0, -1, s[2:3]
-; GFX8SELDAG-NEXT:    v_mov_b32_e32 v1, s1
-; GFX8SELDAG-NEXT:    flat_store_dword v[0:1], v2
-; GFX8SELDAG-NEXT:    s_endpgm
-;
-; GFX8GLISEL-LABEL: sgpr_isnan_f16:
-; GFX8GLISEL:       ; %bb.0:
-; GFX8GLISEL-NEXT:    s_load_dword s2, s[4:5], 0x2c
-; GFX8GLISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8GLISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8GLISEL-NEXT:    v_cmp_class_f16_e64 s[2:3], s2, 3
-; GFX8GLISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX8GLISEL-NEXT:    s_cselect_b32 s2, 1, 0
-; GFX8GLISEL-NEXT:    s_and_b32 s2, s2, 1
-; GFX8GLISEL-NEXT:    s_cmp_lg_u32 s2, 0
-; GFX8GLISEL-NEXT:    s_cselect_b32 s2, -1, 0
-; GFX8GLISEL-NEXT:    v_mov_b32_e32 v0, s0
-; GFX8GLISEL-NEXT:    v_mov_b32_e32 v2, s2
-; GFX8GLISEL-NEXT:    v_mov_b32_e32 v1, s1
-; GFX8GLISEL-NEXT:    flat_store_dword v[0:1], v2
-; GFX8GLISEL-NEXT:    s_endpgm
-;
-; GFX9SELDAG-LABEL: sgpr_isnan_f16:
-; GFX9SELDAG:       ; %bb.0:
-; GFX9SELDAG-NEXT:    s_load_dword s2, s[4:5], 0x2c
-; GFX9SELDAG-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9SELDAG-NEXT:    v_mov_b32_e32 v0, 0
-; GFX9SELDAG-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9SELDAG-NEXT:    v_cmp_class_f16_e64 s[2:3], s2, 3
-; GFX9SELDAG-NEXT:    v_cndmask_b32_e64 v1, 0, -1, s[2:3]
-; GFX9SELDAG-NEXT:    global_store_dword v0, v1, s[0:1]
-; GFX9SELDAG-NEXT:    s_endpgm
-;
-; GFX9GLISEL-LABEL: sgpr_isnan_f16:
-; GFX9GLISEL:       ; %bb.0:
-; GFX9GLISEL-NEXT:    s_load_dword s2, s[4:5], 0x2c
-; GFX9GLISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9GLISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX9GLISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9GLISEL-NEXT:    v_cmp_class_f16_e64 s[2:3], s2, 3
-; GFX9GLISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX9GLISEL-NEXT:    s_cselect_b32 s2, 1, 0
-; GFX9GLISEL-NEXT:    s_and_b32 s2, s2, 1
-; GFX9GLISEL-NEXT:    s_cmp_lg_u32 s2, 0
-; GFX9GLISEL-NEXT:    s_cselect_b32 s2, -1, 0
-; GFX9GLISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX9GLISEL-NEXT:    global_store_dword v1, v0, s[0:1]
-; GFX9GLISEL-NEXT:    s_endpgm
-;
-; GFX10SELDAG-LABEL: sgpr_isnan_f16:
-; GFX10SELDAG:       ; %bb.0:
-; GFX10SELDAG-NEXT:    s_clause 0x1
-; GFX10SELDAG-NEXT:    s_load_dword s2, s[4:5], 0x2c
-; GFX10SELDAG-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX10SELDAG-NEXT:    v_mov_b32_e32 v0, 0
-; GFX10SELDAG-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10SELDAG-NEXT:    v_cmp_class_f16_e64 s2, s2, 3
-; GFX10SELDAG-NEXT:    v_cndmask_b32_e64 v1, 0, -1, s2
-; GFX10SELDAG-NEXT:    global_store_dword v0, v1, s[0:1]
-; GFX10SELDAG-NEXT:    s_endpgm
-;
-; GFX10GLISEL-LABEL: sgpr_isnan_f16:
-; GFX10GLISEL:       ; %bb.0:
-; GFX10GLISEL-NEXT:    s_load_dword s0, s[4:5], 0x2c
-; GFX10GLISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX10GLISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10GLISEL-NEXT:    v_cmp_class_f16_e64 s2, s0, 3
-; GFX10GLISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX10GLISEL-NEXT:    s_cmp_lg_u32 s2, 0
-; GFX10GLISEL-NEXT:    s_cselect_b32 s2, 1, 0
-; GFX10GLISEL-NEXT:    s_and_b32 s2, s2, 1
-; GFX10GLISEL-NEXT:    s_cmp_lg_u32 s2, 0
-; GFX10GLISEL-NEXT:    s_cselect_b32 s2, -1, 0
-; GFX10GLISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX10GLISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10GLISEL-NEXT:    global_store_dword v1, v0, s[0:1]
-; GFX10GLISEL-NEXT:    s_endpgm
+; GFX8CHECK-LABEL: sgpr_isnan_f16:
+; GFX8CHECK:       ; %bb.0:
+; GFX8CHECK-NEXT:    s_load_dword s2, s[4:5], 0x2c
+; GFX8CHECK-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX8CHECK-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX8CHECK-NEXT:    v_cmp_class_f16_e64 s[2:3], s2, 3
+; GFX8CHECK-NEXT:    v_mov_b32_e32 v0, s0
+; GFX8CHECK-NEXT:    v_cndmask_b32_e64 v2, 0, -1, s[2:3]
+; GFX8CHECK-NEXT:    v_mov_b32_e32 v1, s1
+; GFX8CHECK-NEXT:    flat_store_dword v[0:1], v2
+; GFX8CHECK-NEXT:    s_endpgm
+;
+; GFX9CHECK-LABEL: sgpr_isnan_f16:
+; GFX9CHECK:       ; %bb.0:
+; GFX9CHECK-NEXT:    s_load_dword s2, s[4:5], 0x2c
+; GFX9CHECK-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9CHECK-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9CHECK-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9CHECK-NEXT:    v_cmp_class_f16_e64 s[2:3], s2, 3
+; GFX9CHECK-NEXT:    v_cndmask_b32_e64 v1, 0, -1, s[2:3]
+; GFX9CHECK-NEXT:    global_store_dword v0, v1, s[0:1]
+; GFX9CHECK-NEXT:    s_endpgm
+;
+; GFX10CHECK-LABEL: sgpr_isnan_f16:
+; GFX10CHECK:       ; %bb.0:
+; GFX10CHECK-NEXT:    s_clause 0x1
+; GFX10CHECK-NEXT:    s_load_dword s2, s[4:5], 0x2c
+; GFX10CHECK-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX10CHECK-NEXT:    v_mov_b32_e32 v0, 0
+; GFX10CHECK-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX10CHECK-NEXT:    v_cmp_class_f16_e64 s2, s2, 3
+; GFX10CHECK-NEXT:    v_cndmask_b32_e64 v1, 0, -1, s2
+; GFX10CHECK-NEXT:    global_store_dword v0, v1, s[0:1]
+; GFX10CHECK-NEXT:    s_endpgm
 ;
 ; GFX11SELDAG-TRUE16-LABEL: sgpr_isnan_f16:
 ; GFX11SELDAG-TRUE16:       ; %bb.0:
@@ -156,36 +106,26 @@ define amdgpu_kernel void @sgpr_isnan_f16(ptr addrspace(1) %out, half %x) {
 ;
 ; GFX11GLISEL-TRUE16-LABEL: sgpr_isnan_f16:
 ; GFX11GLISEL-TRUE16:       ; %bb.0:
-; GFX11GLISEL-TRUE16-NEXT:    s_load_b32 s0, s[4:5], 0x2c
-; GFX11GLISEL-TRUE16-NEXT:    v_dual_mov_b32 v0, 3 :: v_dual_mov_b32 v1, 0
-; GFX11GLISEL-TRUE16-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11GLISEL-TRUE16-NEXT:    v_cmp_class_f16_e32 vcc_lo, s0, v0.l
+; GFX11GLISEL-TRUE16-NEXT:    s_clause 0x1
+; GFX11GLISEL-TRUE16-NEXT:    s_load_b32 s2, s[4:5], 0x2c
 ; GFX11GLISEL-TRUE16-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX11GLISEL-TRUE16-NEXT:    s_cmp_lg_u32 vcc_lo, 0
-; GFX11GLISEL-TRUE16-NEXT:    s_cselect_b32 s2, 1, 0
-; GFX11GLISEL-TRUE16-NEXT:    s_and_b32 s2, s2, 1
-; GFX11GLISEL-TRUE16-NEXT:    s_cmp_lg_u32 s2, 0
-; GFX11GLISEL-TRUE16-NEXT:    s_cselect_b32 s2, -1, 0
-; GFX11GLISEL-TRUE16-NEXT:    v_mov_b32_e32 v0, s2
+; GFX11GLISEL-TRUE16-NEXT:    v_dual_mov_b32 v0, 3 :: v_dual_mov_b32 v1, 0
 ; GFX11GLISEL-TRUE16-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11GLISEL-TRUE16-NEXT:    v_cmp_class_f16_e32 vcc_lo, s2, v0.l
+; GFX11GLISEL-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc_lo
 ; GFX11GLISEL-TRUE16-NEXT:    global_store_b32 v1, v0, s[0:1]
 ; GFX11GLISEL-TRUE16-NEXT:    s_endpgm
 ;
 ; GFX11GLISEL-FAKE16-LABEL: sgpr_isnan_f16:
 ; GFX11GLISEL-FAKE16:       ; %bb.0:
-; GFX11GLISEL-FAKE16-NEXT:    s_load_b32 s0, s[4:5], 0x2c
-; GFX11GLISEL-FAKE16-NEXT:    v_mov_b32_e32 v1, 0
-; GFX11GLISEL-FAKE16-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11GLISEL-FAKE16-NEXT:    v_cmp_class_f16_e64 s2, s0, 3
+; GFX11GLISEL-FAKE16-NEXT:    s_clause 0x1
+; GFX11GLISEL-FAKE16-NEXT:    s_load_b32 s2, s[4:5], 0x2c
 ; GFX11GLISEL-FAKE16-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX11GLISEL-FAKE16-NEXT:    s_cmp_lg_u32 s2, 0
-; GFX11GLISEL-FAKE16-NEXT:    s_cselect_b32 s2, 1, 0
-; GFX11GLISEL-FAKE16-NEXT:    s_and_b32 s2, s2, 1
-; GFX11GLISEL-FAKE16-NEXT:    s_cmp_lg_u32 s2, 0
-; GFX11GLISEL-FAKE16-NEXT:    s_cselect_b32 s2, -1, 0
-; GFX11GLISEL-FAKE16-NEXT:    v_mov_b32_e32 v0, s2
+; GFX11GLISEL-FAKE16-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX11GLISEL-FAKE16-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11GLISEL-FAKE16-NEXT:    global_store_b32 v1, v0, s[0:1]
+; GFX11GLISEL-FAKE16-NEXT:    v_cmp_class_f16_e64 s2, s2, 3
+; GFX11GLISEL-FAKE16-NEXT:    v_cndmask_b32_e64 v1, 0, -1, s2
+; GFX11GLISEL-FAKE16-NEXT:    global_store_b32 v0, v1, s[0:1]
 ; GFX11GLISEL-FAKE16-NEXT:    s_endpgm
   %result = call i1 @llvm.is.fpclass.f16(half %x, i32 3)
   %sext = sext i1 %result to i32
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.ll b/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.ll
index 0a9fe10874c38..639d2c136f128 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.ll
@@ -33,132 +33,58 @@ define amdgpu_kernel void @sgpr_isnan_f32(ptr addrspace(1) %out, float %x) {
 ; GFX7GLISEL-NEXT:    s_mov_b32 s2, -1
 ; GFX7GLISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX7GLISEL-NEXT:    v_cmp_class_f32_e64 s[4:5], s3, 3
-; GFX7GLISEL-NEXT:    s_or_b64 s[4:5], s[4:5], s[4:5]
-; GFX7GLISEL-NEXT:    s_cselect_b32 s3, 1, 0
-; GFX7GLISEL-NEXT:    s_and_b32 s3, s3, 1
-; GFX7GLISEL-NEXT:    s_cmp_lg_u32 s3, 0
-; GFX7GLISEL-NEXT:    s_cselect_b32 s3, -1, 0
-; GFX7GLISEL-NEXT:    v_mov_b32_e32 v0, s3
+; GFX7GLISEL-NEXT:    v_cndmask_b32_e64 v0, 0, -1, s[4:5]
 ; GFX7GLISEL-NEXT:    s_mov_b32 s3, 0xf000
 ; GFX7GLISEL-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX7GLISEL-NEXT:    s_endpgm
 ;
-; GFX8SELDAG-LABEL: sgpr_isnan_f32:
-; GFX8SELDAG:       ; %bb.0:
-; GFX8SELDAG-NEXT:    s_load_dword s2, s[4:5], 0x2c
-; GFX8SELDAG-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8SELDAG-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8SELDAG-NEXT:    v_cmp_class_f32_e64 s[2:3], s2, 3
-; GFX8SELDAG-NEXT:    v_mov_b32_e32 v0, s0
-; GFX8SELDAG-NEXT:    v_cndmask_b32_e64 v2, 0, -1, s[2:3]
-; GFX8SELDAG-NEXT:    v_mov_b32_e32 v1, s1
-; GFX8SELDAG-NEXT:    flat_store_dword v[0:1], v2
-; GFX8SELDAG-NEXT:    s_endpgm
-;
-; GFX8GLISEL-LABEL: sgpr_isnan_f32:
-; GFX8GLISEL:       ; %bb.0:
-; GFX8GLISEL-NEXT:    s_load_dword s2, s[4:5], 0x2c
-; GFX8GLISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8GLISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8GLISEL-NEXT:    v_cmp_class_f32_e64 s[2:3], s2, 3
-; GFX8GLISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX8GLISEL-NEXT:    s_cselect_b32 s2, 1, 0
-; GFX8GLISEL-NEXT:    s_and_b32 s2, s2, 1
-; GFX8GLISEL-NEXT:    s_cmp_lg_u32 s2, 0
-; GFX8GLISEL-NEXT:    s_cselect_b32 s2, -1, 0
-; GFX8GLISEL-NEXT:    v_mov_b32_e32 v0, s0
-; GFX8GLISEL-NEXT:    v_mov_b32_e32 v2, s2
-; GFX8GLISEL-NEXT:    v_mov_b32_e32 v1, s1
-; GFX8GLISEL-NEXT:    flat_store_dword v[0:1], v2
-; GFX8GLISEL-NEXT:    s_endpgm
-;
-; GFX9SELDAG-LABEL: sgpr_isnan_f32:
-; GFX9SELDAG:       ; %bb.0:
-; GFX9SELDAG-NEXT:    s_load_dword s2, s[4:5], 0x2c
-; GFX9SELDAG-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9SELDAG-NEXT:    v_mov_b32_e32 v0, 0
-; GFX9SELDAG-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9SELDAG-NEXT:    v_cmp_class_f32_e64 s[2:3], s2, 3
-; GFX9SELDAG-NEXT:    v_cndmask_b32_e64 v1, 0, -1, s[2:3]
-; GFX9SELDAG-NEXT:    global_store_dword v0, v1, s[0:1]
-; GFX9SELDAG-NEXT:    s_endpgm
-;
-; GFX9GLISEL-LABEL: sgpr_isnan_f32:
-; GFX9GLISEL:       ; %bb.0:
-; GFX9GLISEL-NEXT:    s_load_dword s2, s[4:5], 0x2c
-; GFX9GLISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9GLISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX9GLISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9GLISEL-NEXT:    v_cmp_class_f32_e64 s[2:3], s2, 3
-; GFX9GLISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX9GLISEL-NEXT:    s_cselect_b32 s2, 1, 0
-; GFX9GLISEL-NEXT:    s_and_b32 s2, s2, 1
-; GFX9GLISEL-NEXT:    s_cmp_lg_u32 s2, 0
-; GFX9GLISEL-NEXT:    s_cselect_b32 s2, -1, 0
-; GFX9GLISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX9GLISEL-NEXT:    global_store_dword v1, v0, s[0:1]
-; GFX9GLISEL-NEXT:    s_endpgm
-;
-; GFX10SELDAG-LABEL: sgpr_isnan_f32:
-; GFX10SELDAG:       ; %bb.0:
-; GFX10SELDAG-NEXT:    s_clause 0x1
-; GFX10SELDAG-NEXT:    s_load_dword s2, s[4:5], 0x2c
-; GFX10SELDAG-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX10SELDAG-NEXT:    v_mov_b32_e32 v0, 0
-; GFX10SELDAG-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10SELDAG-NEXT:    v_cmp_class_f32_e64 s2, s2, 3
-; GFX10SELDAG-NEXT:    v_cndmask_b32_e64 v1, 0, -1, s2
-; GFX10SELDAG-NEXT:    global_store_dword v0, v1, s[0:1]
-; GFX10SELDAG-NEXT:    s_endpgm
-;
-; GFX10GLISEL-LABEL: sgpr_isnan_f32:
-; GFX10GLISEL:       ; %bb.0:
-; GFX10GLISEL-NEXT:    s_load_dword s0, s[4:5], 0x2c
-; GFX10GLISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX10GLISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10GLISEL-NEXT:    v_cmp_class_f32_e64 s2, s0, 3
-; GFX10GLISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX10GLISEL-NEXT:    s_cmp_lg_u32 s2, 0
-; GFX10GLISEL-NEXT:    s_cselect_b32 s2, 1, 0
-; GFX10GLISEL-NEXT:    s_and_b32 s2, s2, 1
-; GFX10GLISEL-NEXT:    s_cmp_lg_u32 s2, 0
-; GFX10GLISEL-NEXT:    s_cselect_b32 s2, -1, 0
-; GFX10GLISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX10GLISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10GLISEL-NEXT:    global_store_dword v1, v0, s[0:1]
-; GFX10GLISEL-NEXT:    s_endpgm
-;
-; GFX11SELDAG-LABEL: sgpr_isnan_f32:
-; GFX11SELDAG:       ; %bb.0:
-; GFX11SELDAG-NEXT:    s_clause 0x1
-; GFX11SELDAG-NEXT:    s_load_b32 s2, s[4:5], 0x2c
-; GFX11SELDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX11SELDAG-NEXT:    v_mov_b32_e32 v0, 0
-; GFX11SELDAG-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11SELDAG-NEXT:    v_cmp_class_f32_e64 s2, s2, 3
-; GFX11SELDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11SELDAG-NEXT:    v_cndmask_b32_e64 v1, 0, -1, s2
-; GFX11SELDAG-NEXT:    global_store_b32 v0, v1, s[0:1]
-; GFX11SELDAG-NEXT:    s_endpgm
-;
-; GFX11GLISEL-LABEL: sgpr_isnan_f32:
-; GFX11GLISEL:       ; %bb.0:
-; GFX11GLISEL-NEXT:    s_load_b32 s0, s[4:5], 0x2c
-; GFX11GLISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX11GLISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11GLISEL-NEXT:    v_cmp_class_f32_e64 s2, s0, 3
-; GFX11GLISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX11GLISEL-NEXT:    s_cmp_lg_u32 s2, 0
-; GFX11GLISEL-NEXT:    s_cselect_b32 s2, 1, 0
-; GFX11GLISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX11GLISEL-NEXT:    s_and_b32 s2, s2, 1
-; GFX11GLISEL-NEXT:    s_cmp_lg_u32 s2, 0
-; GFX11GLISEL-NEXT:    s_cselect_b32 s2, -1, 0
-; GFX11GLISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
-; GFX11GLISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX11GLISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11GLISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
-; GFX11GLISEL-NEXT:    s_endpgm
+; GFX8CHECK-LABEL: sgpr_isnan_f32:
+; GFX8CHECK:       ; %bb.0:
+; GFX8CHECK-NEXT:    s_load_dword s2, s[4:5], 0x2c
+; GFX8CHECK-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX8CHECK-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX8CHECK-NEXT:    v_cmp_class_f32_e64 s[2:3], s2, 3
+; GFX8CHECK-NEXT:    v_mov_b32_e32 v0, s0
+; GFX8CHECK-NEXT:    v_cndmask_b32_e64 v2, 0, -1, s[2:3]
+; GFX8CHECK-NEXT:    v_mov_b32_e32 v1, s1
+; GFX8CHECK-NEXT:    flat_store_dword v[0:1], v2
+; GFX8CHECK-NEXT:    s_endpgm
+;
+; GFX9CHECK-LABEL: sgpr_isnan_f32:
+; GFX9CHECK:       ; %bb.0:
+; GFX9CHECK-NEXT:    s_load_dword s2, s[4:5], 0x2c
+; GFX9CHECK-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9CHECK-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9CHECK-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9CHECK-NEXT:    v_cmp_class_f32_e64 s[2:3], s2, 3
+; GFX9CHECK-NEXT:    v_cndmask_b32_e64 v1, 0, -1, s[2:3]
+; GFX9CHECK-NEXT:    global_store_dword v0, v1, s[0:1]
+; GFX9CHECK-NEXT:    s_endpgm
+;
+; GFX10CHECK-LABEL: sgpr_isnan_f32:
+; GFX10CHECK:       ; %bb.0:
+; GFX10CHECK-NEXT:    s_clause 0x1
+; GFX10CHECK-NEXT:    s_load_dword s2, s[4:5], 0x2c
+; GFX10CHECK-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX10CHECK-NEXT:    v_mov_b32_e32 v0, 0
+; GFX10CHECK-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX10CHECK-NEXT:    v_cmp_class_f32_e64 s2, s2, 3
+; GFX10CHECK-NEXT:    v_cndmask_b32_e64 v1, 0, -1, s2
+; GFX10CHECK-NEXT:    global_store_dword v0, v1, s[0:1]
+; GFX10CHECK-NEXT:    s_endpgm
+;
+; GFX11CHECK-LABEL: sgpr_isnan_f32:
+; GFX11CHECK:       ; %bb.0:
+; GFX11CHECK-NEXT:    s_clause 0x1
+; GFX11CHECK-NEXT:    s_load_b32 s2, s[4:5], 0x2c
+; GFX11CHECK-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; GFX11CHECK-NEXT:    v_mov_b32_e32 v0, 0
+; GFX11CHECK-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11CHECK-NEXT:    v_cmp_class_f32_e64 s2, s2, 3
+; GFX11CHECK-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v1, 0, -1, s2
+; GFX11CHECK-NEXT:    global_store_b32 v0, v1, s[0:1]
+; GFX11CHECK-NEXT:    s_endpgm
   %result = call i1 @llvm.is.fpclass.f32(float %x, i32 3)  ; nan
   %sext = sext i1 %result to i32
   store i32 %sext, ptr addrspace(1) %out, align 4
@@ -183,14 +109,9 @@ define amdgpu_kernel void @sgpr_isnan_f64(ptr addrspace(1) %out, double %x) {
 ; GFX7GLISEL:       ; %bb.0:
 ; GFX7GLISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
 ; GFX7GLISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX7GLISEL-NEXT:    v_cmp_class_f64_e64 s[4:5], s[2:3], 3
+; GFX7GLISEL-NEXT:    v_cmp_class_f64_e64 s[2:3], s[2:3], 3
+; GFX7GLISEL-NEXT:    v_cndmask_b32_e64 v0, 0, -1, s[2:3]
 ; GFX7GLISEL-NEXT:    s_mov_b32 s2, -1
-; GFX7GLISEL-NEXT:    s_or_b64 s[4:5], s[4:5], s[4:5]
-; GFX7GLISEL-NEXT:    s_cselect_b32 s3, 1, 0
-; GFX7GLISEL-NEXT:    s_and_b32 s3, s3, 1
-; GFX7GLISEL-NEXT:    s_cmp_lg_u32 s3, 0
-; GFX7GLISEL-NEXT:    s_cselect_b32 s3, -1, 0
-; GFX7GLISEL-NEXT:    v_mov_b32_e32 v0, s3
 ; GFX7GLISEL-NEXT:    s_mov_b32 s3, 0xf000
 ; GFX7GLISEL-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX7GLISEL-NEXT:    s_endpgm
@@ -213,92 +134,40 @@ define amdgpu_kernel void @sgpr_isnan_f64(ptr addrspace(1) %out, double %x) {
 ; GFX8GLISEL-NEXT:    v_cmp_class_f64_e64 s[2:3], s[2:3], 3
 ; GFX8GLISEL-NEXT:    v_mov_b32_e32 v0, s0
 ; GFX8GLISEL-NEXT:    v_mov_b32_e32 v1, s1
-; GFX8GLISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX8GLISEL-NEXT:    s_cselect_b32 s2, 1, 0
-; GFX8GLISEL-NEXT:    s_and_b32 s2, s2, 1
-; GFX8GLISEL-NEXT:    s_cmp_lg_u32 s2, 0
-; GFX8GLISEL-NEXT:    s_cselect_b32 s2, -1, 0
-; GFX8GLISEL-NEXT:    v_mov_b32_e32 v2, s2
+; GFX8GLISEL-NEXT:    v_cndmask_b32_e64 v2, 0, -1, s[2:3]
 ; GFX8GLISEL-NEXT:    flat_store_dword v[0:1], v2
 ; GFX8GLISEL-NEXT:    s_endpgm
 ;
-; GFX9SELDAG-LABEL: sgpr_isnan_f64:
-; GFX9SELDAG:       ; %bb.0:
-; GFX9SELDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX9SELDAG-NEXT:    v_mov_b32_e32 v0, 0
-; GFX9SELDAG-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9SELDAG-NEXT:    v_cmp_class_f64_e64 s[2:3], s[2:3], 3
-; GFX9SELDAG-NEXT:    v_cndmask_b32_e64 v1, 0, -1, s[2:3]
-; GFX9SELDAG-NEXT:    global_store_dword v0, v1, s[0:1]
-; GFX9SELDAG-NEXT:    s_endpgm
-;
-; GFX9GLISEL-LABEL: sgpr_isnan_f64:
-; GFX9GLISEL:       ; %bb.0:
-; GFX9GLISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX9GLISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX9GLISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9GLISEL-NEXT:    v_cmp_class_f64_e64 s[2:3], s[2:3], 3
-; GFX9GLISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX9GLISEL-NEXT:    s_cselect_b32 s2, 1, 0
-; GFX9GLISEL-NEXT:    s_and_b32 s2, s2, 1
-; GFX9GLISEL-NEXT:    s_cmp_lg_u32 s2, 0
-; GFX9GLISEL-NEXT:    s_cselect_b32 s2, -1, 0
-; GFX9GLISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX9GLISEL-NEXT:    global_store_dword v1, v0, s[0:1]
-; GFX9GLISEL-NEXT:    s_endpgm
-;
-; GFX10SELDAG-LABEL: sgpr_isnan_f64:
-; GFX10SELDAG:       ; %bb.0:
-; GFX10SELDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX10SELDAG-NEXT:    v_mov_b32_e32 v0, 0
-; GFX10SELDAG-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10SELDAG-NEXT:    v_cmp_class_f64_e64 s2, s[2:3], 3
-; GFX10SELDAG-NEXT:    v_cndmask_b32_e64 v1, 0, -1, s2
-; GFX10SELDAG-NEXT:    global_store_dword v0, v1, s[0:1]
-; GFX10SELDAG-NEXT:    s_endpgm
-;
-; GFX10GLISEL-LABEL: sgpr_isnan_f64:
-; GFX10GLISEL:       ; %bb.0:
-; GFX10GLISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX10GLISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX10GLISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10GLISEL-NEXT:    v_cmp_class_f64_e64 s2, s[2:3], 3
-; GFX10GLISEL-NEXT:    s_cmp_lg_u32 s2, 0
-; GFX10GLISEL-NEXT:    s_cselect_b32 s2, 1, 0
-; GFX10GLISEL-NEXT:    s_and_b32 s2, s2, 1
-; GFX10GLISEL-NEXT:    s_cmp_lg_u32 s2, 0
-; GFX10GLISEL-NEXT:    s_cselect_b32 s2, -1, 0
-; GFX10GLISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX10GLISEL-NEXT:    global_store_dword v1, v0, s[0:1]
-; GFX10GLISEL-NEXT:    s_endpgm
-;
-; GFX11SELDAG-LABEL: sgpr_isnan_f64:
-; GFX11SELDAG:       ; %bb.0:
-; GFX11SELDAG-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
-; GFX11SELDAG-NEXT:    v_mov_b32_e32 v0, 0
-; GFX11SELDAG-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11SELDAG-NEXT:    v_cmp_class_f64_e64 s2, s[2:3], 3
-; GFX11SELDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11SELDAG-NEXT:    v_cndmask_b32_e64 v1, 0, -1, s2
-; GFX11SELDAG-NEXT:    global_store_b32 v0, v1, s[0:1]
-; GFX11SELDAG-NEXT:    s_endpgm
-;
-; GFX11GLISEL-LABEL: sgpr_isnan_f64:
-; GFX11GLISEL:       ; %bb.0:
-; GFX11GLISEL-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
-; GFX11GLISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX11GLISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11GLISEL-NEXT:    v_cmp_class_f64_e64 s2, s[2:3], 3
-; GFX11GLISEL-NEXT:    s_cmp_lg_u32 s2, 0
-; GFX11GLISEL-NEXT:    s_cselect_b32 s2, 1, 0
-; GFX11GLISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX11GLISEL-NEXT:    s_and_b32 s2, s2, 1
-; GFX11GLISEL-NEXT:    s_cmp_lg_u32 s2, 0
-; GFX11GLISEL-NEXT:    s_cselect_b32 s2, -1, 0
-; GFX11GLISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
-; GFX11GLISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX11GLISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
-; GFX11GLISEL-NEXT:    s_endpgm
+; GFX9CHECK-LABEL: sgpr_isnan_f64:
+; GFX9CHECK:       ; %bb.0:
+; GFX9CHECK-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX9CHECK-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9CHECK-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9CHECK-NEXT:    v_cmp_class_f64_e64 s[2:3], s[2:3], 3
+; GFX9CHECK-NEXT:    v_cndmask_b32_e64 v1, 0, -1, s[2:3]
+; GFX9CHECK-NEXT:    global_store_dword v0, v1, s[0:1]
+; GFX9CHECK-NEXT:    s_endpgm
+;
+; GFX10CHECK-LABEL: sgpr_isnan_f64:
+; GFX10CHECK:       ; %bb.0:
+; GFX10CHECK-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX10CHECK-NEXT:    v_mov_b32_e32 v0, 0
+; GFX10CHECK-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX10CHECK-NEXT:    v_cmp_class_f64_e64 s2, s[2:3], 3
+; GFX10CHECK-NEXT:    v_cndmask_b32_e64 v1, 0, -1, s2
+; GFX10CHECK-NEXT:    global_store_dword v0, v1, s[0:1]
+; GFX10CHECK-NEXT:    s_endpgm
+;
+; GFX11CHECK-LABEL: sgpr_isnan_f64:
+; GFX11CHECK:       ; %bb.0:
+; GFX11CHECK-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11CHECK-NEXT:    v_mov_b32_e32 v0, 0
+; GFX11CHECK-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11CHECK-NEXT:    v_cmp_class_f64_e64 s2, s[2:3], 3
+; GFX11CHECK-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v1, 0, -1, s2
+; GFX11CHECK-NEXT:    global_store_b32 v0, v1, s[0:1]
+; GFX11CHECK-NEXT:    s_endpgm
   %result = call i1 @llvm.is.fpclass.f64(double %x, i32 3)  ; nan
   %sext = sext i1 %result to i32
   store i32 %sext, ptr addrspace(1) %out, align 4
@@ -1599,3 +1468,10 @@ declare <7 x i1> @llvm.is.fpclass.v7f32(<7 x float>, i32)
 declare <8 x i1> @llvm.is.fpclass.v8f32(<8 x float>, i32)
 declare <16 x i1> @llvm.is.fpclass.v16f32(<16 x float>, i32)
 declare <2 x i1> @llvm.is.fpclass.v2f64(<2 x double>, i32)
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; GFX10GLISEL: {{.*}}
+; GFX10SELDAG: {{.*}}
+; GFX11GLISEL: {{.*}}
+; GFX11SELDAG: {{.*}}
+; GFX9GLISEL: {{.*}}
+; GFX9SELDAG: {{.*}}

>From cbcb9b9e09a588a06b784e12ac561851d636cfcb Mon Sep 17 00:00:00 2001
From: Vang Thao <vang.thao at amd.com>
Date: Thu, 5 Feb 2026 16:12:27 -0800
Subject: [PATCH 2/3] Move combine into AMDGPURegBankCombiner

---
 llvm/lib/Target/AMDGPU/AMDGPUCombine.td       |  10 +-
 .../Target/AMDGPU/AMDGPURegBankCombiner.cpp   |  67 ++++
 .../Target/AMDGPU/AMDGPURegBankLegalize.cpp   |  60 ----
 .../regbankcombiner-copy-scc-vcc.mir          | 112 +++----
 ...regbanklegalize-eliminate-copy-scc-vcc.mir | 313 ------------------
 5 files changed, 118 insertions(+), 444 deletions(-)
 delete mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/regbanklegalize-eliminate-copy-scc-vcc.mir

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
index 7f00eadbf3f3f..4bc9a17b520fe 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
@@ -101,6 +101,14 @@ def fmed3_intrinsic_to_clamp : GICombineRule<
          [{ return matchFPMed3ToClamp(*${fmed3}, ${matchinfo}); }]),
   (apply [{ applyClamp(*${fmed3}, ${matchinfo}); }])>;
 
+def copy_scc_vcc_matchdata : GIDefMatchData<"CopySccVccMatchInfo">;
+
+def copy_scc_vcc : GICombineRule<
+  (defs root:$copy, copy_scc_vcc_matchdata:$matchinfo),
+  (match (wip_match_opcode COPY):$copy,
+         [{ return matchCopySccVcc(*${copy}, ${matchinfo}); }]),
+  (apply [{ applyCopySccVcc(*${copy}, ${matchinfo}); }])>;
+
 def remove_fcanonicalize : GICombineRule<
   (defs root:$fcanonicalize, register_matchinfo:$matchinfo),
   (match (wip_match_opcode G_FCANONICALIZE):$fcanonicalize,
@@ -239,5 +247,5 @@ def AMDGPURegBankCombiner : GICombiner<
    fp_minmax_to_clamp, fp_minmax_to_med3, fmed3_intrinsic_to_clamp,
    identity_combines, redundant_and, constant_fold_cast_op,
    cast_of_cast_combines, sext_trunc, zext_of_shift_amount_combines,
-   d16_load]> {
+   d16_load, copy_scc_vcc]> {
 }
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp
index e2e84ce2e6219..4f47b5a2a8488 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp
@@ -74,6 +74,11 @@ class AMDGPURegBankCombinerImpl : public Combiner {
     Register Val0, Val1, Val2;
   };
 
+  struct CopySccVccMatchInfo {
+    Register VccReg;
+    int64_t TrueVal; // -1 for SEXT, 1 for ZEXT
+  };
+
   MinMaxMedOpc getMinMaxPair(unsigned Opc) const;
 
   template <class m_Cst, typename CstTy>
@@ -93,6 +98,9 @@ class AMDGPURegBankCombinerImpl : public Combiner {
   bool applyD16Load(unsigned D16Opc, MachineInstr &DstMI,
                     MachineInstr *SmallLoad, Register ToOverwriteD16) const;
 
+  bool matchCopySccVcc(MachineInstr &MI, CopySccVccMatchInfo &MatchInfo) const;
+  void applyCopySccVcc(MachineInstr &MI, CopySccVccMatchInfo &MatchInfo) const;
+
 private:
   SIModeRegisterDefaults getMode() const;
   bool getIEEE() const;
@@ -478,6 +486,65 @@ bool AMDGPURegBankCombinerImpl::applyD16Load(
   return true;
 }
 
+// Eliminate VCC->SGPR->VGPR register bounce for uniform boolean extensions.
+// Match: COPY (G_SELECT (G_AND (G_AMDGPU_COPY_SCC_VCC %vcc), 1), {-1|1}, 0)
+// Replace with: G_SELECT %vcc, {-1|1}, 0
+bool AMDGPURegBankCombinerImpl::matchCopySccVcc(
+    MachineInstr &MI, CopySccVccMatchInfo &MatchInfo) const {
+  assert(MI.getOpcode() == AMDGPU::COPY);
+
+  Register VgprDst = MI.getOperand(0).getReg();
+  Register SgprSrc = MI.getOperand(1).getReg();
+
+  if (!VgprDst.isVirtual() || !SgprSrc.isVirtual())
+    return false;
+
+  if (!isVgprRegBank(VgprDst))
+    return false;
+
+  // Match: G_SELECT (G_AND (G_AMDGPU_COPY_SCC_VCC %vcc), 1), TrueReg, 0
+  MachineInstr *CopySccVcc;
+  Register TrueReg;
+  if (!mi_match(SgprSrc, MRI,
+                m_GISelect(m_GAnd(m_MInstr(CopySccVcc), m_SpecificICst(1)),
+                           m_Reg(TrueReg), m_ZeroInt())))
+    return false;
+
+  if (CopySccVcc->getOpcode() != AMDGPU::G_AMDGPU_COPY_SCC_VCC)
+    return false;
+
+  Register VccReg = CopySccVcc->getOperand(1).getReg();
+
+  // TrueReg must be constant -1 (SEXT) or 1 (ZEXT)
+  auto TrueConst = getIConstantVRegValWithLookThrough(TrueReg, MRI);
+  if (!TrueConst)
+    return false;
+
+  int64_t TrueVal = TrueConst->Value.getSExtValue();
+  if (TrueVal != -1 && TrueVal != 1)
+    return false;
+
+  MatchInfo.VccReg = VccReg;
+  MatchInfo.TrueVal = TrueVal;
+  return true;
+}
+
+void AMDGPURegBankCombinerImpl::applyCopySccVcc(
+    MachineInstr &MI, CopySccVccMatchInfo &MatchInfo) const {
+  Register VgprDst = MI.getOperand(0).getReg();
+  LLT Ty = MRI.getType(VgprDst);
+  const RegisterBank &VgprRB = RBI.getRegBank(AMDGPU::VGPRRegBankID);
+
+  auto TrueVal = B.buildConstant(Ty, MatchInfo.TrueVal);
+  MRI.setRegBank(TrueVal.getReg(0), VgprRB);
+
+  auto FalseVal = B.buildConstant(Ty, 0);
+  MRI.setRegBank(FalseVal.getReg(0), VgprRB);
+
+  B.buildSelect(VgprDst, MatchInfo.VccReg, TrueVal, FalseVal);
+  MI.eraseFromParent();
+}
+
 SIModeRegisterDefaults AMDGPURegBankCombinerImpl::getMode() const {
   return MF.getInfo<SIMachineFunctionInfo>()->getMode();
 }
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp
index eb6a5b0c9edf1..2e009cdec06ac 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp
@@ -46,12 +46,6 @@ m_GAMDGPUReadAnyLane(const SrcTy &Src) {
   return UnaryOp_match<SrcTy, AMDGPU::G_AMDGPU_READANYLANE>(Src);
 }
 
-template <typename SrcTy>
-inline UnaryOp_match<SrcTy, AMDGPU::G_AMDGPU_COPY_SCC_VCC>
-m_GAMDGPUCopySccVcc(const SrcTy &Src) {
-  return UnaryOp_match<SrcTy, AMDGPU::G_AMDGPU_COPY_SCC_VCC>(Src);
-}
-
 class AMDGPURegBankLegalize : public MachineFunctionPass {
 public:
   static char ID;
@@ -142,7 +136,6 @@ class AMDGPURegBankLegalizeCombiner {
   bool tryEliminateReadAnyLane(MachineInstr &Copy);
   void tryCombineCopy(MachineInstr &MI);
   void tryCombineS1AnyExt(MachineInstr &MI);
-  bool tryEliminateCopySccVcc(MachineInstr &MI);
 };
 
 bool AMDGPURegBankLegalizeCombiner::isLaneMask(Register Reg) {
@@ -403,57 +396,6 @@ void AMDGPURegBankLegalizeCombiner::tryCombineS1AnyExt(MachineInstr &MI) {
   llvm_unreachable("missing anyext + trunc combine");
 }
 
-bool AMDGPURegBankLegalizeCombiner::tryEliminateCopySccVcc(MachineInstr &MI) {
-  // Eliminate VCC->SGPR->VGPR bounce for uniform boolean extensions. This is
-  // caused by UniInVcc which creates G_AMDGPU_COPY_SCC_VCC forcing the result
-  // to SGPR which in turn is forced back to VGPR by a subsequent instruction.
-  //
-  // %vcc:vcc(s1) = ...
-  // %sgpr:sgpr(s32) = G_AMDGPU_COPY_SCC_VCC %vcc
-  // %and:sgpr(s32) = G_AND %sgpr, 1
-  // %sel:sgpr(s32) = G_SELECT %and, {-1|1}, 0
-  // %vgpr:vgpr(s32) = COPY %sel
-  // ->
-  // %vgpr:vgpr(s32) = G_SELECT %vcc, {-1|1}, 0
-
-  Register VgprDst = MI.getOperand(0).getReg();
-  Register SgprSrc = MI.getOperand(1).getReg();
-
-  if (!VgprDst.isVirtual() || !SgprSrc.isVirtual())
-    return false;
-
-  if (MRI.getRegBankOrNull(VgprDst) != VgprRB ||
-      MRI.getRegBankOrNull(SgprSrc) != SgprRB)
-    return false;
-
-  // Match: G_SELECT (G_AND (G_AMDGPU_COPY_SCC_VCC %vcc), 1), SelTrueReg, 0
-  Register VccReg, SelTrueReg;
-  if (!mi_match(SgprSrc, MRI,
-                m_GISelect(m_GAnd(m_GAMDGPUCopySccVcc(m_Reg(VccReg)),
-                                  m_SpecificICst(1)),
-                           m_Reg(SelTrueReg), m_ZeroInt())))
-    return false;
-
-  if (MRI.getRegBankOrNull(VccReg) != VccRB)
-    return false;
-
-  // SelTrueReg must be constant -1 (SEXT) or 1 (ZEXT).
-  auto SelTrueConst = getIConstantVRegValWithLookThrough(SelTrueReg, MRI);
-  if (!SelTrueConst)
-    return false;
-  int64_t SelTrueVal = SelTrueConst->Value.getSExtValue();
-  if (SelTrueVal != -1 && SelTrueVal != 1)
-    return false;
-
-  B.setInstrAndDebugLoc(MI);
-  LLT Ty = MRI.getType(VgprDst);
-  B.buildSelect(VgprDst, VccReg, B.buildConstant({VgprRB, Ty}, SelTrueVal),
-                B.buildConstant({VgprRB, Ty}, 0));
-
-  eraseInstr(MI, MRI);
-  return true;
-}
-
 // Search through MRI for virtual registers with sgpr register bank and S1 LLT.
 [[maybe_unused]] static Register getAnySgprS1(const MachineRegisterInfo &MRI) {
   const LLT S1 = LLT::scalar(1);
@@ -583,8 +525,6 @@ bool AMDGPURegBankLegalize::runOnMachineFunction(MachineFunction &MF) {
   for (MachineBasicBlock &MBB : MF) {
     for (MachineInstr &MI : make_early_inc_range(MBB)) {
       if (MI.getOpcode() == AMDGPU::COPY) {
-        if (Combiner.tryEliminateCopySccVcc(MI))
-          continue;
         Combiner.tryCombineCopy(MI);
         continue;
       }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-copy-scc-vcc.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-copy-scc-vcc.mir
index e6894f3db6740..1655a9e9e80b5 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-copy-scc-vcc.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-copy-scc-vcc.mir
@@ -16,14 +16,10 @@ body: |
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
     ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY1]]
-    ; CHECK-NEXT: [[AMDGPU_COPY_SCC_VCC:%[0-9]+]]:sgpr(s32) = G_AMDGPU_COPY_SCC_VCC [[ICMP]](s1)
-    ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1
-    ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[AMDGPU_COPY_SCC_VCC]], [[C]]
-    ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -1
-    ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
-    ; CHECK-NEXT: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[AND]](s32), [[C1]], [[C2]]
-    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[SELECT]](s32)
-    ; CHECK-NEXT: $vgpr0 = COPY [[COPY2]](s32)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 -1
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[C]], [[C1]]
+    ; CHECK-NEXT: $vgpr0 = COPY [[SELECT]](s32)
     ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0
     %0:vgpr(s32) = COPY $vgpr0
     %1:vgpr(s32) = COPY $vgpr1
@@ -54,14 +50,10 @@ body: |
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
     ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY1]]
-    ; CHECK-NEXT: [[AMDGPU_COPY_SCC_VCC:%[0-9]+]]:sgpr(s32) = G_AMDGPU_COPY_SCC_VCC [[ICMP]](s1)
-    ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1
-    ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[AMDGPU_COPY_SCC_VCC]], [[C]]
-    ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1
-    ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
-    ; CHECK-NEXT: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[AND]](s32), [[C1]], [[C2]]
-    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[SELECT]](s32)
-    ; CHECK-NEXT: $vgpr0 = COPY [[COPY2]](s32)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[C]], [[C1]]
+    ; CHECK-NEXT: $vgpr0 = COPY [[SELECT]](s32)
     ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0
     %0:vgpr(s32) = COPY $vgpr0
     %1:vgpr(s32) = COPY $vgpr1
@@ -92,14 +84,10 @@ body: |
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
     ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY1]]
-    ; CHECK-NEXT: [[AMDGPU_COPY_SCC_VCC:%[0-9]+]]:sgpr(s32) = G_AMDGPU_COPY_SCC_VCC [[ICMP]](s1)
-    ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1
-    ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[AMDGPU_COPY_SCC_VCC]], [[C]]
-    ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 -1
-    ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 0
-    ; CHECK-NEXT: [[SELECT:%[0-9]+]]:sgpr(s64) = G_SELECT [[AND]](s32), [[C1]], [[C2]]
-    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[SELECT]](s64)
-    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[COPY2]](s64)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 -1
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s64) = G_SELECT [[ICMP]](s1), [[C]], [[C1]]
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64)
     ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0_vgpr1
     %0:vgpr(s32) = COPY $vgpr0
     %1:vgpr(s32) = COPY $vgpr1
@@ -130,14 +118,10 @@ body: |
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
     ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY1]]
-    ; CHECK-NEXT: [[AMDGPU_COPY_SCC_VCC:%[0-9]+]]:sgpr(s32) = G_AMDGPU_COPY_SCC_VCC [[ICMP]](s1)
-    ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1
-    ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[AMDGPU_COPY_SCC_VCC]], [[C]]
-    ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 1
-    ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 0
-    ; CHECK-NEXT: [[SELECT:%[0-9]+]]:sgpr(s64) = G_SELECT [[AND]](s32), [[C1]], [[C2]]
-    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[SELECT]](s64)
-    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[COPY2]](s64)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 1
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s64) = G_SELECT [[ICMP]](s1), [[C]], [[C1]]
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64)
     ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0_vgpr1
     %0:vgpr(s32) = COPY $vgpr0
     %1:vgpr(s32) = COPY $vgpr1
@@ -168,14 +152,10 @@ body: |
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
     ; CHECK-NEXT: [[FCMP:%[0-9]+]]:vcc(s1) = G_FCMP floatpred(oeq), [[COPY]](s32), [[COPY1]]
-    ; CHECK-NEXT: [[AMDGPU_COPY_SCC_VCC:%[0-9]+]]:sgpr(s32) = G_AMDGPU_COPY_SCC_VCC [[FCMP]](s1)
-    ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1
-    ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[AMDGPU_COPY_SCC_VCC]], [[C]]
-    ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1
-    ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
-    ; CHECK-NEXT: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[AND]](s32), [[C1]], [[C2]]
-    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[SELECT]](s32)
-    ; CHECK-NEXT: $vgpr0 = COPY [[COPY2]](s32)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[FCMP]](s1), [[C]], [[C1]]
+    ; CHECK-NEXT: $vgpr0 = COPY [[SELECT]](s32)
     ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0
     %0:vgpr(s32) = COPY $vgpr0
     %1:vgpr(s32) = COPY $vgpr1
@@ -206,16 +186,14 @@ body: |
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
     ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY1]]
-    ; CHECK-NEXT: [[AMDGPU_COPY_SCC_VCC:%[0-9]+]]:sgpr(s32) = G_AMDGPU_COPY_SCC_VCC [[ICMP]](s1)
-    ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1
-    ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[AMDGPU_COPY_SCC_VCC]], [[C]]
-    ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -1
-    ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
-    ; CHECK-NEXT: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[AND]](s32), [[C1]], [[C2]]
-    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[SELECT]](s32)
-    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[SELECT]](s32)
-    ; CHECK-NEXT: $vgpr0 = COPY [[COPY2]](s32)
-    ; CHECK-NEXT: $vgpr1 = COPY [[COPY3]](s32)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 -1
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[C]], [[C1]]
+    ; CHECK-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 -1
+    ; CHECK-NEXT: [[C3:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[C2]], [[C3]]
+    ; CHECK-NEXT: $vgpr0 = COPY [[SELECT]](s32)
+    ; CHECK-NEXT: $vgpr1 = COPY [[SELECT1]](s32)
     ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
     %0:vgpr(s32) = COPY $vgpr0
     %1:vgpr(s32) = COPY $vgpr1
@@ -247,14 +225,10 @@ body: |
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
     ; CHECK-NEXT: [[IS_FPCLASS:%[0-9]+]]:vcc(s1) = G_IS_FPCLASS [[COPY]](s32), 3
-    ; CHECK-NEXT: [[AMDGPU_COPY_SCC_VCC:%[0-9]+]]:sgpr(s32) = G_AMDGPU_COPY_SCC_VCC [[IS_FPCLASS]](s1)
-    ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1
-    ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[AMDGPU_COPY_SCC_VCC]], [[C]]
-    ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -1
-    ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
-    ; CHECK-NEXT: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[AND]](s32), [[C1]], [[C2]]
-    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[SELECT]](s32)
-    ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 -1
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[IS_FPCLASS]](s1), [[C]], [[C1]]
+    ; CHECK-NEXT: $vgpr0 = COPY [[SELECT]](s32)
     ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0
     %0:vgpr(s32) = COPY $vgpr0
     %1:vcc(s1) = G_IS_FPCLASS %0, 3
@@ -284,14 +258,10 @@ body: |
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
     ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY1]]
-    ; CHECK-NEXT: [[AMDGPU_COPY_SCC_VCC:%[0-9]+]]:sgpr(s32) = G_AMDGPU_COPY_SCC_VCC [[ICMP]](s1)
-    ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1
-    ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[C]], [[AMDGPU_COPY_SCC_VCC]]
-    ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1
-    ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
-    ; CHECK-NEXT: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[AND]](s32), [[C1]], [[C2]]
-    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[SELECT]](s32)
-    ; CHECK-NEXT: $vgpr0 = COPY [[COPY2]](s32)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[C]], [[C1]]
+    ; CHECK-NEXT: $vgpr0 = COPY [[SELECT]](s32)
     ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0
     %0:vgpr(s32) = COPY $vgpr0
     %1:vgpr(s32) = COPY $vgpr1
@@ -329,11 +299,13 @@ body: |
     ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -1
     ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
     ; CHECK-NEXT: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[AND]](s32), [[C1]], [[C2]]
-    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[SELECT]](s32)
+    ; CHECK-NEXT: [[C3:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 -1
+    ; CHECK-NEXT: [[C4:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[C3]], [[C4]]
     ; CHECK-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[SELECT]], [[COPY2]]
-    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[ADD]](s32)
-    ; CHECK-NEXT: $vgpr0 = COPY [[COPY3]](s32)
-    ; CHECK-NEXT: $vgpr1 = COPY [[COPY4]](s32)
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[ADD]](s32)
+    ; CHECK-NEXT: $vgpr0 = COPY [[SELECT1]](s32)
+    ; CHECK-NEXT: $vgpr1 = COPY [[COPY3]](s32)
     ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
     %0:vgpr(s32) = COPY $vgpr0
     %1:vgpr(s32) = COPY $vgpr1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbanklegalize-eliminate-copy-scc-vcc.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbanklegalize-eliminate-copy-scc-vcc.mir
deleted file mode 100644
index ad89b8ba603c8..0000000000000
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbanklegalize-eliminate-copy-scc-vcc.mir
+++ /dev/null
@@ -1,313 +0,0 @@
-# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
-# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass=amdgpu-regbanklegalize %s -verify-machineinstrs -o - | FileCheck %s
-
----
-name: test_eliminate_sext_to_vgpr
-legalized: true
-tracksRegLiveness: true
-
-body: |
-  bb.0:
-    liveins: $sgpr0, $sgpr2_sgpr3
-    ; CHECK-LABEL: name: test_eliminate_sext_to_vgpr
-    ; CHECK: liveins: $sgpr0, $sgpr2_sgpr3
-    ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
-    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(p1) = COPY $sgpr2_sgpr3
-    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
-    ; CHECK-NEXT: [[IS_FPCLASS:%[0-9]+]]:vcc(s1) = G_IS_FPCLASS [[COPY2]](s32), 3
-    ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
-    ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 -1
-    ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[IS_FPCLASS]](s1), [[C1]], [[C]]
-    ; CHECK-NEXT: G_STORE [[SELECT]](s32), [[COPY1]](p1) :: (store (s32), addrspace 1)
-    ; CHECK-NEXT: S_ENDPGM 0
-    %0:sgpr(s32) = COPY $sgpr0
-    %1:sgpr(p1) = COPY $sgpr2_sgpr3
-    %2:sgpr(s1) = G_IS_FPCLASS %0, 3
-    %3:sgpr(s32) = G_SEXT %2
-    G_STORE %3, %1 :: (store (s32), addrspace 1)
-    S_ENDPGM 0
-...
-
----
-name: test_eliminate_zext_to_vgpr
-legalized: true
-tracksRegLiveness: true
-
-body: |
-  bb.0:
-    liveins: $sgpr0, $sgpr2_sgpr3
-    ; CHECK-LABEL: name: test_eliminate_zext_to_vgpr
-    ; CHECK: liveins: $sgpr0, $sgpr2_sgpr3
-    ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
-    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(p1) = COPY $sgpr2_sgpr3
-    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
-    ; CHECK-NEXT: [[IS_FPCLASS:%[0-9]+]]:vcc(s1) = G_IS_FPCLASS [[COPY2]](s32), 3
-    ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
-    ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1
-    ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[IS_FPCLASS]](s1), [[C1]], [[C]]
-    ; CHECK-NEXT: G_STORE [[SELECT]](s32), [[COPY1]](p1) :: (store (s32), addrspace 1)
-    ; CHECK-NEXT: S_ENDPGM 0
-    %0:sgpr(s32) = COPY $sgpr0
-    %1:sgpr(p1) = COPY $sgpr2_sgpr3
-    %2:sgpr(s1) = G_IS_FPCLASS %0, 3
-    %3:sgpr(s32) = G_ZEXT %2
-    G_STORE %3, %1 :: (store (s32), addrspace 1)
-    S_ENDPGM 0
-...
-
----
-name: test_eliminate_sext_to_s64_vgpr
-legalized: true
-tracksRegLiveness: true
-
-body: |
-  bb.0:
-    liveins: $sgpr0, $sgpr2_sgpr3
-    ; CHECK-LABEL: name: test_eliminate_sext_to_s64_vgpr
-    ; CHECK: liveins: $sgpr0, $sgpr2_sgpr3
-    ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
-    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(p1) = COPY $sgpr2_sgpr3
-    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
-    ; CHECK-NEXT: [[IS_FPCLASS:%[0-9]+]]:vcc(s1) = G_IS_FPCLASS [[COPY2]](s32), 3
-    ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 0
-    ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 -1
-    ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s64) = G_SELECT [[IS_FPCLASS]](s1), [[C1]], [[C]]
-    ; CHECK-NEXT: G_STORE [[SELECT]](s64), [[COPY1]](p1) :: (store (s64), addrspace 1)
-    ; CHECK-NEXT: S_ENDPGM 0
-    %0:sgpr(s32) = COPY $sgpr0
-    %1:sgpr(p1) = COPY $sgpr2_sgpr3
-    %2:sgpr(s1) = G_IS_FPCLASS %0, 3
-    %3:sgpr(s64) = G_SEXT %2
-    G_STORE %3, %1 :: (store (s64), addrspace 1)
-    S_ENDPGM 0
-...
-
----
-name: test_eliminate_zext_to_s64_vgpr
-legalized: true
-tracksRegLiveness: true
-
-body: |
-  bb.0:
-    liveins: $sgpr0, $sgpr2_sgpr3
-    ; CHECK-LABEL: name: test_eliminate_zext_to_s64_vgpr
-    ; CHECK: liveins: $sgpr0, $sgpr2_sgpr3
-    ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
-    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(p1) = COPY $sgpr2_sgpr3
-    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
-    ; CHECK-NEXT: [[IS_FPCLASS:%[0-9]+]]:vcc(s1) = G_IS_FPCLASS [[COPY2]](s32), 3
-    ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 0
-    ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 1
-    ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s64) = G_SELECT [[IS_FPCLASS]](s1), [[C1]], [[C]]
-    ; CHECK-NEXT: G_STORE [[SELECT]](s64), [[COPY1]](p1) :: (store (s64), addrspace 1)
-    ; CHECK-NEXT: S_ENDPGM 0
-    %0:sgpr(s32) = COPY $sgpr0
-    %1:sgpr(p1) = COPY $sgpr2_sgpr3
-    %2:sgpr(s1) = G_IS_FPCLASS %0, 3
-    %3:sgpr(s64) = G_ZEXT %2
-    G_STORE %3, %1 :: (store (s64), addrspace 1)
-    S_ENDPGM 0
-...
-
----
-name: test_eliminate_anyext_to_vgpr
-legalized: true
-tracksRegLiveness: true
-
-body: |
-  bb.0:
-    liveins: $sgpr0, $sgpr2_sgpr3
-    ; CHECK-LABEL: name: test_eliminate_anyext_to_vgpr
-    ; CHECK: liveins: $sgpr0, $sgpr2_sgpr3
-    ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
-    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(p1) = COPY $sgpr2_sgpr3
-    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
-    ; CHECK-NEXT: [[IS_FPCLASS:%[0-9]+]]:vcc(s1) = G_IS_FPCLASS [[COPY2]](s32), 3
-    ; CHECK-NEXT: [[AMDGPU_COPY_SCC_VCC:%[0-9]+]]:sgpr(s32) = G_AMDGPU_COPY_SCC_VCC [[IS_FPCLASS]](s1)
-    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[AMDGPU_COPY_SCC_VCC]](s32)
-    ; CHECK-NEXT: G_STORE [[COPY3]](s32), [[COPY1]](p1) :: (store (s32), addrspace 1)
-    ; CHECK-NEXT: S_ENDPGM 0
-    %0:sgpr(s32) = COPY $sgpr0
-    %1:sgpr(p1) = COPY $sgpr2_sgpr3
-    %2:sgpr(s1) = G_IS_FPCLASS %0, 3
-    %3:sgpr(s32) = G_ANYEXT %2
-    G_STORE %3, %1 :: (store (s32), addrspace 1)
-    S_ENDPGM 0
-...
-
----
-name: test_eliminate_fcmp_source
-legalized: true
-tracksRegLiveness: true
-
-body: |
-  bb.0:
-    liveins: $sgpr0, $sgpr1, $sgpr2_sgpr3
-    ; CHECK-LABEL: name: test_eliminate_fcmp_source
-    ; CHECK: liveins: $sgpr0, $sgpr1, $sgpr2_sgpr3
-    ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
-    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
-    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(p1) = COPY $sgpr2_sgpr3
-    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
-    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
-    ; CHECK-NEXT: [[FCMP:%[0-9]+]]:vcc(s1) = G_FCMP floatpred(oeq), [[COPY3]](s32), [[COPY4]]
-    ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
-    ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1
-    ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[FCMP]](s1), [[C1]], [[C]]
-    ; CHECK-NEXT: G_STORE [[SELECT]](s32), [[COPY2]](p1) :: (store (s32), addrspace 1)
-    ; CHECK-NEXT: S_ENDPGM 0
-    %0:sgpr(s32) = COPY $sgpr0
-    %1:sgpr(s32) = COPY $sgpr1
-    %2:sgpr(p1) = COPY $sgpr2_sgpr3
-    %3:sgpr(s1) = G_FCMP floatpred(oeq), %0, %1
-    %4:sgpr(s32) = G_ZEXT %3
-    G_STORE %4, %2 :: (store (s32), addrspace 1)
-    S_ENDPGM 0
-...
-
----
-name: test_eliminate_multiple_vgpr_copies
-legalized: true
-tracksRegLiveness: true
-
-body: |
-  bb.0:
-    liveins: $sgpr0, $sgpr2_sgpr3, $sgpr4_sgpr5
-    ; CHECK-LABEL: name: test_eliminate_multiple_vgpr_copies
-    ; CHECK: liveins: $sgpr0, $sgpr2_sgpr3, $sgpr4_sgpr5
-    ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
-    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(p1) = COPY $sgpr2_sgpr3
-    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(p1) = COPY $sgpr4_sgpr5
-    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
-    ; CHECK-NEXT: [[IS_FPCLASS:%[0-9]+]]:vcc(s1) = G_IS_FPCLASS [[COPY3]](s32), 3
-    ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
-    ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 -1
-    ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[IS_FPCLASS]](s1), [[C1]], [[C]]
-    ; CHECK-NEXT: G_STORE [[SELECT]](s32), [[COPY1]](p1) :: (store (s32), addrspace 1)
-    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[SELECT]](s32)
-    ; CHECK-NEXT: G_STORE [[COPY4]](s32), [[COPY2]](p1) :: (store (s32), addrspace 1)
-    ; CHECK-NEXT: S_ENDPGM 0
-    %0:sgpr(s32) = COPY $sgpr0
-    %1:sgpr(p1) = COPY $sgpr2_sgpr3
-    %2:sgpr(p1) = COPY $sgpr4_sgpr5
-    %3:sgpr(s1) = G_IS_FPCLASS %0, 3
-    %4:sgpr(s32) = G_SEXT %3
-    G_STORE %4, %1 :: (store (s32), addrspace 1)
-    G_STORE %4, %2 :: (store (s32), addrspace 1)
-    S_ENDPGM 0
-...
-
----
-name: test_mixed_uses
-legalized: true
-tracksRegLiveness: true
-
-body: |
-  bb.0:
-    liveins: $sgpr0, $sgpr1, $sgpr2_sgpr3
-    ; CHECK-LABEL: name: test_mixed_uses
-    ; CHECK: liveins: $sgpr0, $sgpr1, $sgpr2_sgpr3
-    ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
-    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
-    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(p1) = COPY $sgpr2_sgpr3
-    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
-    ; CHECK-NEXT: [[IS_FPCLASS:%[0-9]+]]:vcc(s1) = G_IS_FPCLASS [[COPY3]](s32), 3
-    ; CHECK-NEXT: [[AMDGPU_COPY_SCC_VCC:%[0-9]+]]:sgpr(s32) = G_AMDGPU_COPY_SCC_VCC [[IS_FPCLASS]](s1)
-    ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1
-    ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[AMDGPU_COPY_SCC_VCC]], [[C]]
-    ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -1
-    ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
-    ; CHECK-NEXT: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[AND]](s32), [[C1]], [[C2]]
-    ; CHECK-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[SELECT]], [[COPY1]]
-    ; CHECK-NEXT: [[C3:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
-    ; CHECK-NEXT: [[C4:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 -1
-    ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[IS_FPCLASS]](s1), [[C4]], [[C3]]
-    ; CHECK-NEXT: G_STORE [[SELECT1]](s32), [[COPY2]](p1) :: (store (s32), addrspace 1)
-    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[ADD]](s32)
-    ; CHECK-NEXT: G_STORE [[COPY4]](s32), [[COPY2]](p1) :: (store (s32), addrspace 1)
-    ; CHECK-NEXT: S_ENDPGM 0
-    %0:sgpr(s32) = COPY $sgpr0
-    %1:sgpr(s32) = COPY $sgpr1
-    %2:sgpr(p1) = COPY $sgpr2_sgpr3
-    %3:sgpr(s1) = G_IS_FPCLASS %0, 3
-    %4:sgpr(s32) = G_SEXT %3
-    %5:sgpr(s32) = G_ADD %4, %1
-    G_STORE %4, %2 :: (store (s32), addrspace 1)
-    G_STORE %5, %2 :: (store (s32), addrspace 1)
-    S_ENDPGM 0
-...
-
----
-name: test_no_eliminate_branch
-legalized: true
-tracksRegLiveness: true
-
-body: |
-  ; CHECK-LABEL: name: test_no_eliminate_branch
-  ; CHECK: bb.0:
-  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
-  ; CHECK-NEXT:   liveins: $sgpr0
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
-  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
-  ; CHECK-NEXT:   [[IS_FPCLASS:%[0-9]+]]:vcc(s1) = G_IS_FPCLASS [[COPY1]](s32), 3
-  ; CHECK-NEXT:   [[AMDGPU_COPY_SCC_VCC:%[0-9]+]]:sgpr(s32) = G_AMDGPU_COPY_SCC_VCC [[IS_FPCLASS]](s1)
-  ; CHECK-NEXT:   [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1
-  ; CHECK-NEXT:   [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[AMDGPU_COPY_SCC_VCC]], [[C]]
-  ; CHECK-NEXT:   G_BRCOND [[AND]](s32), %bb.1
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.1:
-  ; CHECK-NEXT:   S_ENDPGM 0
-  bb.0:
-    liveins: $sgpr0
-    %0:sgpr(s32) = COPY $sgpr0
-    %1:sgpr(s1) = G_IS_FPCLASS %0, 3
-    G_BRCOND %1, %bb.1
-
-  bb.1:
-    S_ENDPGM 0
-...
-
----
-name: test_no_eliminate_scalar_arithmetic_use
-legalized: true
-tracksRegLiveness: true
-
-body: |
-  bb.0:
-    liveins: $sgpr0, $sgpr1, $sgpr2_sgpr3
-    ; CHECK-LABEL: name: test_no_eliminate_scalar_arithmetic_use
-    ; CHECK: liveins: $sgpr0, $sgpr1, $sgpr2_sgpr3
-    ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
-    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
-    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(p1) = COPY $sgpr2_sgpr3
-    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
-    ; CHECK-NEXT: [[IS_FPCLASS:%[0-9]+]]:vcc(s1) = G_IS_FPCLASS [[COPY3]](s32), 3
-    ; CHECK-NEXT: [[AMDGPU_COPY_SCC_VCC:%[0-9]+]]:sgpr(s32) = G_AMDGPU_COPY_SCC_VCC [[IS_FPCLASS]](s1)
-    ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1
-    ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[AMDGPU_COPY_SCC_VCC]], [[C]]
-    ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -1
-    ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
-    ; CHECK-NEXT: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[AND]](s32), [[C1]], [[C2]]
-    ; CHECK-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[SELECT]], [[COPY1]]
-    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[ADD]](s32)
-    ; CHECK-NEXT: G_STORE [[COPY4]](s32), [[COPY2]](p1) :: (store (s32), addrspace 1)
-    ; CHECK-NEXT: S_ENDPGM 0
-    %0:sgpr(s32) = COPY $sgpr0
-    %1:sgpr(s32) = COPY $sgpr1
-    %2:sgpr(p1) = COPY $sgpr2_sgpr3
-    %3:sgpr(s1) = G_IS_FPCLASS %0, 3
-    %4:sgpr(s32) = G_SEXT %3
-    %5:sgpr(s32) = G_ADD %4, %1
-    G_STORE %5, %2 :: (store (s32), addrspace 1)
-    S_ENDPGM 0
-...

>From 4bffbc84d847e5965743267c4fded60b428762f3 Mon Sep 17 00:00:00 2001
From: Vang Thao <vang.thao at amd.com>
Date: Fri, 6 Feb 2026 23:34:34 -0800
Subject: [PATCH 3/3] Add known bits, and adjust combine pattern.

---
 .../Target/AMDGPU/AMDGPURegBankCombiner.cpp   |  60 ++-
 llvm/lib/Target/AMDGPU/SIISelLowering.cpp     |   5 +
 llvm/test/CodeGen/AMDGPU/GlobalISel/addo.ll   |   3 -
 llvm/test/CodeGen/AMDGPU/GlobalISel/fcmp.ll   |  71 +--
 .../CodeGen/AMDGPU/GlobalISel/fmax_legacy.ll  |   3 -
 .../CodeGen/AMDGPU/GlobalISel/fmin_legacy.ll  |   3 -
 llvm/test/CodeGen/AMDGPU/GlobalISel/icmp.ll   |  82 ++--
 .../GlobalISel/inst-select-copy-scc-vcc.ll    |  40 +-
 .../regbankcombiner-copy-scc-vcc.mir          | 405 +++++++++---------
 .../GlobalISel/regbankcombiner-smed3.mir      |  76 ++--
 .../GlobalISel/regbankcombiner-umed3.mir      |  76 ++--
 .../AMDGPU/GlobalISel/regbankselect-mui.ll    |   8 +-
 .../test/CodeGen/AMDGPU/GlobalISel/saddsat.ll |  75 +---
 .../test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll |  82 +---
 llvm/test/CodeGen/AMDGPU/GlobalISel/subo.ll   |   3 -
 llvm/test/CodeGen/AMDGPU/fptosi-sat-scalar.ll |  24 +-
 llvm/test/CodeGen/AMDGPU/fptosi-sat-vector.ll |  68 +--
 17 files changed, 432 insertions(+), 652 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp
index 4f47b5a2a8488..0829867262218 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp
@@ -76,7 +76,8 @@ class AMDGPURegBankCombinerImpl : public Combiner {
 
   struct CopySccVccMatchInfo {
     Register VccReg;
-    int64_t TrueVal; // -1 for SEXT, 1 for ZEXT
+    Register TrueReg;
+    Register FalseReg;
   };
 
   MinMaxMedOpc getMinMaxPair(unsigned Opc) const;
@@ -145,6 +146,17 @@ Register AMDGPURegBankCombinerImpl::getAsVgpr(Register Reg) const {
   if (isVgprRegBank(Reg))
     return Reg;
 
+  const RegisterBank &VgprRB = RBI.getRegBank(AMDGPU::VGPRRegBankID);
+
+  // Build constants directly in VGPR instead of copying from SGPR.
+  std::optional<ValueAndVReg> Val =
+      getIConstantVRegValWithLookThrough(Reg, MRI);
+  if (Val) {
+    auto VgprCst = B.buildConstant(MRI.getType(Reg), Val->Value);
+    MRI.setRegBank(VgprCst.getReg(0), VgprRB);
+    return VgprCst.getReg(0);
+  }
+
   // Search for existing copy of Reg to vgpr.
   for (MachineInstr &Use : MRI.use_instructions(Reg)) {
     Register Def = Use.getOperand(0).getReg();
@@ -154,7 +166,7 @@ Register AMDGPURegBankCombinerImpl::getAsVgpr(Register Reg) const {
 
   // Copy Reg to vgpr.
   Register VgprReg = B.buildCopy(MRI.getType(Reg), Reg).getReg(0);
-  MRI.setRegBank(VgprReg, RBI.getRegBank(AMDGPU::VGPRRegBankID));
+  MRI.setRegBank(VgprReg, VgprRB);
   return VgprReg;
 }
 
@@ -486,9 +498,9 @@ bool AMDGPURegBankCombinerImpl::applyD16Load(
   return true;
 }
 
-// Eliminate VCC->SGPR->VGPR register bounce for uniform boolean extensions.
-// Match: COPY (G_SELECT (G_AND (G_AMDGPU_COPY_SCC_VCC %vcc), 1), {-1|1}, 0)
-// Replace with: G_SELECT %vcc, {-1|1}, 0
+// Eliminate VCC->SGPR->VGPR register bounce for uniform boolean in VCC.
+// Match: COPY (G_SELECT (G_AMDGPU_COPY_SCC_VCC %vcc), %true, %false)
+// Replace with: G_SELECT %vcc, %vgpr_true, %vgpr_false
 bool AMDGPURegBankCombinerImpl::matchCopySccVcc(
     MachineInstr &MI, CopySccVccMatchInfo &MatchInfo) const {
   assert(MI.getOpcode() == AMDGPU::COPY);
@@ -502,46 +514,30 @@ bool AMDGPURegBankCombinerImpl::matchCopySccVcc(
   if (!isVgprRegBank(VgprDst))
     return false;
 
-  // Match: G_SELECT (G_AND (G_AMDGPU_COPY_SCC_VCC %vcc), 1), TrueReg, 0
-  MachineInstr *CopySccVcc;
-  Register TrueReg;
+  MachineInstr *CondDef;
+  Register TrueReg, FalseReg;
   if (!mi_match(SgprSrc, MRI,
-                m_GISelect(m_GAnd(m_MInstr(CopySccVcc), m_SpecificICst(1)),
-                           m_Reg(TrueReg), m_ZeroInt())))
+                m_GISelect(m_MInstr(CondDef), m_Reg(TrueReg),
+                           m_Reg(FalseReg))))
     return false;
 
-  if (CopySccVcc->getOpcode() != AMDGPU::G_AMDGPU_COPY_SCC_VCC)
+  if (CondDef->getOpcode() != AMDGPU::G_AMDGPU_COPY_SCC_VCC)
     return false;
 
-  Register VccReg = CopySccVcc->getOperand(1).getReg();
-
-  // TrueReg must be constant -1 (SEXT) or 1 (ZEXT)
-  auto TrueConst = getIConstantVRegValWithLookThrough(TrueReg, MRI);
-  if (!TrueConst)
-    return false;
-
-  int64_t TrueVal = TrueConst->Value.getSExtValue();
-  if (TrueVal != -1 && TrueVal != 1)
-    return false;
-
-  MatchInfo.VccReg = VccReg;
-  MatchInfo.TrueVal = TrueVal;
+  MatchInfo.VccReg = CondDef->getOperand(1).getReg();
+  MatchInfo.TrueReg = TrueReg;
+  MatchInfo.FalseReg = FalseReg;
   return true;
 }
 
 void AMDGPURegBankCombinerImpl::applyCopySccVcc(
     MachineInstr &MI, CopySccVccMatchInfo &MatchInfo) const {
   Register VgprDst = MI.getOperand(0).getReg();
-  LLT Ty = MRI.getType(VgprDst);
-  const RegisterBank &VgprRB = RBI.getRegBank(AMDGPU::VGPRRegBankID);
-
-  auto TrueVal = B.buildConstant(Ty, MatchInfo.TrueVal);
-  MRI.setRegBank(TrueVal.getReg(0), VgprRB);
 
-  auto FalseVal = B.buildConstant(Ty, 0);
-  MRI.setRegBank(FalseVal.getReg(0), VgprRB);
+  Register VgprTrue = getAsVgpr(MatchInfo.TrueReg);
+  Register VgprFalse = getAsVgpr(MatchInfo.FalseReg);
 
-  B.buildSelect(VgprDst, MatchInfo.VccReg, TrueVal, FalseVal);
+  B.buildSelect(VgprDst, MatchInfo.VccReg, VgprTrue, VgprFalse);
   MI.eraseFromParent();
 }
 
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index b043d5354042d..9e86e36ebf3e5 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -18541,6 +18541,11 @@ void SITargetLowering::computeKnownBitsForTargetInstr(
   case AMDGPU::G_AMDGPU_BUFFER_LOAD_USHORT:
     Known.Zero.setHighBits(16);
     break;
+  case AMDGPU::G_AMDGPU_COPY_SCC_VCC:
+    // G_AMDGPU_COPY_SCC_VCC converts a uniform boolean in VCC to SGPR s32,
+    // producing exactly 0 or 1.
+    Known.Zero.setHighBits(Known.getBitWidth() - 1);
+    break;
   case AMDGPU::G_AMDGPU_SMED3:
   case AMDGPU::G_AMDGPU_UMED3: {
     auto [Dst, Src0, Src1, Src2] = MI->getFirst4Regs();
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/addo.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/addo.ll
index 6a3077d149a2d..e935dd3dd476e 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/addo.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/addo.ll
@@ -704,7 +704,6 @@ define amdgpu_ps i64 @s_saddo_i64(i64 inreg %a, i64 inreg %b) {
 ; GFX7-NEXT:    s_or_b64 s[0:1], s[0:1], s[0:1]
 ; GFX7-NEXT:    s_cselect_b32 s0, 1, 0
 ; GFX7-NEXT:    s_xor_b32 s0, s0, s6
-; GFX7-NEXT:    s_and_b32 s0, s0, 1
 ; GFX7-NEXT:    s_cmp_lg_u32 s0, 0
 ; GFX7-NEXT:    s_cselect_b32 s0, 1, 0
 ; GFX7-NEXT:    s_add_u32 s0, s4, s0
@@ -724,7 +723,6 @@ define amdgpu_ps i64 @s_saddo_i64(i64 inreg %a, i64 inreg %b) {
 ; GFX8-NEXT:    s_cmp_lg_u64 s[0:1], 0
 ; GFX8-NEXT:    s_cselect_b32 s0, 1, 0
 ; GFX8-NEXT:    s_xor_b32 s0, s0, s6
-; GFX8-NEXT:    s_and_b32 s0, s0, 1
 ; GFX8-NEXT:    s_cmp_lg_u32 s0, 0
 ; GFX8-NEXT:    s_cselect_b32 s0, 1, 0
 ; GFX8-NEXT:    s_add_u32 s0, s4, s0
@@ -744,7 +742,6 @@ define amdgpu_ps i64 @s_saddo_i64(i64 inreg %a, i64 inreg %b) {
 ; GFX9-NEXT:    s_cmp_lg_u64 s[0:1], 0
 ; GFX9-NEXT:    s_cselect_b32 s0, 1, 0
 ; GFX9-NEXT:    s_xor_b32 s0, s0, s6
-; GFX9-NEXT:    s_and_b32 s0, s0, 1
 ; GFX9-NEXT:    s_cmp_lg_u32 s0, 0
 ; GFX9-NEXT:    s_cselect_b32 s0, 1, 0
 ; GFX9-NEXT:    s_add_u32 s0, s4, s0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fcmp.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fcmp.ll
index e00f0238b3bcf..496c6597e9afd 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fcmp.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fcmp.ll
@@ -48,46 +48,32 @@ define void @fcmp_f16_uniform(half inreg %a, half inreg %b, ptr %p) {
 ; GFX10-NEXT:    s_cselect_b32 s17, 1, 0
 ; GFX10-NEXT:    s_cmp_lg_u32 s16, 0
 ; GFX10-NEXT:    s_cselect_b32 s16, 1, 0
-; GFX10-NEXT:    s_and_b32 s4, s4, 1
 ; GFX10-NEXT:    s_cmp_lg_u32 s4, 0
 ; GFX10-NEXT:    s_cselect_b32 s4, 1, 0
-; GFX10-NEXT:    s_and_b32 s5, s5, 1
 ; GFX10-NEXT:    s_cmp_lg_u32 s5, 0
 ; GFX10-NEXT:    s_cselect_b32 s5, 1, 0
-; GFX10-NEXT:    s_and_b32 s6, s6, 1
 ; GFX10-NEXT:    s_cmp_lg_u32 s6, 0
 ; GFX10-NEXT:    s_cselect_b32 s6, 1, 0
-; GFX10-NEXT:    s_and_b32 s7, s7, 1
 ; GFX10-NEXT:    s_cmp_lg_u32 s7, 0
 ; GFX10-NEXT:    s_cselect_b32 s7, 1, 0
-; GFX10-NEXT:    s_and_b32 s8, s8, 1
 ; GFX10-NEXT:    s_cmp_lg_u32 s8, 0
 ; GFX10-NEXT:    s_cselect_b32 s8, 1, 0
-; GFX10-NEXT:    s_and_b32 s9, s9, 1
 ; GFX10-NEXT:    s_cmp_lg_u32 s9, 0
 ; GFX10-NEXT:    s_cselect_b32 s9, 1, 0
-; GFX10-NEXT:    s_and_b32 s10, s10, 1
 ; GFX10-NEXT:    s_cmp_lg_u32 s10, 0
 ; GFX10-NEXT:    s_cselect_b32 s10, 1, 0
-; GFX10-NEXT:    s_and_b32 s11, s11, 1
 ; GFX10-NEXT:    s_cmp_lg_u32 s11, 0
 ; GFX10-NEXT:    s_cselect_b32 s11, 1, 0
-; GFX10-NEXT:    s_and_b32 s12, s12, 1
 ; GFX10-NEXT:    s_cmp_lg_u32 s12, 0
 ; GFX10-NEXT:    s_cselect_b32 s12, 1, 0
-; GFX10-NEXT:    s_and_b32 s13, s13, 1
 ; GFX10-NEXT:    s_cmp_lg_u32 s13, 0
 ; GFX10-NEXT:    s_cselect_b32 s13, 1, 0
-; GFX10-NEXT:    s_and_b32 s14, s14, 1
 ; GFX10-NEXT:    s_cmp_lg_u32 s14, 0
 ; GFX10-NEXT:    s_cselect_b32 s14, 1, 0
-; GFX10-NEXT:    s_and_b32 s15, s15, 1
 ; GFX10-NEXT:    s_cmp_lg_u32 s15, 0
 ; GFX10-NEXT:    s_cselect_b32 s15, 1, 0
-; GFX10-NEXT:    s_and_b32 s17, s17, 1
 ; GFX10-NEXT:    s_cmp_lg_u32 s17, 0
 ; GFX10-NEXT:    s_cselect_b32 s17, 1, 0
-; GFX10-NEXT:    s_and_b32 s16, s16, 1
 ; GFX10-NEXT:    s_cmp_lg_u32 s16, 0
 ; GFX10-NEXT:    s_cselect_b32 s16, 1, 0
 ; GFX10-NEXT:    s_add_i32 s4, s4, s5
@@ -447,46 +433,32 @@ define void @fcmp_f32_uniform(float inreg %a, float inreg %b, ptr %p) {
 ; GFX10-NEXT:    s_cselect_b32 s17, 1, 0
 ; GFX10-NEXT:    s_cmp_lg_u32 s16, 0
 ; GFX10-NEXT:    s_cselect_b32 s16, 1, 0
-; GFX10-NEXT:    s_and_b32 s4, s4, 1
 ; GFX10-NEXT:    s_cmp_lg_u32 s4, 0
 ; GFX10-NEXT:    s_cselect_b32 s4, 1, 0
-; GFX10-NEXT:    s_and_b32 s5, s5, 1
 ; GFX10-NEXT:    s_cmp_lg_u32 s5, 0
 ; GFX10-NEXT:    s_cselect_b32 s5, 1, 0
-; GFX10-NEXT:    s_and_b32 s6, s6, 1
 ; GFX10-NEXT:    s_cmp_lg_u32 s6, 0
 ; GFX10-NEXT:    s_cselect_b32 s6, 1, 0
-; GFX10-NEXT:    s_and_b32 s7, s7, 1
 ; GFX10-NEXT:    s_cmp_lg_u32 s7, 0
 ; GFX10-NEXT:    s_cselect_b32 s7, 1, 0
-; GFX10-NEXT:    s_and_b32 s8, s8, 1
 ; GFX10-NEXT:    s_cmp_lg_u32 s8, 0
 ; GFX10-NEXT:    s_cselect_b32 s8, 1, 0
-; GFX10-NEXT:    s_and_b32 s9, s9, 1
 ; GFX10-NEXT:    s_cmp_lg_u32 s9, 0
 ; GFX10-NEXT:    s_cselect_b32 s9, 1, 0
-; GFX10-NEXT:    s_and_b32 s10, s10, 1
 ; GFX10-NEXT:    s_cmp_lg_u32 s10, 0
 ; GFX10-NEXT:    s_cselect_b32 s10, 1, 0
-; GFX10-NEXT:    s_and_b32 s11, s11, 1
 ; GFX10-NEXT:    s_cmp_lg_u32 s11, 0
 ; GFX10-NEXT:    s_cselect_b32 s11, 1, 0
-; GFX10-NEXT:    s_and_b32 s12, s12, 1
 ; GFX10-NEXT:    s_cmp_lg_u32 s12, 0
 ; GFX10-NEXT:    s_cselect_b32 s12, 1, 0
-; GFX10-NEXT:    s_and_b32 s13, s13, 1
 ; GFX10-NEXT:    s_cmp_lg_u32 s13, 0
 ; GFX10-NEXT:    s_cselect_b32 s13, 1, 0
-; GFX10-NEXT:    s_and_b32 s14, s14, 1
 ; GFX10-NEXT:    s_cmp_lg_u32 s14, 0
 ; GFX10-NEXT:    s_cselect_b32 s14, 1, 0
-; GFX10-NEXT:    s_and_b32 s15, s15, 1
 ; GFX10-NEXT:    s_cmp_lg_u32 s15, 0
 ; GFX10-NEXT:    s_cselect_b32 s15, 1, 0
-; GFX10-NEXT:    s_and_b32 s17, s17, 1
 ; GFX10-NEXT:    s_cmp_lg_u32 s17, 0
 ; GFX10-NEXT:    s_cselect_b32 s17, 1, 0
-; GFX10-NEXT:    s_and_b32 s16, s16, 1
 ; GFX10-NEXT:    s_cmp_lg_u32 s16, 0
 ; GFX10-NEXT:    s_cselect_b32 s16, 1, 0
 ; GFX10-NEXT:    s_add_i32 s4, s4, s5
@@ -846,46 +818,32 @@ define void @fcmp_f64_uniform(double inreg %a, double inreg %b, ptr %p) {
 ; GFX10-NEXT:    s_cselect_b32 s17, 1, 0
 ; GFX10-NEXT:    s_cmp_lg_u32 s16, 0
 ; GFX10-NEXT:    s_cselect_b32 s16, 1, 0
-; GFX10-NEXT:    s_and_b32 s4, s4, 1
 ; GFX10-NEXT:    s_cmp_lg_u32 s4, 0
 ; GFX10-NEXT:    s_cselect_b32 s4, 1, 0
-; GFX10-NEXT:    s_and_b32 s5, s5, 1
 ; GFX10-NEXT:    s_cmp_lg_u32 s5, 0
 ; GFX10-NEXT:    s_cselect_b32 s5, 1, 0
-; GFX10-NEXT:    s_and_b32 s6, s6, 1
 ; GFX10-NEXT:    s_cmp_lg_u32 s6, 0
 ; GFX10-NEXT:    s_cselect_b32 s6, 1, 0
-; GFX10-NEXT:    s_and_b32 s7, s7, 1
 ; GFX10-NEXT:    s_cmp_lg_u32 s7, 0
 ; GFX10-NEXT:    s_cselect_b32 s7, 1, 0
-; GFX10-NEXT:    s_and_b32 s8, s8, 1
 ; GFX10-NEXT:    s_cmp_lg_u32 s8, 0
 ; GFX10-NEXT:    s_cselect_b32 s8, 1, 0
-; GFX10-NEXT:    s_and_b32 s9, s9, 1
 ; GFX10-NEXT:    s_cmp_lg_u32 s9, 0
 ; GFX10-NEXT:    s_cselect_b32 s9, 1, 0
-; GFX10-NEXT:    s_and_b32 s10, s10, 1
 ; GFX10-NEXT:    s_cmp_lg_u32 s10, 0
 ; GFX10-NEXT:    s_cselect_b32 s10, 1, 0
-; GFX10-NEXT:    s_and_b32 s11, s11, 1
 ; GFX10-NEXT:    s_cmp_lg_u32 s11, 0
 ; GFX10-NEXT:    s_cselect_b32 s11, 1, 0
-; GFX10-NEXT:    s_and_b32 s12, s12, 1
 ; GFX10-NEXT:    s_cmp_lg_u32 s12, 0
 ; GFX10-NEXT:    s_cselect_b32 s12, 1, 0
-; GFX10-NEXT:    s_and_b32 s13, s13, 1
 ; GFX10-NEXT:    s_cmp_lg_u32 s13, 0
 ; GFX10-NEXT:    s_cselect_b32 s13, 1, 0
-; GFX10-NEXT:    s_and_b32 s14, s14, 1
 ; GFX10-NEXT:    s_cmp_lg_u32 s14, 0
 ; GFX10-NEXT:    s_cselect_b32 s14, 1, 0
-; GFX10-NEXT:    s_and_b32 s15, s15, 1
 ; GFX10-NEXT:    s_cmp_lg_u32 s15, 0
 ; GFX10-NEXT:    s_cselect_b32 s15, 1, 0
-; GFX10-NEXT:    s_and_b32 s17, s17, 1
 ; GFX10-NEXT:    s_cmp_lg_u32 s17, 0
 ; GFX10-NEXT:    s_cselect_b32 s17, 1, 0
-; GFX10-NEXT:    s_and_b32 s16, s16, 1
 ; GFX10-NEXT:    s_cmp_lg_u32 s16, 0
 ; GFX10-NEXT:    s_cselect_b32 s16, 1, 0
 ; GFX10-NEXT:    s_add_i32 s4, s4, s5
@@ -956,62 +914,35 @@ define void @fcmp_f64_uniform(double inreg %a, double inreg %b, ptr %p) {
 ; GFX12-NEXT:    s_cmp_lg_u32 s0, 0
 ; GFX12-NEXT:    s_cselect_b32 s0, 1, 0
 ; GFX12-NEXT:    s_wait_alu depctr_sa_sdst(0)
-; GFX12-NEXT:    s_and_b32 s4, s4, 1
-; GFX12-NEXT:    s_wait_alu depctr_sa_sdst(0)
 ; GFX12-NEXT:    s_cmp_lg_u32 s4, 0
 ; GFX12-NEXT:    s_cselect_b32 s4, 1, 0
-; GFX12-NEXT:    s_and_b32 s1, s1, 1
-; GFX12-NEXT:    s_wait_alu depctr_sa_sdst(0)
 ; GFX12-NEXT:    s_cmp_lg_u32 s1, 0
 ; GFX12-NEXT:    s_cselect_b32 s1, 1, 0
-; GFX12-NEXT:    s_and_b32 s2, s2, 1
-; GFX12-NEXT:    s_wait_alu depctr_sa_sdst(0)
 ; GFX12-NEXT:    s_cmp_lg_u32 s2, 0
 ; GFX12-NEXT:    s_cselect_b32 s2, 1, 0
-; GFX12-NEXT:    s_and_b32 s3, s3, 1
-; GFX12-NEXT:    s_wait_alu depctr_sa_sdst(0)
 ; GFX12-NEXT:    s_cmp_lg_u32 s3, 0
 ; GFX12-NEXT:    s_cselect_b32 s3, 1, 0
-; GFX12-NEXT:    s_and_b32 s5, s5, 1
-; GFX12-NEXT:    s_wait_alu depctr_sa_sdst(0)
 ; GFX12-NEXT:    s_cmp_lg_u32 s5, 0
 ; GFX12-NEXT:    s_cselect_b32 s5, 1, 0
-; GFX12-NEXT:    s_and_b32 s6, s6, 1
-; GFX12-NEXT:    s_wait_alu depctr_sa_sdst(0)
 ; GFX12-NEXT:    s_cmp_lg_u32 s6, 0
 ; GFX12-NEXT:    s_cselect_b32 s6, 1, 0
-; GFX12-NEXT:    s_and_b32 s7, s7, 1
-; GFX12-NEXT:    s_wait_alu depctr_sa_sdst(0)
 ; GFX12-NEXT:    s_cmp_lg_u32 s7, 0
 ; GFX12-NEXT:    s_cselect_b32 s7, 1, 0
-; GFX12-NEXT:    s_and_b32 s8, s8, 1
-; GFX12-NEXT:    s_wait_alu depctr_sa_sdst(0)
 ; GFX12-NEXT:    s_cmp_lg_u32 s8, 0
 ; GFX12-NEXT:    s_cselect_b32 s8, 1, 0
-; GFX12-NEXT:    s_and_b32 s9, s9, 1
-; GFX12-NEXT:    s_wait_alu depctr_sa_sdst(0)
 ; GFX12-NEXT:    s_cmp_lg_u32 s9, 0
 ; GFX12-NEXT:    s_cselect_b32 s9, 1, 0
-; GFX12-NEXT:    s_and_b32 s10, s10, 1
-; GFX12-NEXT:    s_wait_alu depctr_sa_sdst(0)
 ; GFX12-NEXT:    s_cmp_lg_u32 s10, 0
 ; GFX12-NEXT:    s_cselect_b32 s10, 1, 0
-; GFX12-NEXT:    s_and_b32 s11, s11, 1
-; GFX12-NEXT:    s_wait_alu depctr_sa_sdst(0)
 ; GFX12-NEXT:    s_cmp_lg_u32 s11, 0
 ; GFX12-NEXT:    s_cselect_b32 s11, 1, 0
-; GFX12-NEXT:    s_and_b32 s12, s12, 1
-; GFX12-NEXT:    s_wait_alu depctr_sa_sdst(0)
 ; GFX12-NEXT:    s_cmp_lg_u32 s12, 0
 ; GFX12-NEXT:    s_cselect_b32 s12, 1, 0
-; GFX12-NEXT:    s_and_b32 s13, s13, 1
-; GFX12-NEXT:    s_wait_alu depctr_sa_sdst(0)
 ; GFX12-NEXT:    s_cmp_lg_u32 s13, 0
 ; GFX12-NEXT:    s_cselect_b32 s13, 1, 0
-; GFX12-NEXT:    s_and_b32 s0, s0, 1
-; GFX12-NEXT:    s_wait_alu depctr_sa_sdst(0)
 ; GFX12-NEXT:    s_cmp_lg_u32 s0, 0
 ; GFX12-NEXT:    s_cselect_b32 s0, 1, 0
+; GFX12-NEXT:    s_wait_alu depctr_sa_sdst(0)
 ; GFX12-NEXT:    s_add_co_i32 s1, s4, s1
 ; GFX12-NEXT:    s_wait_alu depctr_sa_sdst(0)
 ; GFX12-NEXT:    s_add_co_i32 s1, s1, s2
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fmax_legacy.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fmax_legacy.ll
index c2b71f563990f..51d8b0523e644 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fmax_legacy.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fmax_legacy.ll
@@ -287,9 +287,6 @@ define amdgpu_ps float @s_test_fmax_legacy_f32(float inreg %a, float inreg %b) {
 ; GFX8-NEXT:    v_mov_b32_e32 v0, s3
 ; GFX8-NEXT:    v_cmp_gt_f32_e32 vcc, s2, v0
 ; GFX8-NEXT:    s_cmp_lg_u64 vcc, 0
-; GFX8-NEXT:    s_cselect_b32 s0, 1, 0
-; GFX8-NEXT:    s_and_b32 s0, s0, 1
-; GFX8-NEXT:    s_cmp_lg_u32 s0, 0
 ; GFX8-NEXT:    s_cselect_b32 s0, s2, s3
 ; GFX8-NEXT:    v_mov_b32_e32 v0, s0
 ; GFX8-NEXT:    ; return to shader part epilog
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fmin_legacy.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fmin_legacy.ll
index a3b87dd7d66ae..ef9646e6b845f 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fmin_legacy.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fmin_legacy.ll
@@ -416,9 +416,6 @@ define amdgpu_ps float @s_test_fmin_legacy_f32(float inreg %a, float inreg %b) {
 ; GFX8-NEXT:    v_mov_b32_e32 v0, s3
 ; GFX8-NEXT:    v_cmp_le_f32_e32 vcc, s2, v0
 ; GFX8-NEXT:    s_cmp_lg_u64 vcc, 0
-; GFX8-NEXT:    s_cselect_b32 s0, 1, 0
-; GFX8-NEXT:    s_and_b32 s0, s0, 1
-; GFX8-NEXT:    s_cmp_lg_u32 s0, 0
 ; GFX8-NEXT:    s_cselect_b32 s0, s2, s3
 ; GFX8-NEXT:    v_mov_b32_e32 v0, s0
 ; GFX8-NEXT:    ; return to shader part epilog
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/icmp.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/icmp.ll
index d90cead68524e..1b4417890004e 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/icmp.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/icmp.ll
@@ -1094,39 +1094,34 @@ define void @icmp_p0_uniform(ptr inreg %a, ptr inreg %b, ptr addrspace(1) %p) {
 ; HAWAII-NEXT:    v_mov_b32_e32 v2, s18
 ; HAWAII-NEXT:    v_mov_b32_e32 v3, s19
 ; HAWAII-NEXT:    v_cmp_eq_u64_e32 vcc, s[16:17], v[2:3]
+; HAWAII-NEXT:    s_mov_b32 s6, 0
 ; HAWAII-NEXT:    s_or_b64 s[4:5], vcc, vcc
 ; HAWAII-NEXT:    v_cmp_ne_u64_e32 vcc, s[16:17], v[2:3]
-; HAWAII-NEXT:    s_cselect_b32 s6, 1, 0
+; HAWAII-NEXT:    s_cselect_b32 s7, 1, 0
 ; HAWAII-NEXT:    s_or_b64 s[4:5], vcc, vcc
 ; HAWAII-NEXT:    v_cmp_lt_u64_e32 vcc, s[16:17], v[2:3]
-; HAWAII-NEXT:    s_cselect_b32 s7, 1, 0
+; HAWAII-NEXT:    s_cselect_b32 s8, 1, 0
 ; HAWAII-NEXT:    s_or_b64 s[4:5], vcc, vcc
 ; HAWAII-NEXT:    v_cmp_gt_u64_e32 vcc, s[16:17], v[2:3]
-; HAWAII-NEXT:    s_cselect_b32 s8, 1, 0
+; HAWAII-NEXT:    s_cselect_b32 s9, 1, 0
 ; HAWAII-NEXT:    s_or_b64 s[4:5], vcc, vcc
 ; HAWAII-NEXT:    v_cmp_le_u64_e32 vcc, s[16:17], v[2:3]
-; HAWAII-NEXT:    s_cselect_b32 s9, 1, 0
+; HAWAII-NEXT:    s_cselect_b32 s10, 1, 0
 ; HAWAII-NEXT:    s_or_b64 s[4:5], vcc, vcc
 ; HAWAII-NEXT:    v_cmp_ge_u64_e32 vcc, s[16:17], v[2:3]
-; HAWAII-NEXT:    s_cselect_b32 s10, 1, 0
+; HAWAII-NEXT:    s_cselect_b32 s11, 1, 0
 ; HAWAII-NEXT:    s_or_b64 s[4:5], vcc, vcc
 ; HAWAII-NEXT:    s_cselect_b32 s4, 1, 0
-; HAWAII-NEXT:    s_and_b32 s5, s6, 1
-; HAWAII-NEXT:    s_cmp_lg_u32 s5, 0
-; HAWAII-NEXT:    s_cselect_b32 s5, 1, 0
-; HAWAII-NEXT:    s_and_b32 s7, s7, 1
 ; HAWAII-NEXT:    s_cmp_lg_u32 s7, 0
-; HAWAII-NEXT:    s_cselect_b32 s7, 1, 0
-; HAWAII-NEXT:    s_and_b32 s8, s8, 1
+; HAWAII-NEXT:    s_cselect_b32 s5, 1, 0
 ; HAWAII-NEXT:    s_cmp_lg_u32 s8, 0
-; HAWAII-NEXT:    s_cselect_b32 s8, 1, 0
-; HAWAII-NEXT:    s_and_b32 s9, s9, 1
+; HAWAII-NEXT:    s_cselect_b32 s7, 1, 0
 ; HAWAII-NEXT:    s_cmp_lg_u32 s9, 0
-; HAWAII-NEXT:    s_cselect_b32 s9, 1, 0
-; HAWAII-NEXT:    s_and_b32 s10, s10, 1
+; HAWAII-NEXT:    s_cselect_b32 s8, 1, 0
 ; HAWAII-NEXT:    s_cmp_lg_u32 s10, 0
+; HAWAII-NEXT:    s_cselect_b32 s9, 1, 0
+; HAWAII-NEXT:    s_cmp_lg_u32 s11, 0
 ; HAWAII-NEXT:    s_cselect_b32 s10, 1, 0
-; HAWAII-NEXT:    s_and_b32 s4, s4, 1
 ; HAWAII-NEXT:    s_cmp_lg_u32 s4, 0
 ; HAWAII-NEXT:    s_cselect_b32 s4, 1, 0
 ; HAWAII-NEXT:    s_add_i32 s5, s5, s7
@@ -1134,7 +1129,6 @@ define void @icmp_p0_uniform(ptr inreg %a, ptr inreg %b, ptr addrspace(1) %p) {
 ; HAWAII-NEXT:    s_add_i32 s5, s5, s9
 ; HAWAII-NEXT:    s_add_i32 s5, s5, s10
 ; HAWAII-NEXT:    s_add_i32 s4, s5, s4
-; HAWAII-NEXT:    s_mov_b32 s6, 0
 ; HAWAII-NEXT:    v_mov_b32_e32 v2, s4
 ; HAWAII-NEXT:    s_mov_b32 s7, 0xf000
 ; HAWAII-NEXT:    s_mov_b64 s[4:5], 0
@@ -1170,22 +1164,15 @@ define void @icmp_p0_uniform(ptr inreg %a, ptr inreg %b, ptr addrspace(1) %p) {
 ; GFX12-NEXT:    s_cselect_b32 s2, 1, 0
 ; GFX12-NEXT:    s_cmp_lg_u32 s6, 0
 ; GFX12-NEXT:    s_cselect_b32 s3, 1, 0
-; GFX12-NEXT:    s_and_b32 s4, s5, 1
-; GFX12-NEXT:    s_wait_alu depctr_sa_sdst(0)
-; GFX12-NEXT:    s_cmp_lg_u32 s4, 0
-; GFX12-NEXT:    s_cselect_b32 s4, 1, 0
-; GFX12-NEXT:    s_and_b32 s5, s7, 1
-; GFX12-NEXT:    s_wait_alu depctr_sa_sdst(0)
 ; GFX12-NEXT:    s_cmp_lg_u32 s5, 0
+; GFX12-NEXT:    s_cselect_b32 s4, 1, 0
+; GFX12-NEXT:    s_cmp_lg_u32 s7, 0
 ; GFX12-NEXT:    s_cselect_b32 s5, 1, 0
-; GFX12-NEXT:    s_and_b32 s1, s1, 1
-; GFX12-NEXT:    s_wait_alu depctr_sa_sdst(0)
 ; GFX12-NEXT:    s_cmp_lg_u32 s1, 0
 ; GFX12-NEXT:    s_cselect_b32 s1, 1, 0
-; GFX12-NEXT:    s_and_b32 s0, s0, 1
-; GFX12-NEXT:    s_wait_alu depctr_sa_sdst(0)
 ; GFX12-NEXT:    s_cmp_lg_u32 s0, 0
 ; GFX12-NEXT:    s_cselect_b32 s0, 1, 0
+; GFX12-NEXT:    s_wait_alu depctr_sa_sdst(0)
 ; GFX12-NEXT:    s_add_co_i32 s2, s2, s3
 ; GFX12-NEXT:    s_wait_alu depctr_sa_sdst(0)
 ; GFX12-NEXT:    s_add_co_i32 s2, s2, s4
@@ -1308,39 +1295,34 @@ define void @icmp_p1_uniform(ptr addrspace(1) inreg %a, ptr addrspace(1) inreg %
 ; HAWAII-NEXT:    v_mov_b32_e32 v2, s18
 ; HAWAII-NEXT:    v_mov_b32_e32 v3, s19
 ; HAWAII-NEXT:    v_cmp_eq_u64_e32 vcc, s[16:17], v[2:3]
+; HAWAII-NEXT:    s_mov_b32 s6, 0
 ; HAWAII-NEXT:    s_or_b64 s[4:5], vcc, vcc
 ; HAWAII-NEXT:    v_cmp_ne_u64_e32 vcc, s[16:17], v[2:3]
-; HAWAII-NEXT:    s_cselect_b32 s6, 1, 0
+; HAWAII-NEXT:    s_cselect_b32 s7, 1, 0
 ; HAWAII-NEXT:    s_or_b64 s[4:5], vcc, vcc
 ; HAWAII-NEXT:    v_cmp_lt_u64_e32 vcc, s[16:17], v[2:3]
-; HAWAII-NEXT:    s_cselect_b32 s7, 1, 0
+; HAWAII-NEXT:    s_cselect_b32 s8, 1, 0
 ; HAWAII-NEXT:    s_or_b64 s[4:5], vcc, vcc
 ; HAWAII-NEXT:    v_cmp_gt_u64_e32 vcc, s[16:17], v[2:3]
-; HAWAII-NEXT:    s_cselect_b32 s8, 1, 0
+; HAWAII-NEXT:    s_cselect_b32 s9, 1, 0
 ; HAWAII-NEXT:    s_or_b64 s[4:5], vcc, vcc
 ; HAWAII-NEXT:    v_cmp_le_u64_e32 vcc, s[16:17], v[2:3]
-; HAWAII-NEXT:    s_cselect_b32 s9, 1, 0
+; HAWAII-NEXT:    s_cselect_b32 s10, 1, 0
 ; HAWAII-NEXT:    s_or_b64 s[4:5], vcc, vcc
 ; HAWAII-NEXT:    v_cmp_ge_u64_e32 vcc, s[16:17], v[2:3]
-; HAWAII-NEXT:    s_cselect_b32 s10, 1, 0
+; HAWAII-NEXT:    s_cselect_b32 s11, 1, 0
 ; HAWAII-NEXT:    s_or_b64 s[4:5], vcc, vcc
 ; HAWAII-NEXT:    s_cselect_b32 s4, 1, 0
-; HAWAII-NEXT:    s_and_b32 s5, s6, 1
-; HAWAII-NEXT:    s_cmp_lg_u32 s5, 0
-; HAWAII-NEXT:    s_cselect_b32 s5, 1, 0
-; HAWAII-NEXT:    s_and_b32 s7, s7, 1
 ; HAWAII-NEXT:    s_cmp_lg_u32 s7, 0
-; HAWAII-NEXT:    s_cselect_b32 s7, 1, 0
-; HAWAII-NEXT:    s_and_b32 s8, s8, 1
+; HAWAII-NEXT:    s_cselect_b32 s5, 1, 0
 ; HAWAII-NEXT:    s_cmp_lg_u32 s8, 0
-; HAWAII-NEXT:    s_cselect_b32 s8, 1, 0
-; HAWAII-NEXT:    s_and_b32 s9, s9, 1
+; HAWAII-NEXT:    s_cselect_b32 s7, 1, 0
 ; HAWAII-NEXT:    s_cmp_lg_u32 s9, 0
-; HAWAII-NEXT:    s_cselect_b32 s9, 1, 0
-; HAWAII-NEXT:    s_and_b32 s10, s10, 1
+; HAWAII-NEXT:    s_cselect_b32 s8, 1, 0
 ; HAWAII-NEXT:    s_cmp_lg_u32 s10, 0
+; HAWAII-NEXT:    s_cselect_b32 s9, 1, 0
+; HAWAII-NEXT:    s_cmp_lg_u32 s11, 0
 ; HAWAII-NEXT:    s_cselect_b32 s10, 1, 0
-; HAWAII-NEXT:    s_and_b32 s4, s4, 1
 ; HAWAII-NEXT:    s_cmp_lg_u32 s4, 0
 ; HAWAII-NEXT:    s_cselect_b32 s4, 1, 0
 ; HAWAII-NEXT:    s_add_i32 s5, s5, s7
@@ -1348,7 +1330,6 @@ define void @icmp_p1_uniform(ptr addrspace(1) inreg %a, ptr addrspace(1) inreg %
 ; HAWAII-NEXT:    s_add_i32 s5, s5, s9
 ; HAWAII-NEXT:    s_add_i32 s5, s5, s10
 ; HAWAII-NEXT:    s_add_i32 s4, s5, s4
-; HAWAII-NEXT:    s_mov_b32 s6, 0
 ; HAWAII-NEXT:    v_mov_b32_e32 v2, s4
 ; HAWAII-NEXT:    s_mov_b32 s7, 0xf000
 ; HAWAII-NEXT:    s_mov_b64 s[4:5], 0
@@ -1384,22 +1365,15 @@ define void @icmp_p1_uniform(ptr addrspace(1) inreg %a, ptr addrspace(1) inreg %
 ; GFX12-NEXT:    s_cselect_b32 s2, 1, 0
 ; GFX12-NEXT:    s_cmp_lg_u32 s6, 0
 ; GFX12-NEXT:    s_cselect_b32 s3, 1, 0
-; GFX12-NEXT:    s_and_b32 s4, s5, 1
-; GFX12-NEXT:    s_wait_alu depctr_sa_sdst(0)
-; GFX12-NEXT:    s_cmp_lg_u32 s4, 0
-; GFX12-NEXT:    s_cselect_b32 s4, 1, 0
-; GFX12-NEXT:    s_and_b32 s5, s7, 1
-; GFX12-NEXT:    s_wait_alu depctr_sa_sdst(0)
 ; GFX12-NEXT:    s_cmp_lg_u32 s5, 0
+; GFX12-NEXT:    s_cselect_b32 s4, 1, 0
+; GFX12-NEXT:    s_cmp_lg_u32 s7, 0
 ; GFX12-NEXT:    s_cselect_b32 s5, 1, 0
-; GFX12-NEXT:    s_and_b32 s1, s1, 1
-; GFX12-NEXT:    s_wait_alu depctr_sa_sdst(0)
 ; GFX12-NEXT:    s_cmp_lg_u32 s1, 0
 ; GFX12-NEXT:    s_cselect_b32 s1, 1, 0
-; GFX12-NEXT:    s_and_b32 s0, s0, 1
-; GFX12-NEXT:    s_wait_alu depctr_sa_sdst(0)
 ; GFX12-NEXT:    s_cmp_lg_u32 s0, 0
 ; GFX12-NEXT:    s_cselect_b32 s0, 1, 0
+; GFX12-NEXT:    s_wait_alu depctr_sa_sdst(0)
 ; GFX12-NEXT:    s_add_co_i32 s2, s2, s3
 ; GFX12-NEXT:    s_wait_alu depctr_sa_sdst(0)
 ; GFX12-NEXT:    s_add_co_i32 s2, s2, s4
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-scc-vcc.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-scc-vcc.ll
index 588802cbd56c7..2b1c9c68372db 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-scc-vcc.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-scc-vcc.ll
@@ -6,18 +6,15 @@
 define amdgpu_kernel void @fcmp_uniform_select(float %a, i32 %b, i32 %c, ptr addrspace(1) %out) {
 ; GFX7-LABEL: fcmp_uniform_select:
 ; GFX7:       ; %bb.0:
-; GFX7-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x9
-; GFX7-NEXT:    s_load_dword s3, s[4:5], 0xb
+; GFX7-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x9
+; GFX7-NEXT:    s_load_dword s6, s[4:5], 0xb
 ; GFX7-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0xd
-; GFX7-NEXT:    s_mov_b32 s2, -1
 ; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX7-NEXT:    v_cmp_eq_f32_e64 s[4:5], s6, 0
-; GFX7-NEXT:    s_or_b64 s[4:5], s[4:5], s[4:5]
-; GFX7-NEXT:    s_cselect_b32 s4, 1, 0
-; GFX7-NEXT:    s_and_b32 s4, s4, 1
-; GFX7-NEXT:    s_cmp_lg_u32 s4, 0
-; GFX7-NEXT:    s_cselect_b32 s3, s7, s3
 ; GFX7-NEXT:    v_mov_b32_e32 v0, s3
+; GFX7-NEXT:    v_mov_b32_e32 v1, s6
+; GFX7-NEXT:    v_cmp_eq_f32_e64 vcc, s2, 0
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
+; GFX7-NEXT:    s_mov_b32 s2, -1
 ; GFX7-NEXT:    s_mov_b32 s3, 0xf000
 ; GFX7-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX7-NEXT:    s_endpgm
@@ -28,14 +25,11 @@ define amdgpu_kernel void @fcmp_uniform_select(float %a, i32 %b, i32 %c, ptr add
 ; GFX8-NEXT:    s_load_dword s6, s[4:5], 0x2c
 ; GFX8-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x34
 ; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8-NEXT:    v_cmp_eq_f32_e64 s[4:5], s0, 0
-; GFX8-NEXT:    s_cmp_lg_u64 s[4:5], 0
-; GFX8-NEXT:    s_cselect_b32 s0, 1, 0
-; GFX8-NEXT:    s_and_b32 s0, s0, 1
-; GFX8-NEXT:    s_cmp_lg_u32 s0, 0
-; GFX8-NEXT:    s_cselect_b32 s0, s1, s6
+; GFX8-NEXT:    v_mov_b32_e32 v0, s1
+; GFX8-NEXT:    v_mov_b32_e32 v1, s6
+; GFX8-NEXT:    v_cmp_eq_f32_e64 vcc, s0, 0
+; GFX8-NEXT:    v_cndmask_b32_e32 v2, v1, v0, vcc
 ; GFX8-NEXT:    v_mov_b32_e32 v0, s2
-; GFX8-NEXT:    v_mov_b32_e32 v2, s0
 ; GFX8-NEXT:    v_mov_b32_e32 v1, s3
 ; GFX8-NEXT:    flat_store_dword v[0:1], v2
 ; GFX8-NEXT:    s_endpgm
@@ -43,20 +37,14 @@ define amdgpu_kernel void @fcmp_uniform_select(float %a, i32 %b, i32 %c, ptr add
 ; GFX11-LABEL: fcmp_uniform_select:
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_clause 0x2
-; GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
 ; GFX11-NEXT:    s_load_b32 s6, s[4:5], 0x2c
+; GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
 ; GFX11-NEXT:    s_load_b64 s[2:3], s[4:5], 0x34
-; GFX11-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s6
 ; GFX11-NEXT:    v_cmp_eq_f32_e64 s0, s0, 0
-; GFX11-NEXT:    s_cmp_lg_u32 s0, 0
-; GFX11-NEXT:    s_cselect_b32 s0, 1, 0
-; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX11-NEXT:    s_and_b32 s0, s0, 1
-; GFX11-NEXT:    s_cmp_lg_u32 s0, 0
-; GFX11-NEXT:    s_cselect_b32 s0, s1, s6
-; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
-; GFX11-NEXT:    v_mov_b32_e32 v0, s0
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT:    v_cndmask_b32_e64 v0, v0, s1, s0
 ; GFX11-NEXT:    global_store_b32 v1, v0, s[2:3]
 ; GFX11-NEXT:    s_endpgm
   %cmp = fcmp oeq float %a, 0.0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-copy-scc-vcc.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-copy-scc-vcc.mir
index 1655a9e9e80b5..0605a7710397d 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-copy-scc-vcc.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-copy-scc-vcc.mir
@@ -171,46 +171,6 @@ body: |
     S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0
 ...
 
----
-name: test_multiple_vgpr_copies
-legalized: true
-regBankSelected: true
-tracksRegLiveness: true
-body: |
-  bb.1:
-    liveins: $vgpr0, $vgpr1
-
-    ; CHECK-LABEL: name: test_multiple_vgpr_copies
-    ; CHECK: liveins: $vgpr0, $vgpr1
-    ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
-    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
-    ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY1]]
-    ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 -1
-    ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
-    ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[C]], [[C1]]
-    ; CHECK-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 -1
-    ; CHECK-NEXT: [[C3:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
-    ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[C2]], [[C3]]
-    ; CHECK-NEXT: $vgpr0 = COPY [[SELECT]](s32)
-    ; CHECK-NEXT: $vgpr1 = COPY [[SELECT1]](s32)
-    ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
-    %0:vgpr(s32) = COPY $vgpr0
-    %1:vgpr(s32) = COPY $vgpr1
-    %2:vcc(s1) = G_ICMP intpred(eq), %0, %1
-    %3:sgpr(s32) = G_AMDGPU_COPY_SCC_VCC %2
-    %4:sgpr(s32) = G_CONSTANT i32 1
-    %5:sgpr(s32) = G_AND %3, %4
-    %6:sgpr(s32) = G_CONSTANT i32 -1
-    %7:sgpr(s32) = G_CONSTANT i32 0
-    %8:sgpr(s32) = G_SELECT %5, %6, %7
-    %9:vgpr(s32) = COPY %8
-    %10:vgpr(s32) = COPY %8
-    $vgpr0 = COPY %9(s32)
-    $vgpr1 = COPY %10(s32)
-    S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
-...
-
 ---
 name: test_is_fpclass_source
 legalized: true
@@ -244,7 +204,7 @@ body: |
 ...
 
 ---
-name: test_and_commuted
+name: test_arbitrary_true_constant
 legalized: true
 regBankSelected: true
 tracksRegLiveness: true
@@ -252,13 +212,13 @@ body: |
   bb.1:
     liveins: $vgpr0, $vgpr1
 
-    ; CHECK-LABEL: name: test_and_commuted
+    ; CHECK-LABEL: name: test_arbitrary_true_constant
     ; CHECK: liveins: $vgpr0, $vgpr1
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
     ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY1]]
-    ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1
+    ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 2
     ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
     ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[C]], [[C1]]
     ; CHECK-NEXT: $vgpr0 = COPY [[SELECT]](s32)
@@ -268,8 +228,8 @@ body: |
     %2:vcc(s1) = G_ICMP intpred(eq), %0, %1
     %3:sgpr(s32) = G_AMDGPU_COPY_SCC_VCC %2
     %4:sgpr(s32) = G_CONSTANT i32 1
-    %5:sgpr(s32) = G_AND %4, %3
-    %6:sgpr(s32) = G_CONSTANT i32 1
+    %5:sgpr(s32) = G_AND %3, %4
+    %6:sgpr(s32) = G_CONSTANT i32 2
     %7:sgpr(s32) = G_CONSTANT i32 0
     %8:sgpr(s32) = G_SELECT %5, %6, %7
     %9:vgpr(s32) = COPY %8
@@ -278,90 +238,75 @@ body: |
 ...
 
 ---
-name: test_mixed_uses
+name: test_arbitrary_false_constant
 legalized: true
 regBankSelected: true
 tracksRegLiveness: true
 body: |
   bb.1:
-    liveins: $vgpr0, $vgpr1, $sgpr0
+    liveins: $vgpr0, $vgpr1
 
-    ; CHECK-LABEL: name: test_mixed_uses
-    ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr0
+    ; CHECK-LABEL: name: test_arbitrary_false_constant
+    ; CHECK: liveins: $vgpr0, $vgpr1
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
-    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
     ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY1]]
-    ; CHECK-NEXT: [[AMDGPU_COPY_SCC_VCC:%[0-9]+]]:sgpr(s32) = G_AMDGPU_COPY_SCC_VCC [[ICMP]](s1)
-    ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1
-    ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[AMDGPU_COPY_SCC_VCC]], [[C]]
-    ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -1
-    ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
-    ; CHECK-NEXT: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[AND]](s32), [[C1]], [[C2]]
-    ; CHECK-NEXT: [[C3:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 -1
-    ; CHECK-NEXT: [[C4:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
-    ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[C3]], [[C4]]
-    ; CHECK-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[SELECT]], [[COPY2]]
-    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[ADD]](s32)
-    ; CHECK-NEXT: $vgpr0 = COPY [[SELECT1]](s32)
-    ; CHECK-NEXT: $vgpr1 = COPY [[COPY3]](s32)
-    ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
+    ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 -1
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 5
+    ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[C]], [[C1]]
+    ; CHECK-NEXT: $vgpr0 = COPY [[SELECT]](s32)
+    ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0
     %0:vgpr(s32) = COPY $vgpr0
     %1:vgpr(s32) = COPY $vgpr1
-    %2:sgpr(s32) = COPY $sgpr0
-    %3:vcc(s1) = G_ICMP intpred(eq), %0, %1
-    %4:sgpr(s32) = G_AMDGPU_COPY_SCC_VCC %3
-    %5:sgpr(s32) = G_CONSTANT i32 1
-    %6:sgpr(s32) = G_AND %4, %5
-    %7:sgpr(s32) = G_CONSTANT i32 -1
-    %8:sgpr(s32) = G_CONSTANT i32 0
-    %9:sgpr(s32) = G_SELECT %6, %7, %8
-    %10:vgpr(s32) = COPY %9
-    %11:sgpr(s32) = G_ADD %9, %2
-    %12:vgpr(s32) = COPY %11
-    $vgpr0 = COPY %10(s32)
-    $vgpr1 = COPY %12(s32)
-    S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
+    %2:vcc(s1) = G_ICMP intpred(eq), %0, %1
+    %3:sgpr(s32) = G_AMDGPU_COPY_SCC_VCC %2
+    %4:sgpr(s32) = G_CONSTANT i32 1
+    %5:sgpr(s32) = G_AND %3, %4
+    %6:sgpr(s32) = G_CONSTANT i32 -1
+    %7:sgpr(s32) = G_CONSTANT i32 5
+    %8:sgpr(s32) = G_SELECT %5, %6, %7
+    %9:vgpr(s32) = COPY %8
+    $vgpr0 = COPY %9(s32)
+    S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0
 ...
 
 ---
-name: test_no_eliminate_branch
+name: test_non_constant_operands
 legalized: true
 regBankSelected: true
 tracksRegLiveness: true
 body: |
-  ; CHECK-LABEL: name: test_no_eliminate_branch
-  ; CHECK: bb.0:
-  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
-  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
-  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
-  ; CHECK-NEXT:   [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY1]]
-  ; CHECK-NEXT:   [[AMDGPU_COPY_SCC_VCC:%[0-9]+]]:sgpr(s32) = G_AMDGPU_COPY_SCC_VCC [[ICMP]](s1)
-  ; CHECK-NEXT:   [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1
-  ; CHECK-NEXT:   [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[AMDGPU_COPY_SCC_VCC]], [[C]]
-  ; CHECK-NEXT:   G_BRCOND [[AND]](s32), %bb.1
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.1:
-  ; CHECK-NEXT:   S_ENDPGM 0
-  bb.0:
-    liveins: $vgpr0, $vgpr1
+  bb.1:
+    liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1
+
+    ; CHECK-LABEL: name: test_non_constant_operands
+    ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+    ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY1]]
+    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32)
+    ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY3]](s32)
+    ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[COPY4]], [[COPY5]]
+    ; CHECK-NEXT: $vgpr0 = COPY [[SELECT]](s32)
+    ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0
     %0:vgpr(s32) = COPY $vgpr0
     %1:vgpr(s32) = COPY $vgpr1
-    %2:vcc(s1) = G_ICMP intpred(eq), %0, %1
-    %3:sgpr(s32) = G_AMDGPU_COPY_SCC_VCC %2
-    %4:sgpr(s32) = G_CONSTANT i32 1
-    %5:sgpr(s32) = G_AND %3, %4
-    G_BRCOND %5(s32), %bb.1
-
-  bb.1:
-    S_ENDPGM 0
+    %2:sgpr(s32) = COPY $sgpr0
+    %3:sgpr(s32) = COPY $sgpr1
+    %4:vcc(s1) = G_ICMP intpred(eq), %0, %1
+    %5:sgpr(s32) = G_AMDGPU_COPY_SCC_VCC %4
+    %6:sgpr(s32) = G_SELECT %5, %2, %3
+    %7:vgpr(s32) = COPY %6
+    $vgpr0 = COPY %7(s32)
+    S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0
 ...
 
 ---
-name: test_no_eliminate_scalar_arithmetic_use
+name: test_shared_constant_no_copy
 legalized: true
 regBankSelected: true
 tracksRegLiveness: true
@@ -369,23 +314,22 @@ body: |
   bb.1:
     liveins: $vgpr0, $vgpr1, $sgpr0
 
-    ; CHECK-LABEL: name: test_no_eliminate_scalar_arithmetic_use
+    ; CHECK-LABEL: name: test_shared_constant_no_copy
     ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr0
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
     ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
     ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY1]]
-    ; CHECK-NEXT: [[AMDGPU_COPY_SCC_VCC:%[0-9]+]]:sgpr(s32) = G_AMDGPU_COPY_SCC_VCC [[ICMP]](s1)
-    ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1
-    ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[AMDGPU_COPY_SCC_VCC]], [[C]]
-    ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -1
-    ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
-    ; CHECK-NEXT: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[AND]](s32), [[C1]], [[C2]]
-    ; CHECK-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[SELECT]], [[COPY2]]
+    ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -1
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 -1
+    ; CHECK-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[C1]], [[C2]]
+    ; CHECK-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[COPY2]], [[C]]
     ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[ADD]](s32)
-    ; CHECK-NEXT: $vgpr0 = COPY [[COPY3]](s32)
-    ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0
+    ; CHECK-NEXT: $vgpr0 = COPY [[SELECT]](s32)
+    ; CHECK-NEXT: $vgpr1 = COPY [[COPY3]](s32)
+    ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
     %0:vgpr(s32) = COPY $vgpr0
     %1:vgpr(s32) = COPY $vgpr1
     %2:sgpr(s32) = COPY $sgpr0
@@ -396,50 +340,62 @@ body: |
     %7:sgpr(s32) = G_CONSTANT i32 -1
     %8:sgpr(s32) = G_CONSTANT i32 0
     %9:sgpr(s32) = G_SELECT %6, %7, %8
-    %10:sgpr(s32) = G_ADD %9, %2
-    %11:vgpr(s32) = COPY %10
-    $vgpr0 = COPY %11(s32)
-    S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0
+    %10:vgpr(s32) = COPY %9
+    %11:sgpr(s32) = G_ADD %2, %7
+    %12:vgpr(s32) = COPY %11
+    $vgpr0 = COPY %10(s32)
+    $vgpr1 = COPY %12(s32)
+    S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
 ...
 
 ---
-name: test_no_match_physical_reg_dst
+name: test_mixed_uses
 legalized: true
 regBankSelected: true
 tracksRegLiveness: true
 body: |
   bb.1:
-    liveins: $vgpr0, $vgpr1
+    liveins: $vgpr0, $vgpr1, $sgpr0
 
-    ; CHECK-LABEL: name: test_no_match_physical_reg_dst
-    ; CHECK: liveins: $vgpr0, $vgpr1
+    ; CHECK-LABEL: name: test_mixed_uses
+    ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr0
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
     ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY1]]
     ; CHECK-NEXT: [[AMDGPU_COPY_SCC_VCC:%[0-9]+]]:sgpr(s32) = G_AMDGPU_COPY_SCC_VCC [[ICMP]](s1)
-    ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1
-    ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[AMDGPU_COPY_SCC_VCC]], [[C]]
-    ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -1
-    ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
-    ; CHECK-NEXT: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[AND]](s32), [[C1]], [[C2]]
-    ; CHECK-NEXT: $vgpr0 = COPY [[SELECT]](s32)
-    ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0
+    ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -1
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[AMDGPU_COPY_SCC_VCC]](s32), [[C]], [[C1]]
+    ; CHECK-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 -1
+    ; CHECK-NEXT: [[C3:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[C2]], [[C3]]
+    ; CHECK-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[SELECT]], [[COPY2]]
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[ADD]](s32)
+    ; CHECK-NEXT: $vgpr0 = COPY [[SELECT1]](s32)
+    ; CHECK-NEXT: $vgpr1 = COPY [[COPY3]](s32)
+    ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
     %0:vgpr(s32) = COPY $vgpr0
     %1:vgpr(s32) = COPY $vgpr1
-    %2:vcc(s1) = G_ICMP intpred(eq), %0, %1
-    %3:sgpr(s32) = G_AMDGPU_COPY_SCC_VCC %2
-    %4:sgpr(s32) = G_CONSTANT i32 1
-    %5:sgpr(s32) = G_AND %3, %4
-    %6:sgpr(s32) = G_CONSTANT i32 -1
-    %7:sgpr(s32) = G_CONSTANT i32 0
-    %8:sgpr(s32) = G_SELECT %5, %6, %7
-    $vgpr0 = COPY %8(s32)
-    S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0
+    %2:sgpr(s32) = COPY $sgpr0
+    %3:vcc(s1) = G_ICMP intpred(eq), %0, %1
+    %4:sgpr(s32) = G_AMDGPU_COPY_SCC_VCC %3
+    %5:sgpr(s32) = G_CONSTANT i32 1
+    %6:sgpr(s32) = G_AND %4, %5
+    %7:sgpr(s32) = G_CONSTANT i32 -1
+    %8:sgpr(s32) = G_CONSTANT i32 0
+    %9:sgpr(s32) = G_SELECT %6, %7, %8
+    %10:vgpr(s32) = COPY %9
+    %11:sgpr(s32) = G_ADD %9, %2
+    %12:vgpr(s32) = COPY %11
+    $vgpr0 = COPY %10(s32)
+    $vgpr1 = COPY %12(s32)
+    S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
 ...
 
 ---
-name: test_no_match_no_and
+name: test_no_and
 legalized: true
 regBankSelected: true
 tracksRegLiveness: true
@@ -447,18 +403,16 @@ body: |
   bb.1:
     liveins: $vgpr0, $vgpr1
 
-    ; CHECK-LABEL: name: test_no_match_no_and
+    ; CHECK-LABEL: name: test_no_and
     ; CHECK: liveins: $vgpr0, $vgpr1
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
     ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY1]]
-    ; CHECK-NEXT: [[AMDGPU_COPY_SCC_VCC:%[0-9]+]]:sgpr(s32) = G_AMDGPU_COPY_SCC_VCC [[ICMP]](s1)
-    ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -1
-    ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
-    ; CHECK-NEXT: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[AMDGPU_COPY_SCC_VCC]](s32), [[C]], [[C1]]
-    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[SELECT]](s32)
-    ; CHECK-NEXT: $vgpr0 = COPY [[COPY2]](s32)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 -1
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[C]], [[C1]]
+    ; CHECK-NEXT: $vgpr0 = COPY [[SELECT]](s32)
     ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0
     %0:vgpr(s32) = COPY $vgpr0
     %1:vgpr(s32) = COPY $vgpr1
@@ -473,7 +427,7 @@ body: |
 ...
 
 ---
-name: test_no_match_and_mask_not_1
+name: test_and_known_bits
 legalized: true
 regBankSelected: true
 tracksRegLiveness: true
@@ -481,37 +435,33 @@ body: |
   bb.1:
     liveins: $vgpr0, $vgpr1
 
-    ; CHECK-LABEL: name: test_no_match_and_mask_not_1
+    ; CHECK-LABEL: name: test_and_known_bits
     ; CHECK: liveins: $vgpr0, $vgpr1
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
     ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY1]]
     ; CHECK-NEXT: [[AMDGPU_COPY_SCC_VCC:%[0-9]+]]:sgpr(s32) = G_AMDGPU_COPY_SCC_VCC [[ICMP]](s1)
-    ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 3
-    ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[AMDGPU_COPY_SCC_VCC]], [[C]]
-    ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -1
-    ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
-    ; CHECK-NEXT: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[AND]](s32), [[C1]], [[C2]]
-    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[SELECT]](s32)
-    ; CHECK-NEXT: $vgpr0 = COPY [[COPY2]](s32)
-    ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0
+    ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[AMDGPU_COPY_SCC_VCC]](s32), [[C]], [[C1]]
+    ; CHECK-NEXT: $sgpr0 = COPY [[SELECT]](s32)
+    ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $sgpr0
     %0:vgpr(s32) = COPY $vgpr0
     %1:vgpr(s32) = COPY $vgpr1
     %2:vcc(s1) = G_ICMP intpred(eq), %0, %1
     %3:sgpr(s32) = G_AMDGPU_COPY_SCC_VCC %2
-    %4:sgpr(s32) = G_CONSTANT i32 3
-    %5:sgpr(s32) = G_AND %3, %4
-    %6:sgpr(s32) = G_CONSTANT i32 -1
+    %4:sgpr(s32) = G_CONSTANT i32 1
+    %5:sgpr(s32) = G_AND %4, %3
+    %6:sgpr(s32) = G_CONSTANT i32 1
     %7:sgpr(s32) = G_CONSTANT i32 0
     %8:sgpr(s32) = G_SELECT %5, %6, %7
-    %9:vgpr(s32) = COPY %8
-    $vgpr0 = COPY %9(s32)
-    S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0
+    $sgpr0 = COPY %8(s32)
+    S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $sgpr0
 ...
 
 ---
-name: test_no_match_trueval_not_1_or_neg1
+name: test_multiple_selects
 legalized: true
 regBankSelected: true
 tracksRegLiveness: true
@@ -519,33 +469,35 @@ body: |
   bb.1:
     liveins: $vgpr0, $vgpr1
 
-    ; CHECK-LABEL: name: test_no_match_trueval_not_1_or_neg1
+    ; CHECK-LABEL: name: test_multiple_selects
     ; CHECK: liveins: $vgpr0, $vgpr1
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
     ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY1]]
-    ; CHECK-NEXT: [[AMDGPU_COPY_SCC_VCC:%[0-9]+]]:sgpr(s32) = G_AMDGPU_COPY_SCC_VCC [[ICMP]](s1)
-    ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1
-    ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[AMDGPU_COPY_SCC_VCC]], [[C]]
-    ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 2
-    ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
-    ; CHECK-NEXT: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[AND]](s32), [[C1]], [[C2]]
-    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[SELECT]](s32)
-    ; CHECK-NEXT: $vgpr0 = COPY [[COPY2]](s32)
-    ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0
+    ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 -1
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[C]], [[C1]]
+    ; CHECK-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 -1
+    ; CHECK-NEXT: [[C3:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[C2]], [[C3]]
+    ; CHECK-NEXT: $vgpr0 = COPY [[SELECT]](s32)
+    ; CHECK-NEXT: $vgpr1 = COPY [[SELECT1]](s32)
+    ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
     %0:vgpr(s32) = COPY $vgpr0
     %1:vgpr(s32) = COPY $vgpr1
     %2:vcc(s1) = G_ICMP intpred(eq), %0, %1
     %3:sgpr(s32) = G_AMDGPU_COPY_SCC_VCC %2
     %4:sgpr(s32) = G_CONSTANT i32 1
     %5:sgpr(s32) = G_AND %3, %4
-    %6:sgpr(s32) = G_CONSTANT i32 2
+    %6:sgpr(s32) = G_CONSTANT i32 -1
     %7:sgpr(s32) = G_CONSTANT i32 0
     %8:sgpr(s32) = G_SELECT %5, %6, %7
     %9:vgpr(s32) = COPY %8
+    %10:vgpr(s32) = COPY %8
     $vgpr0 = COPY %9(s32)
-    S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0
+    $vgpr1 = COPY %10(s32)
+    S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
 ...
 
 ---
@@ -564,11 +516,9 @@ body: |
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
     ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY1]]
     ; CHECK-NEXT: [[AMDGPU_COPY_SCC_VCC:%[0-9]+]]:sgpr(s32) = G_AMDGPU_COPY_SCC_VCC [[ICMP]](s1)
-    ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1
-    ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[AMDGPU_COPY_SCC_VCC]], [[C]]
-    ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -1
-    ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
-    ; CHECK-NEXT: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[AND]](s32), [[C1]], [[C2]]
+    ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -1
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[AMDGPU_COPY_SCC_VCC]](s32), [[C]], [[C1]]
     ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY [[SELECT]](s32)
     ; CHECK-NEXT: $sgpr0 = COPY [[COPY2]](s32)
     ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $sgpr0
@@ -587,7 +537,80 @@ body: |
 ...
 
 ---
-name: test_no_match_false_not_zero
+name: test_no_match_brcond
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  ; CHECK-LABEL: name: test_no_match_brcond
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY1]]
+  ; CHECK-NEXT:   [[AMDGPU_COPY_SCC_VCC:%[0-9]+]]:sgpr(s32) = G_AMDGPU_COPY_SCC_VCC [[ICMP]](s1)
+  ; CHECK-NEXT:   G_BRCOND [[AMDGPU_COPY_SCC_VCC]](s32), %bb.1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   S_ENDPGM 0
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:vgpr(s32) = COPY $vgpr1
+    %2:vcc(s1) = G_ICMP intpred(eq), %0, %1
+    %3:sgpr(s32) = G_AMDGPU_COPY_SCC_VCC %2
+    %4:sgpr(s32) = G_CONSTANT i32 1
+    %5:sgpr(s32) = G_AND %3, %4
+    G_BRCOND %5(s32), %bb.1
+
+  bb.1:
+    S_ENDPGM 0
+...
+
+---
+name: test_no_match_sgpr_use
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.1:
+    liveins: $vgpr0, $vgpr1, $sgpr0
+
+    ; CHECK-LABEL: name: test_no_match_sgpr_use
+    ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY1]]
+    ; CHECK-NEXT: [[AMDGPU_COPY_SCC_VCC:%[0-9]+]]:sgpr(s32) = G_AMDGPU_COPY_SCC_VCC [[ICMP]](s1)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -1
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[AMDGPU_COPY_SCC_VCC]](s32), [[C]], [[C1]]
+    ; CHECK-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[SELECT]], [[COPY2]]
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[ADD]](s32)
+    ; CHECK-NEXT: $vgpr0 = COPY [[COPY3]](s32)
+    ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:vgpr(s32) = COPY $vgpr1
+    %2:sgpr(s32) = COPY $sgpr0
+    %3:vcc(s1) = G_ICMP intpred(eq), %0, %1
+    %4:sgpr(s32) = G_AMDGPU_COPY_SCC_VCC %3
+    %5:sgpr(s32) = G_CONSTANT i32 1
+    %6:sgpr(s32) = G_AND %4, %5
+    %7:sgpr(s32) = G_CONSTANT i32 -1
+    %8:sgpr(s32) = G_CONSTANT i32 0
+    %9:sgpr(s32) = G_SELECT %6, %7, %8
+    %10:sgpr(s32) = G_ADD %9, %2
+    %11:vgpr(s32) = COPY %10
+    $vgpr0 = COPY %11(s32)
+    S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0
+...
+
+---
+name: test_no_match_physical_reg_dst
 legalized: true
 regBankSelected: true
 tracksRegLiveness: true
@@ -595,20 +618,17 @@ body: |
   bb.1:
     liveins: $vgpr0, $vgpr1
 
-    ; CHECK-LABEL: name: test_no_match_false_not_zero
+    ; CHECK-LABEL: name: test_no_match_physical_reg_dst
     ; CHECK: liveins: $vgpr0, $vgpr1
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
     ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY1]]
     ; CHECK-NEXT: [[AMDGPU_COPY_SCC_VCC:%[0-9]+]]:sgpr(s32) = G_AMDGPU_COPY_SCC_VCC [[ICMP]](s1)
-    ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1
-    ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[AMDGPU_COPY_SCC_VCC]], [[C]]
-    ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -1
-    ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5
-    ; CHECK-NEXT: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[AND]](s32), [[C1]], [[C2]]
-    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[SELECT]](s32)
-    ; CHECK-NEXT: $vgpr0 = COPY [[COPY2]](s32)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -1
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[AMDGPU_COPY_SCC_VCC]](s32), [[C]], [[C1]]
+    ; CHECK-NEXT: $vgpr0 = COPY [[SELECT]](s32)
     ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0
     %0:vgpr(s32) = COPY $vgpr0
     %1:vgpr(s32) = COPY $vgpr1
@@ -617,9 +637,8 @@ body: |
     %4:sgpr(s32) = G_CONSTANT i32 1
     %5:sgpr(s32) = G_AND %3, %4
     %6:sgpr(s32) = G_CONSTANT i32 -1
-    %7:sgpr(s32) = G_CONSTANT i32 5
+    %7:sgpr(s32) = G_CONSTANT i32 0
     %8:sgpr(s32) = G_SELECT %5, %6, %7
-    %9:vgpr(s32) = COPY %8
-    $vgpr0 = COPY %9(s32)
+    $vgpr0 = COPY %8(s32)
     S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0
 ...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-smed3.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-smed3.mir
index f18a576b56250..197d586e1502e 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-smed3.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-smed3.mir
@@ -15,11 +15,9 @@ body: |
     ; CHECK: liveins: $vgpr0
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
-    ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12
-    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
-    ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
-    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
-    ; CHECK-NEXT: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[COPY1]], [[COPY2]]
+    ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 -12
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 17
+    ; CHECK-NEXT: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[C]], [[C1]]
     ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32)
     ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0
     %0:vgpr(s32) = COPY $vgpr0
@@ -46,11 +44,9 @@ body: |
     ; CHECK: liveins: $vgpr0
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
-    ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12
-    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
-    ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
-    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
-    ; CHECK-NEXT: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[COPY1]], [[COPY2]]
+    ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 -12
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 17
+    ; CHECK-NEXT: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[C]], [[C1]]
     ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32)
     ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0
     %0:vgpr(s32) = COPY $vgpr0
@@ -77,11 +73,9 @@ body: |
     ; CHECK: liveins: $vgpr0
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
-    ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12
-    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
-    ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
-    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
-    ; CHECK-NEXT: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[COPY1]], [[COPY2]]
+    ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 -12
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 17
+    ; CHECK-NEXT: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[C]], [[C1]]
     ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32)
     ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0
     %0:vgpr(s32) = COPY $vgpr0
@@ -108,11 +102,9 @@ body: |
     ; CHECK: liveins: $vgpr0
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
-    ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12
-    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
-    ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
-    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
-    ; CHECK-NEXT: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[COPY1]], [[COPY2]]
+    ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 -12
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 17
+    ; CHECK-NEXT: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[C]], [[C1]]
     ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32)
     ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0
     %0:vgpr(s32) = COPY $vgpr0
@@ -139,11 +131,9 @@ body: |
     ; CHECK: liveins: $vgpr0
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
-    ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
-    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
-    ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12
-    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
-    ; CHECK-NEXT: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[COPY2]], [[COPY1]]
+    ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 -12
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 17
+    ; CHECK-NEXT: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[C]], [[C1]]
     ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32)
     ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0
     %0:vgpr(s32) = COPY $vgpr0
@@ -170,11 +160,9 @@ body: |
     ; CHECK: liveins: $vgpr0
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
-    ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
-    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
-    ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12
-    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
-    ; CHECK-NEXT: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[COPY2]], [[COPY1]]
+    ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 -12
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 17
+    ; CHECK-NEXT: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[C]], [[C1]]
     ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32)
     ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0
     %0:vgpr(s32) = COPY $vgpr0
@@ -201,11 +189,9 @@ body: |
     ; CHECK: liveins: $vgpr0
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
-    ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
-    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
-    ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12
-    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
-    ; CHECK-NEXT: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[COPY2]], [[COPY1]]
+    ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 -12
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 17
+    ; CHECK-NEXT: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[C]], [[C1]]
     ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32)
     ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0
     %0:vgpr(s32) = COPY $vgpr0
@@ -232,11 +218,9 @@ body: |
     ; CHECK: liveins: $vgpr0
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
-    ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
-    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
-    ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12
-    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
-    ; CHECK-NEXT: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[COPY2]], [[COPY1]]
+    ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 -12
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 17
+    ; CHECK-NEXT: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[C]], [[C1]]
     ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32)
     ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0
     %0:vgpr(s32) = COPY $vgpr0
@@ -304,8 +288,8 @@ body: |
     ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
     ; CHECK-NEXT: [[SMIN:%[0-9]+]]:sgpr(s32) = G_SMIN [[SMAX]], [[C1]]
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[SMIN]](s32)
-    ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY1]](s32)
-    ; CHECK-NEXT: $sgpr0 = COPY [[INT]](s32)
+    ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY1]](s32)
+    ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32)
     ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
     %0:sgpr(s32) = COPY $sgpr2
     %3:sgpr(s32) = G_CONSTANT i32 -12
@@ -331,11 +315,9 @@ body: |
     ; CHECK: liveins: $vgpr0
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
-    ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12
-    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
-    ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 65
-    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
-    ; CHECK-NEXT: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[COPY1]], [[COPY2]]
+    ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 -12
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 65
+    ; CHECK-NEXT: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[C]], [[C1]]
     ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32)
     ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0
     %0:vgpr(s32) = COPY $vgpr0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-umed3.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-umed3.mir
index ef05a5274462a..ec0b78101f577 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-umed3.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-umed3.mir
@@ -15,11 +15,9 @@ body: |
     ; CHECK: liveins: $vgpr0
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
-    ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12
-    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
-    ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
-    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
-    ; CHECK-NEXT: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[COPY1]], [[COPY2]]
+    ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 12
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 17
+    ; CHECK-NEXT: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[C]], [[C1]]
     ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32)
     ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0
     %0:vgpr(s32) = COPY $vgpr0
@@ -46,11 +44,9 @@ body: |
     ; CHECK: liveins: $vgpr0
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
-    ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12
-    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
-    ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
-    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
-    ; CHECK-NEXT: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[COPY1]], [[COPY2]]
+    ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 12
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 17
+    ; CHECK-NEXT: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[C]], [[C1]]
     ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32)
     ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0
     %0:vgpr(s32) = COPY $vgpr0
@@ -77,11 +73,9 @@ body: |
     ; CHECK: liveins: $vgpr0
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
-    ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12
-    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
-    ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
-    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
-    ; CHECK-NEXT: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[COPY1]], [[COPY2]]
+    ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 12
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 17
+    ; CHECK-NEXT: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[C]], [[C1]]
     ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32)
     ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0
     %0:vgpr(s32) = COPY $vgpr0
@@ -108,11 +102,9 @@ body: |
     ; CHECK: liveins: $vgpr0
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
-    ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12
-    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
-    ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
-    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
-    ; CHECK-NEXT: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[COPY1]], [[COPY2]]
+    ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 12
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 17
+    ; CHECK-NEXT: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[C]], [[C1]]
     ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32)
     ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0
     %0:vgpr(s32) = COPY $vgpr0
@@ -139,11 +131,9 @@ body: |
     ; CHECK: liveins: $vgpr0
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
-    ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
-    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
-    ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12
-    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
-    ; CHECK-NEXT: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[COPY2]], [[COPY1]]
+    ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 12
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 17
+    ; CHECK-NEXT: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[C]], [[C1]]
     ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32)
     ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0
     %0:vgpr(s32) = COPY $vgpr0
@@ -170,11 +160,9 @@ body: |
     ; CHECK: liveins: $vgpr0
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
-    ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
-    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
-    ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12
-    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
-    ; CHECK-NEXT: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[COPY2]], [[COPY1]]
+    ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 12
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 17
+    ; CHECK-NEXT: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[C]], [[C1]]
     ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32)
     ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0
     %0:vgpr(s32) = COPY $vgpr0
@@ -201,11 +189,9 @@ body: |
     ; CHECK: liveins: $vgpr0
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
-    ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
-    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
-    ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12
-    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
-    ; CHECK-NEXT: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[COPY2]], [[COPY1]]
+    ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 12
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 17
+    ; CHECK-NEXT: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[C]], [[C1]]
     ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32)
     ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0
     %0:vgpr(s32) = COPY $vgpr0
@@ -232,11 +218,9 @@ body: |
     ; CHECK: liveins: $vgpr0
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
-    ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
-    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
-    ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12
-    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
-    ; CHECK-NEXT: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[COPY2]], [[COPY1]]
+    ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 12
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 17
+    ; CHECK-NEXT: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[C]], [[C1]]
     ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32)
     ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0
     %0:vgpr(s32) = COPY $vgpr0
@@ -304,8 +288,8 @@ body: |
     ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
     ; CHECK-NEXT: [[UMIN:%[0-9]+]]:sgpr(s32) = G_UMIN [[UMAX]], [[C1]]
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[UMIN]](s32)
-    ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY1]](s32)
-    ; CHECK-NEXT: $sgpr0 = COPY [[INT]](s32)
+    ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY1]](s32)
+    ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32)
     ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
     %0:sgpr(s32) = COPY $sgpr2
     %3:sgpr(s32) = G_CONSTANT i32 12
@@ -332,11 +316,9 @@ body: |
     ; CHECK: liveins: $vgpr0
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
-    ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12
-    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
-    ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 65
-    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
-    ; CHECK-NEXT: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[COPY1]], [[COPY2]]
+    ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 12
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 65
+    ; CHECK-NEXT: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[C]], [[C1]]
     ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32)
     ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0
     %0:vgpr(s32) = COPY $vgpr0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui.ll
index e450da73ab47d..3ddc94ac9308c 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui.ll
@@ -209,13 +209,9 @@ define amdgpu_ps void @vcc_to_scc(float inreg %a, i32 inreg %b, i32 inreg %c, pt
 ;
 ; NEW_RBS-LABEL: vcc_to_scc:
 ; NEW_RBS:       ; %bb.0:
+; NEW_RBS-NEXT:    v_mov_b32_e32 v2, s2
 ; NEW_RBS-NEXT:    v_cmp_eq_f32_e64 s0, s0, 0
-; NEW_RBS-NEXT:    s_cmp_lg_u32 s0, 0
-; NEW_RBS-NEXT:    s_cselect_b32 s0, 1, 0
-; NEW_RBS-NEXT:    s_and_b32 s0, s0, 1
-; NEW_RBS-NEXT:    s_cmp_lg_u32 s0, 0
-; NEW_RBS-NEXT:    s_cselect_b32 s0, s1, s2
-; NEW_RBS-NEXT:    v_mov_b32_e32 v2, s0
+; NEW_RBS-NEXT:    v_cndmask_b32_e64 v2, v2, s1, s0
 ; NEW_RBS-NEXT:    global_store_dword v[0:1], v2, off
 ; NEW_RBS-NEXT:    s_endpgm
   %vcc_to_scc = fcmp oeq float %a, 0.0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll
index 3b7a6e12a1b7d..17b966d8dce07 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll
@@ -4269,11 +4269,10 @@ define amdgpu_ps i48 @s_saddsat_i48(i48 inreg %lhs, i48 inreg %rhs) {
 ; GFX6-NEXT:    v_cmp_lt_i64_e64 s[0:1], s[0:1], 0
 ; GFX6-NEXT:    s_or_b64 s[0:1], s[0:1], s[0:1]
 ; GFX6-NEXT:    s_cselect_b32 s0, 1, 0
-; GFX6-NEXT:    s_xor_b32 s2, s0, s3
 ; GFX6-NEXT:    s_ashr_i32 s1, s7, 31
+; GFX6-NEXT:    s_xor_b32 s2, s0, s3
 ; GFX6-NEXT:    s_ashr_i32 s0, s7, 15
 ; GFX6-NEXT:    s_addk_i32 s1, 0x8000
-; GFX6-NEXT:    s_and_b32 s2, s2, 1
 ; GFX6-NEXT:    s_cmp_lg_u32 s2, 0
 ; GFX6-NEXT:    s_cselect_b64 s[0:1], s[0:1], s[4:5]
 ; GFX6-NEXT:    ; return to shader part epilog
@@ -4293,11 +4292,10 @@ define amdgpu_ps i48 @s_saddsat_i48(i48 inreg %lhs, i48 inreg %rhs) {
 ; GFX8-NEXT:    v_cmp_lt_i64_e64 s[0:1], s[0:1], 0
 ; GFX8-NEXT:    s_cmp_lg_u64 s[0:1], 0
 ; GFX8-NEXT:    s_cselect_b32 s0, 1, 0
-; GFX8-NEXT:    s_xor_b32 s2, s0, s3
 ; GFX8-NEXT:    s_ashr_i32 s1, s7, 31
+; GFX8-NEXT:    s_xor_b32 s2, s0, s3
 ; GFX8-NEXT:    s_ashr_i32 s0, s7, 15
 ; GFX8-NEXT:    s_addk_i32 s1, 0x8000
-; GFX8-NEXT:    s_and_b32 s2, s2, 1
 ; GFX8-NEXT:    s_cmp_lg_u32 s2, 0
 ; GFX8-NEXT:    s_cselect_b64 s[0:1], s[0:1], s[4:5]
 ; GFX8-NEXT:    ; return to shader part epilog
@@ -4319,7 +4317,6 @@ define amdgpu_ps i48 @s_saddsat_i48(i48 inreg %lhs, i48 inreg %rhs) {
 ; GFX9-NEXT:    s_xor_b32 s2, s0, s6
 ; GFX9-NEXT:    s_ashr_i32 s0, s5, 31
 ; GFX9-NEXT:    s_add_i32 s1, s0, 0x80000000
-; GFX9-NEXT:    s_and_b32 s2, s2, 1
 ; GFX9-NEXT:    s_cmp_lg_u32 s2, 0
 ; GFX9-NEXT:    s_cselect_b64 s[0:1], s[0:1], s[4:5]
 ; GFX9-NEXT:    s_ashr_i64 s[0:1], s[0:1], 16
@@ -4334,12 +4331,11 @@ define amdgpu_ps i48 @s_saddsat_i48(i48 inreg %lhs, i48 inreg %rhs) {
 ; GFX10PLUS-NEXT:    v_cmp_lt_i64_e64 s0, s[4:5], s[0:1]
 ; GFX10PLUS-NEXT:    v_cmp_lt_i64_e64 s1, s[2:3], 0
 ; GFX10PLUS-NEXT:    s_cmp_lg_u32 s0, 0
-; GFX10PLUS-NEXT:    s_cselect_b32 s0, 1, 0
+; GFX10PLUS-NEXT:    s_cselect_b32 s2, 1, 0
 ; GFX10PLUS-NEXT:    s_cmp_lg_u32 s1, 0
 ; GFX10PLUS-NEXT:    s_cselect_b32 s1, 1, 0
-; GFX10PLUS-NEXT:    s_xor_b32 s1, s1, s0
 ; GFX10PLUS-NEXT:    s_ashr_i32 s0, s5, 31
-; GFX10PLUS-NEXT:    s_and_b32 s2, s1, 1
+; GFX10PLUS-NEXT:    s_xor_b32 s2, s1, s2
 ; GFX10PLUS-NEXT:    s_add_i32 s1, s0, 0x80000000
 ; GFX10PLUS-NEXT:    s_cmp_lg_u32 s2, 0
 ; GFX10PLUS-NEXT:    s_cselect_b64 s[0:1], s[0:1], s[4:5]
@@ -4638,7 +4634,6 @@ define amdgpu_ps i64 @s_saddsat_i64(i64 inreg %lhs, i64 inreg %rhs) {
 ; GFX6-NEXT:    s_xor_b32 s2, s0, s6
 ; GFX6-NEXT:    s_ashr_i32 s0, s5, 31
 ; GFX6-NEXT:    s_add_i32 s1, s0, 0x80000000
-; GFX6-NEXT:    s_and_b32 s2, s2, 1
 ; GFX6-NEXT:    s_cmp_lg_u32 s2, 0
 ; GFX6-NEXT:    s_cselect_b64 s[0:1], s[0:1], s[4:5]
 ; GFX6-NEXT:    ; return to shader part epilog
@@ -4658,7 +4653,6 @@ define amdgpu_ps i64 @s_saddsat_i64(i64 inreg %lhs, i64 inreg %rhs) {
 ; GFX8-NEXT:    s_xor_b32 s2, s0, s6
 ; GFX8-NEXT:    s_ashr_i32 s0, s5, 31
 ; GFX8-NEXT:    s_add_i32 s1, s0, 0x80000000
-; GFX8-NEXT:    s_and_b32 s2, s2, 1
 ; GFX8-NEXT:    s_cmp_lg_u32 s2, 0
 ; GFX8-NEXT:    s_cselect_b64 s[0:1], s[0:1], s[4:5]
 ; GFX8-NEXT:    ; return to shader part epilog
@@ -4678,7 +4672,6 @@ define amdgpu_ps i64 @s_saddsat_i64(i64 inreg %lhs, i64 inreg %rhs) {
 ; GFX9-NEXT:    s_xor_b32 s2, s0, s6
 ; GFX9-NEXT:    s_ashr_i32 s0, s5, 31
 ; GFX9-NEXT:    s_add_i32 s1, s0, 0x80000000
-; GFX9-NEXT:    s_and_b32 s2, s2, 1
 ; GFX9-NEXT:    s_cmp_lg_u32 s2, 0
 ; GFX9-NEXT:    s_cselect_b64 s[0:1], s[0:1], s[4:5]
 ; GFX9-NEXT:    ; return to shader part epilog
@@ -4690,12 +4683,11 @@ define amdgpu_ps i64 @s_saddsat_i64(i64 inreg %lhs, i64 inreg %rhs) {
 ; GFX10PLUS-NEXT:    v_cmp_lt_i64_e64 s0, s[4:5], s[0:1]
 ; GFX10PLUS-NEXT:    v_cmp_lt_i64_e64 s1, s[2:3], 0
 ; GFX10PLUS-NEXT:    s_cmp_lg_u32 s0, 0
-; GFX10PLUS-NEXT:    s_cselect_b32 s0, 1, 0
+; GFX10PLUS-NEXT:    s_cselect_b32 s2, 1, 0
 ; GFX10PLUS-NEXT:    s_cmp_lg_u32 s1, 0
 ; GFX10PLUS-NEXT:    s_cselect_b32 s1, 1, 0
-; GFX10PLUS-NEXT:    s_xor_b32 s1, s1, s0
 ; GFX10PLUS-NEXT:    s_ashr_i32 s0, s5, 31
-; GFX10PLUS-NEXT:    s_and_b32 s2, s1, 1
+; GFX10PLUS-NEXT:    s_xor_b32 s2, s1, s2
 ; GFX10PLUS-NEXT:    s_add_i32 s1, s0, 0x80000000
 ; GFX10PLUS-NEXT:    s_cmp_lg_u32 s2, 0
 ; GFX10PLUS-NEXT:    s_cselect_b64 s[0:1], s[0:1], s[4:5]
@@ -4994,7 +4986,6 @@ define amdgpu_ps <2 x i64> @s_saddsat_v2i64(<2 x i64> inreg %lhs, <2 x i64> inre
 ; GFX6-NEXT:    s_xor_b32 s4, s0, s10
 ; GFX6-NEXT:    s_ashr_i32 s0, s9, 31
 ; GFX6-NEXT:    s_add_i32 s1, s0, 0x80000000
-; GFX6-NEXT:    s_and_b32 s4, s4, 1
 ; GFX6-NEXT:    s_cmp_lg_u32 s4, 0
 ; GFX6-NEXT:    s_cselect_b64 s[0:1], s[0:1], s[8:9]
 ; GFX6-NEXT:    s_add_u32 s4, s2, s6
@@ -5009,7 +5000,6 @@ define amdgpu_ps <2 x i64> @s_saddsat_v2i64(<2 x i64> inreg %lhs, <2 x i64> inre
 ; GFX6-NEXT:    s_xor_b32 s6, s2, s8
 ; GFX6-NEXT:    s_ashr_i32 s2, s5, 31
 ; GFX6-NEXT:    s_add_i32 s3, s2, 0x80000000
-; GFX6-NEXT:    s_and_b32 s6, s6, 1
 ; GFX6-NEXT:    s_cmp_lg_u32 s6, 0
 ; GFX6-NEXT:    s_cselect_b64 s[2:3], s[2:3], s[4:5]
 ; GFX6-NEXT:    ; return to shader part epilog
@@ -5029,7 +5019,6 @@ define amdgpu_ps <2 x i64> @s_saddsat_v2i64(<2 x i64> inreg %lhs, <2 x i64> inre
 ; GFX8-NEXT:    s_xor_b32 s4, s0, s10
 ; GFX8-NEXT:    s_ashr_i32 s0, s9, 31
 ; GFX8-NEXT:    s_add_i32 s1, s0, 0x80000000
-; GFX8-NEXT:    s_and_b32 s4, s4, 1
 ; GFX8-NEXT:    s_cmp_lg_u32 s4, 0
 ; GFX8-NEXT:    s_cselect_b64 s[0:1], s[0:1], s[8:9]
 ; GFX8-NEXT:    s_add_u32 s4, s2, s6
@@ -5045,7 +5034,6 @@ define amdgpu_ps <2 x i64> @s_saddsat_v2i64(<2 x i64> inreg %lhs, <2 x i64> inre
 ; GFX8-NEXT:    s_xor_b32 s6, s2, s8
 ; GFX8-NEXT:    s_ashr_i32 s2, s5, 31
 ; GFX8-NEXT:    s_add_i32 s3, s2, 0x80000000
-; GFX8-NEXT:    s_and_b32 s6, s6, 1
 ; GFX8-NEXT:    s_cmp_lg_u32 s6, 0
 ; GFX8-NEXT:    s_cselect_b64 s[2:3], s[2:3], s[4:5]
 ; GFX8-NEXT:    ; return to shader part epilog
@@ -5065,7 +5053,6 @@ define amdgpu_ps <2 x i64> @s_saddsat_v2i64(<2 x i64> inreg %lhs, <2 x i64> inre
 ; GFX9-NEXT:    s_xor_b32 s4, s0, s10
 ; GFX9-NEXT:    s_ashr_i32 s0, s9, 31
 ; GFX9-NEXT:    s_add_i32 s1, s0, 0x80000000
-; GFX9-NEXT:    s_and_b32 s4, s4, 1
 ; GFX9-NEXT:    s_cmp_lg_u32 s4, 0
 ; GFX9-NEXT:    s_cselect_b64 s[0:1], s[0:1], s[8:9]
 ; GFX9-NEXT:    s_add_u32 s4, s2, s6
@@ -5081,7 +5068,6 @@ define amdgpu_ps <2 x i64> @s_saddsat_v2i64(<2 x i64> inreg %lhs, <2 x i64> inre
 ; GFX9-NEXT:    s_xor_b32 s6, s2, s8
 ; GFX9-NEXT:    s_ashr_i32 s2, s5, 31
 ; GFX9-NEXT:    s_add_i32 s3, s2, 0x80000000
-; GFX9-NEXT:    s_and_b32 s6, s6, 1
 ; GFX9-NEXT:    s_cmp_lg_u32 s6, 0
 ; GFX9-NEXT:    s_cselect_b64 s[2:3], s[2:3], s[4:5]
 ; GFX9-NEXT:    ; return to shader part epilog
@@ -5093,12 +5079,11 @@ define amdgpu_ps <2 x i64> @s_saddsat_v2i64(<2 x i64> inreg %lhs, <2 x i64> inre
 ; GFX10PLUS-NEXT:    v_cmp_lt_i64_e64 s0, s[8:9], s[0:1]
 ; GFX10PLUS-NEXT:    v_cmp_lt_i64_e64 s1, s[4:5], 0
 ; GFX10PLUS-NEXT:    s_cmp_lg_u32 s0, 0
-; GFX10PLUS-NEXT:    s_cselect_b32 s0, 1, 0
+; GFX10PLUS-NEXT:    s_cselect_b32 s4, 1, 0
 ; GFX10PLUS-NEXT:    s_cmp_lg_u32 s1, 0
 ; GFX10PLUS-NEXT:    s_cselect_b32 s1, 1, 0
-; GFX10PLUS-NEXT:    s_xor_b32 s1, s1, s0
 ; GFX10PLUS-NEXT:    s_ashr_i32 s0, s9, 31
-; GFX10PLUS-NEXT:    s_and_b32 s4, s1, 1
+; GFX10PLUS-NEXT:    s_xor_b32 s4, s1, s4
 ; GFX10PLUS-NEXT:    s_add_i32 s1, s0, 0x80000000
 ; GFX10PLUS-NEXT:    s_cmp_lg_u32 s4, 0
 ; GFX10PLUS-NEXT:    s_cselect_b64 s[0:1], s[0:1], s[8:9]
@@ -5107,12 +5092,11 @@ define amdgpu_ps <2 x i64> @s_saddsat_v2i64(<2 x i64> inreg %lhs, <2 x i64> inre
 ; GFX10PLUS-NEXT:    v_cmp_lt_i64_e64 s2, s[4:5], s[2:3]
 ; GFX10PLUS-NEXT:    v_cmp_lt_i64_e64 s3, s[6:7], 0
 ; GFX10PLUS-NEXT:    s_cmp_lg_u32 s2, 0
-; GFX10PLUS-NEXT:    s_cselect_b32 s2, 1, 0
+; GFX10PLUS-NEXT:    s_cselect_b32 s6, 1, 0
 ; GFX10PLUS-NEXT:    s_cmp_lg_u32 s3, 0
 ; GFX10PLUS-NEXT:    s_cselect_b32 s3, 1, 0
-; GFX10PLUS-NEXT:    s_xor_b32 s3, s3, s2
 ; GFX10PLUS-NEXT:    s_ashr_i32 s2, s5, 31
-; GFX10PLUS-NEXT:    s_and_b32 s6, s3, 1
+; GFX10PLUS-NEXT:    s_xor_b32 s6, s3, s6
 ; GFX10PLUS-NEXT:    s_add_i32 s3, s2, 0x80000000
 ; GFX10PLUS-NEXT:    s_cmp_lg_u32 s6, 0
 ; GFX10PLUS-NEXT:    s_cselect_b64 s[2:3], s[2:3], s[4:5]
@@ -5140,18 +5124,12 @@ define amdgpu_ps i128 @s_saddsat_i128(i128 inreg %lhs, i128 inreg %rhs) {
 ; GFX6-NEXT:    v_cmp_eq_u64_e32 vcc, s[8:9], v[0:1]
 ; GFX6-NEXT:    s_cselect_b32 s2, 1, 0
 ; GFX6-NEXT:    s_or_b64 s[0:1], vcc, vcc
-; GFX6-NEXT:    s_cselect_b32 s0, 1, 0
-; GFX6-NEXT:    s_and_b32 s3, s0, 1
 ; GFX6-NEXT:    v_cmp_lt_i64_e64 s[0:1], s[6:7], 0
-; GFX6-NEXT:    s_cmp_lg_u32 s3, 0
 ; GFX6-NEXT:    s_cselect_b32 s2, s10, s2
 ; GFX6-NEXT:    s_or_b64 s[0:1], s[0:1], s[0:1]
 ; GFX6-NEXT:    v_cmp_eq_u64_e64 s[0:1], s[6:7], 0
 ; GFX6-NEXT:    s_cselect_b32 s3, 1, 0
 ; GFX6-NEXT:    s_or_b64 s[0:1], s[0:1], s[0:1]
-; GFX6-NEXT:    s_cselect_b32 s0, 1, 0
-; GFX6-NEXT:    s_and_b32 s0, s0, 1
-; GFX6-NEXT:    s_cmp_lg_u32 s0, 0
 ; GFX6-NEXT:    s_cselect_b32 s0, 0, s3
 ; GFX6-NEXT:    s_xor_b32 s6, s0, s2
 ; GFX6-NEXT:    s_ashr_i32 s0, s9, 31
@@ -5416,26 +5394,23 @@ define amdgpu_ps <4 x float> @saddsat_i128_vs(i128 %lhs, i128 inreg %rhs) {
 ; GFX6-NEXT:    v_add_i32_e32 v4, vcc, s0, v0
 ; GFX6-NEXT:    v_addc_u32_e32 v5, vcc, v1, v5, vcc
 ; GFX6-NEXT:    v_mov_b32_e32 v6, s2
-; GFX6-NEXT:    v_cmp_lt_i64_e64 s[0:1], s[2:3], 0
 ; GFX6-NEXT:    v_mov_b32_e32 v7, s3
 ; GFX6-NEXT:    v_addc_u32_e32 v6, vcc, v2, v6, vcc
 ; GFX6-NEXT:    v_addc_u32_e32 v7, vcc, v3, v7, vcc
-; GFX6-NEXT:    s_or_b64 s[0:1], s[0:1], s[0:1]
 ; GFX6-NEXT:    v_cmp_lt_u64_e32 vcc, v[4:5], v[0:1]
-; GFX6-NEXT:    v_cmp_eq_u64_e64 s[0:1], s[2:3], 0
+; GFX6-NEXT:    v_cmp_lt_i64_e64 s[0:1], s[2:3], 0
 ; GFX6-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
 ; GFX6-NEXT:    v_cmp_lt_i64_e32 vcc, v[6:7], v[2:3]
-; GFX6-NEXT:    s_cselect_b32 s4, 1, 0
 ; GFX6-NEXT:    s_or_b64 s[0:1], s[0:1], s[0:1]
-; GFX6-NEXT:    s_cselect_b32 s0, 1, 0
 ; GFX6-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
 ; GFX6-NEXT:    v_cmp_eq_u64_e32 vcc, v[6:7], v[2:3]
-; GFX6-NEXT:    s_and_b32 s0, s0, 1
-; GFX6-NEXT:    s_cmp_lg_u32 s0, 0
+; GFX6-NEXT:    s_cselect_b32 s0, 1, 0
 ; GFX6-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
-; GFX6-NEXT:    s_cselect_b32 s0, 0, s4
-; GFX6-NEXT:    v_xor_b32_e32 v0, s0, v0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s0
+; GFX6-NEXT:    v_cmp_eq_u64_e64 s[0:1], s[2:3], 0
 ; GFX6-NEXT:    v_ashrrev_i32_e32 v2, 31, v7
+; GFX6-NEXT:    v_cndmask_b32_e64 v1, v1, 0, s[0:1]
+; GFX6-NEXT:    v_xor_b32_e32 v0, v1, v0
 ; GFX6-NEXT:    v_and_b32_e32 v0, 1, v0
 ; GFX6-NEXT:    v_add_i32_e32 v3, vcc, 0x80000000, v2
 ; GFX6-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
@@ -5465,9 +5440,8 @@ define amdgpu_ps <4 x float> @saddsat_i128_vs(i128 %lhs, i128 inreg %rhs) {
 ; GFX8-NEXT:    s_cmp_eq_u64 s[2:3], 0
 ; GFX8-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
 ; GFX8-NEXT:    s_cselect_b32 s0, 0, s0
-; GFX8-NEXT:    v_xor_b32_e32 v0, s0, v0
 ; GFX8-NEXT:    v_ashrrev_i32_e32 v2, 31, v7
-; GFX8-NEXT:    v_and_b32_e32 v0, 1, v0
+; GFX8-NEXT:    v_xor_b32_e32 v0, s0, v0
 ; GFX8-NEXT:    v_add_u32_e32 v3, vcc, 0x80000000, v2
 ; GFX8-NEXT:    v_cmp_ne_u16_e32 vcc, 0, v0
 ; GFX8-NEXT:    v_cndmask_b32_e32 v0, v4, v2, vcc
@@ -5497,7 +5471,6 @@ define amdgpu_ps <4 x float> @saddsat_i128_vs(i128 %lhs, i128 inreg %rhs) {
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
 ; GFX9-NEXT:    s_cselect_b32 s0, 0, s0
 ; GFX9-NEXT:    v_xor_b32_e32 v0, s0, v0
-; GFX9-NEXT:    v_and_b32_e32 v0, 1, v0
 ; GFX9-NEXT:    v_ashrrev_i32_e32 v2, 31, v7
 ; GFX9-NEXT:    v_add_u32_e32 v3, 0x80000000, v2
 ; GFX9-NEXT:    v_cmp_ne_u16_e32 vcc, 0, v0
@@ -5527,7 +5500,6 @@ define amdgpu_ps <4 x float> @saddsat_i128_vs(i128 %lhs, i128 inreg %rhs) {
 ; GFX10-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc_lo
 ; GFX10-NEXT:    v_add_nc_u32_e32 v3, 0x80000000, v2
 ; GFX10-NEXT:    v_xor_b32_e32 v0, s0, v0
-; GFX10-NEXT:    v_and_b32_e32 v0, 1, v0
 ; GFX10-NEXT:    v_cmp_ne_u16_e32 vcc_lo, 0, v0
 ; GFX10-NEXT:    v_cndmask_b32_e32 v0, v4, v2, vcc_lo
 ; GFX10-NEXT:    v_cndmask_b32_e32 v1, v5, v2, vcc_lo
@@ -5554,7 +5526,6 @@ define amdgpu_ps <4 x float> @saddsat_i128_vs(i128 %lhs, i128 inreg %rhs) {
 ; GFX11-NEXT:    s_cselect_b32 s0, 0, s0
 ; GFX11-NEXT:    v_dual_cndmask_b32 v0, v1, v0 :: v_dual_add_nc_u32 v3, 0x80000000, v2
 ; GFX11-NEXT:    v_xor_b32_e32 v0, s0, v0
-; GFX11-NEXT:    v_and_b32_e32 v0, 1, v0
 ; GFX11-NEXT:    v_cmp_ne_u16_e32 vcc_lo, 0, v0
 ; GFX11-NEXT:    v_dual_cndmask_b32 v0, v4, v2 :: v_dual_cndmask_b32 v3, v7, v3
 ; GFX11-NEXT:    v_cndmask_b32_e32 v1, v5, v2, vcc_lo
@@ -5836,18 +5807,12 @@ define amdgpu_ps <2 x i128> @s_saddsat_v2i128(<2 x i128> inreg %lhs, <2 x i128>
 ; GFX6-NEXT:    v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
 ; GFX6-NEXT:    s_cselect_b32 s2, 1, 0
 ; GFX6-NEXT:    s_or_b64 s[0:1], vcc, vcc
-; GFX6-NEXT:    s_cselect_b32 s0, 1, 0
-; GFX6-NEXT:    s_and_b32 s3, s0, 1
 ; GFX6-NEXT:    v_cmp_lt_i64_e64 s[0:1], s[10:11], 0
-; GFX6-NEXT:    s_cmp_lg_u32 s3, 0
 ; GFX6-NEXT:    s_cselect_b32 s2, s18, s2
 ; GFX6-NEXT:    s_or_b64 s[0:1], s[0:1], s[0:1]
 ; GFX6-NEXT:    v_cmp_eq_u64_e64 s[0:1], s[10:11], 0
 ; GFX6-NEXT:    s_cselect_b32 s3, 1, 0
 ; GFX6-NEXT:    s_or_b64 s[0:1], s[0:1], s[0:1]
-; GFX6-NEXT:    s_cselect_b32 s0, 1, 0
-; GFX6-NEXT:    s_and_b32 s0, s0, 1
-; GFX6-NEXT:    s_cmp_lg_u32 s0, 0
 ; GFX6-NEXT:    s_cselect_b32 s0, 0, s3
 ; GFX6-NEXT:    s_xor_b32 s10, s0, s2
 ; GFX6-NEXT:    s_ashr_i32 s0, s17, 31
@@ -5874,18 +5839,12 @@ define amdgpu_ps <2 x i128> @s_saddsat_v2i128(<2 x i128> inreg %lhs, <2 x i128>
 ; GFX6-NEXT:    v_cmp_eq_u64_e32 vcc, s[10:11], v[0:1]
 ; GFX6-NEXT:    s_cselect_b32 s6, 1, 0
 ; GFX6-NEXT:    s_or_b64 s[4:5], vcc, vcc
-; GFX6-NEXT:    s_cselect_b32 s4, 1, 0
-; GFX6-NEXT:    s_and_b32 s7, s4, 1
 ; GFX6-NEXT:    v_cmp_lt_i64_e64 s[4:5], s[14:15], 0
-; GFX6-NEXT:    s_cmp_lg_u32 s7, 0
 ; GFX6-NEXT:    s_cselect_b32 s6, s12, s6
 ; GFX6-NEXT:    s_or_b64 s[4:5], s[4:5], s[4:5]
 ; GFX6-NEXT:    v_cmp_eq_u64_e64 s[4:5], s[14:15], 0
 ; GFX6-NEXT:    s_cselect_b32 s7, 1, 0
 ; GFX6-NEXT:    s_or_b64 s[4:5], s[4:5], s[4:5]
-; GFX6-NEXT:    s_cselect_b32 s4, 1, 0
-; GFX6-NEXT:    s_and_b32 s4, s4, 1
-; GFX6-NEXT:    s_cmp_lg_u32 s4, 0
 ; GFX6-NEXT:    s_cselect_b32 s4, 0, s7
 ; GFX6-NEXT:    s_xor_b32 s12, s4, s6
 ; GFX6-NEXT:    s_ashr_i32 s4, s11, 31
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll
index 677baf991fd1d..cbdad6f32e2ff 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll
@@ -4273,11 +4273,10 @@ define amdgpu_ps i48 @s_ssubsat_i48(i48 inreg %lhs, i48 inreg %rhs) {
 ; GFX6-NEXT:    v_cmp_gt_i64_e64 s[0:1], s[0:1], 0
 ; GFX6-NEXT:    s_or_b64 s[0:1], s[0:1], s[0:1]
 ; GFX6-NEXT:    s_cselect_b32 s0, 1, 0
-; GFX6-NEXT:    s_xor_b32 s2, s0, s3
 ; GFX6-NEXT:    s_ashr_i32 s1, s7, 31
+; GFX6-NEXT:    s_xor_b32 s2, s0, s3
 ; GFX6-NEXT:    s_ashr_i32 s0, s7, 15
 ; GFX6-NEXT:    s_addk_i32 s1, 0x8000
-; GFX6-NEXT:    s_and_b32 s2, s2, 1
 ; GFX6-NEXT:    s_cmp_lg_u32 s2, 0
 ; GFX6-NEXT:    s_cselect_b64 s[0:1], s[0:1], s[4:5]
 ; GFX6-NEXT:    ; return to shader part epilog
@@ -4297,11 +4296,10 @@ define amdgpu_ps i48 @s_ssubsat_i48(i48 inreg %lhs, i48 inreg %rhs) {
 ; GFX8-NEXT:    v_cmp_gt_i64_e64 s[0:1], s[0:1], 0
 ; GFX8-NEXT:    s_cmp_lg_u64 s[0:1], 0
 ; GFX8-NEXT:    s_cselect_b32 s0, 1, 0
-; GFX8-NEXT:    s_xor_b32 s2, s0, s3
 ; GFX8-NEXT:    s_ashr_i32 s1, s7, 31
+; GFX8-NEXT:    s_xor_b32 s2, s0, s3
 ; GFX8-NEXT:    s_ashr_i32 s0, s7, 15
 ; GFX8-NEXT:    s_addk_i32 s1, 0x8000
-; GFX8-NEXT:    s_and_b32 s2, s2, 1
 ; GFX8-NEXT:    s_cmp_lg_u32 s2, 0
 ; GFX8-NEXT:    s_cselect_b64 s[0:1], s[0:1], s[4:5]
 ; GFX8-NEXT:    ; return to shader part epilog
@@ -4323,7 +4321,6 @@ define amdgpu_ps i48 @s_ssubsat_i48(i48 inreg %lhs, i48 inreg %rhs) {
 ; GFX9-NEXT:    s_xor_b32 s2, s0, s6
 ; GFX9-NEXT:    s_ashr_i32 s0, s5, 31
 ; GFX9-NEXT:    s_add_i32 s1, s0, 0x80000000
-; GFX9-NEXT:    s_and_b32 s2, s2, 1
 ; GFX9-NEXT:    s_cmp_lg_u32 s2, 0
 ; GFX9-NEXT:    s_cselect_b64 s[0:1], s[0:1], s[4:5]
 ; GFX9-NEXT:    s_ashr_i64 s[0:1], s[0:1], 16
@@ -4338,12 +4335,11 @@ define amdgpu_ps i48 @s_ssubsat_i48(i48 inreg %lhs, i48 inreg %rhs) {
 ; GFX10PLUS-NEXT:    v_cmp_lt_i64_e64 s0, s[4:5], s[0:1]
 ; GFX10PLUS-NEXT:    v_cmp_gt_i64_e64 s1, s[2:3], 0
 ; GFX10PLUS-NEXT:    s_cmp_lg_u32 s0, 0
-; GFX10PLUS-NEXT:    s_cselect_b32 s0, 1, 0
+; GFX10PLUS-NEXT:    s_cselect_b32 s2, 1, 0
 ; GFX10PLUS-NEXT:    s_cmp_lg_u32 s1, 0
 ; GFX10PLUS-NEXT:    s_cselect_b32 s1, 1, 0
-; GFX10PLUS-NEXT:    s_xor_b32 s1, s1, s0
 ; GFX10PLUS-NEXT:    s_ashr_i32 s0, s5, 31
-; GFX10PLUS-NEXT:    s_and_b32 s2, s1, 1
+; GFX10PLUS-NEXT:    s_xor_b32 s2, s1, s2
 ; GFX10PLUS-NEXT:    s_add_i32 s1, s0, 0x80000000
 ; GFX10PLUS-NEXT:    s_cmp_lg_u32 s2, 0
 ; GFX10PLUS-NEXT:    s_cselect_b64 s[0:1], s[0:1], s[4:5]
@@ -4642,7 +4638,6 @@ define amdgpu_ps i64 @s_ssubsat_i64(i64 inreg %lhs, i64 inreg %rhs) {
 ; GFX6-NEXT:    s_xor_b32 s2, s0, s6
 ; GFX6-NEXT:    s_ashr_i32 s0, s5, 31
 ; GFX6-NEXT:    s_add_i32 s1, s0, 0x80000000
-; GFX6-NEXT:    s_and_b32 s2, s2, 1
 ; GFX6-NEXT:    s_cmp_lg_u32 s2, 0
 ; GFX6-NEXT:    s_cselect_b64 s[0:1], s[0:1], s[4:5]
 ; GFX6-NEXT:    ; return to shader part epilog
@@ -4662,7 +4657,6 @@ define amdgpu_ps i64 @s_ssubsat_i64(i64 inreg %lhs, i64 inreg %rhs) {
 ; GFX8-NEXT:    s_xor_b32 s2, s0, s6
 ; GFX8-NEXT:    s_ashr_i32 s0, s5, 31
 ; GFX8-NEXT:    s_add_i32 s1, s0, 0x80000000
-; GFX8-NEXT:    s_and_b32 s2, s2, 1
 ; GFX8-NEXT:    s_cmp_lg_u32 s2, 0
 ; GFX8-NEXT:    s_cselect_b64 s[0:1], s[0:1], s[4:5]
 ; GFX8-NEXT:    ; return to shader part epilog
@@ -4682,7 +4676,6 @@ define amdgpu_ps i64 @s_ssubsat_i64(i64 inreg %lhs, i64 inreg %rhs) {
 ; GFX9-NEXT:    s_xor_b32 s2, s0, s6
 ; GFX9-NEXT:    s_ashr_i32 s0, s5, 31
 ; GFX9-NEXT:    s_add_i32 s1, s0, 0x80000000
-; GFX9-NEXT:    s_and_b32 s2, s2, 1
 ; GFX9-NEXT:    s_cmp_lg_u32 s2, 0
 ; GFX9-NEXT:    s_cselect_b64 s[0:1], s[0:1], s[4:5]
 ; GFX9-NEXT:    ; return to shader part epilog
@@ -4694,12 +4687,11 @@ define amdgpu_ps i64 @s_ssubsat_i64(i64 inreg %lhs, i64 inreg %rhs) {
 ; GFX10PLUS-NEXT:    v_cmp_lt_i64_e64 s0, s[4:5], s[0:1]
 ; GFX10PLUS-NEXT:    v_cmp_gt_i64_e64 s1, s[2:3], 0
 ; GFX10PLUS-NEXT:    s_cmp_lg_u32 s0, 0
-; GFX10PLUS-NEXT:    s_cselect_b32 s0, 1, 0
+; GFX10PLUS-NEXT:    s_cselect_b32 s2, 1, 0
 ; GFX10PLUS-NEXT:    s_cmp_lg_u32 s1, 0
 ; GFX10PLUS-NEXT:    s_cselect_b32 s1, 1, 0
-; GFX10PLUS-NEXT:    s_xor_b32 s1, s1, s0
 ; GFX10PLUS-NEXT:    s_ashr_i32 s0, s5, 31
-; GFX10PLUS-NEXT:    s_and_b32 s2, s1, 1
+; GFX10PLUS-NEXT:    s_xor_b32 s2, s1, s2
 ; GFX10PLUS-NEXT:    s_add_i32 s1, s0, 0x80000000
 ; GFX10PLUS-NEXT:    s_cmp_lg_u32 s2, 0
 ; GFX10PLUS-NEXT:    s_cselect_b64 s[0:1], s[0:1], s[4:5]
@@ -4998,7 +4990,6 @@ define amdgpu_ps <2 x i64> @s_ssubsat_v2i64(<2 x i64> inreg %lhs, <2 x i64> inre
 ; GFX6-NEXT:    s_xor_b32 s4, s0, s10
 ; GFX6-NEXT:    s_ashr_i32 s0, s9, 31
 ; GFX6-NEXT:    s_add_i32 s1, s0, 0x80000000
-; GFX6-NEXT:    s_and_b32 s4, s4, 1
 ; GFX6-NEXT:    s_cmp_lg_u32 s4, 0
 ; GFX6-NEXT:    s_cselect_b64 s[0:1], s[0:1], s[8:9]
 ; GFX6-NEXT:    s_sub_u32 s4, s2, s6
@@ -5013,7 +5004,6 @@ define amdgpu_ps <2 x i64> @s_ssubsat_v2i64(<2 x i64> inreg %lhs, <2 x i64> inre
 ; GFX6-NEXT:    s_xor_b32 s6, s2, s8
 ; GFX6-NEXT:    s_ashr_i32 s2, s5, 31
 ; GFX6-NEXT:    s_add_i32 s3, s2, 0x80000000
-; GFX6-NEXT:    s_and_b32 s6, s6, 1
 ; GFX6-NEXT:    s_cmp_lg_u32 s6, 0
 ; GFX6-NEXT:    s_cselect_b64 s[2:3], s[2:3], s[4:5]
 ; GFX6-NEXT:    ; return to shader part epilog
@@ -5033,7 +5023,6 @@ define amdgpu_ps <2 x i64> @s_ssubsat_v2i64(<2 x i64> inreg %lhs, <2 x i64> inre
 ; GFX8-NEXT:    s_xor_b32 s4, s0, s10
 ; GFX8-NEXT:    s_ashr_i32 s0, s9, 31
 ; GFX8-NEXT:    s_add_i32 s1, s0, 0x80000000
-; GFX8-NEXT:    s_and_b32 s4, s4, 1
 ; GFX8-NEXT:    s_cmp_lg_u32 s4, 0
 ; GFX8-NEXT:    s_cselect_b64 s[0:1], s[0:1], s[8:9]
 ; GFX8-NEXT:    s_sub_u32 s4, s2, s6
@@ -5049,7 +5038,6 @@ define amdgpu_ps <2 x i64> @s_ssubsat_v2i64(<2 x i64> inreg %lhs, <2 x i64> inre
 ; GFX8-NEXT:    s_xor_b32 s6, s2, s8
 ; GFX8-NEXT:    s_ashr_i32 s2, s5, 31
 ; GFX8-NEXT:    s_add_i32 s3, s2, 0x80000000
-; GFX8-NEXT:    s_and_b32 s6, s6, 1
 ; GFX8-NEXT:    s_cmp_lg_u32 s6, 0
 ; GFX8-NEXT:    s_cselect_b64 s[2:3], s[2:3], s[4:5]
 ; GFX8-NEXT:    ; return to shader part epilog
@@ -5069,7 +5057,6 @@ define amdgpu_ps <2 x i64> @s_ssubsat_v2i64(<2 x i64> inreg %lhs, <2 x i64> inre
 ; GFX9-NEXT:    s_xor_b32 s4, s0, s10
 ; GFX9-NEXT:    s_ashr_i32 s0, s9, 31
 ; GFX9-NEXT:    s_add_i32 s1, s0, 0x80000000
-; GFX9-NEXT:    s_and_b32 s4, s4, 1
 ; GFX9-NEXT:    s_cmp_lg_u32 s4, 0
 ; GFX9-NEXT:    s_cselect_b64 s[0:1], s[0:1], s[8:9]
 ; GFX9-NEXT:    s_sub_u32 s4, s2, s6
@@ -5085,7 +5072,6 @@ define amdgpu_ps <2 x i64> @s_ssubsat_v2i64(<2 x i64> inreg %lhs, <2 x i64> inre
 ; GFX9-NEXT:    s_xor_b32 s6, s2, s8
 ; GFX9-NEXT:    s_ashr_i32 s2, s5, 31
 ; GFX9-NEXT:    s_add_i32 s3, s2, 0x80000000
-; GFX9-NEXT:    s_and_b32 s6, s6, 1
 ; GFX9-NEXT:    s_cmp_lg_u32 s6, 0
 ; GFX9-NEXT:    s_cselect_b64 s[2:3], s[2:3], s[4:5]
 ; GFX9-NEXT:    ; return to shader part epilog
@@ -5097,12 +5083,11 @@ define amdgpu_ps <2 x i64> @s_ssubsat_v2i64(<2 x i64> inreg %lhs, <2 x i64> inre
 ; GFX10PLUS-NEXT:    v_cmp_lt_i64_e64 s0, s[8:9], s[0:1]
 ; GFX10PLUS-NEXT:    v_cmp_gt_i64_e64 s1, s[4:5], 0
 ; GFX10PLUS-NEXT:    s_cmp_lg_u32 s0, 0
-; GFX10PLUS-NEXT:    s_cselect_b32 s0, 1, 0
+; GFX10PLUS-NEXT:    s_cselect_b32 s4, 1, 0
 ; GFX10PLUS-NEXT:    s_cmp_lg_u32 s1, 0
 ; GFX10PLUS-NEXT:    s_cselect_b32 s1, 1, 0
-; GFX10PLUS-NEXT:    s_xor_b32 s1, s1, s0
 ; GFX10PLUS-NEXT:    s_ashr_i32 s0, s9, 31
-; GFX10PLUS-NEXT:    s_and_b32 s4, s1, 1
+; GFX10PLUS-NEXT:    s_xor_b32 s4, s1, s4
 ; GFX10PLUS-NEXT:    s_add_i32 s1, s0, 0x80000000
 ; GFX10PLUS-NEXT:    s_cmp_lg_u32 s4, 0
 ; GFX10PLUS-NEXT:    s_cselect_b64 s[0:1], s[0:1], s[8:9]
@@ -5111,12 +5096,11 @@ define amdgpu_ps <2 x i64> @s_ssubsat_v2i64(<2 x i64> inreg %lhs, <2 x i64> inre
 ; GFX10PLUS-NEXT:    v_cmp_lt_i64_e64 s2, s[4:5], s[2:3]
 ; GFX10PLUS-NEXT:    v_cmp_gt_i64_e64 s3, s[6:7], 0
 ; GFX10PLUS-NEXT:    s_cmp_lg_u32 s2, 0
-; GFX10PLUS-NEXT:    s_cselect_b32 s2, 1, 0
+; GFX10PLUS-NEXT:    s_cselect_b32 s6, 1, 0
 ; GFX10PLUS-NEXT:    s_cmp_lg_u32 s3, 0
 ; GFX10PLUS-NEXT:    s_cselect_b32 s3, 1, 0
-; GFX10PLUS-NEXT:    s_xor_b32 s3, s3, s2
 ; GFX10PLUS-NEXT:    s_ashr_i32 s2, s5, 31
-; GFX10PLUS-NEXT:    s_and_b32 s6, s3, 1
+; GFX10PLUS-NEXT:    s_xor_b32 s6, s3, s6
 ; GFX10PLUS-NEXT:    s_add_i32 s3, s2, 0x80000000
 ; GFX10PLUS-NEXT:    s_cmp_lg_u32 s6, 0
 ; GFX10PLUS-NEXT:    s_cselect_b64 s[2:3], s[2:3], s[4:5]
@@ -5144,10 +5128,7 @@ define amdgpu_ps i128 @s_ssubsat_i128(i128 inreg %lhs, i128 inreg %rhs) {
 ; GFX6-NEXT:    v_cmp_eq_u64_e32 vcc, s[10:11], v[0:1]
 ; GFX6-NEXT:    s_cselect_b32 s2, 1, 0
 ; GFX6-NEXT:    s_or_b64 s[0:1], vcc, vcc
-; GFX6-NEXT:    s_cselect_b32 s0, 1, 0
-; GFX6-NEXT:    s_and_b32 s3, s0, 1
 ; GFX6-NEXT:    v_cmp_gt_u64_e64 s[0:1], s[4:5], 0
-; GFX6-NEXT:    s_cmp_lg_u32 s3, 0
 ; GFX6-NEXT:    s_cselect_b32 s2, s12, s2
 ; GFX6-NEXT:    s_or_b64 s[0:1], s[0:1], s[0:1]
 ; GFX6-NEXT:    v_cmp_gt_i64_e64 s[0:1], s[6:7], 0
@@ -5156,9 +5137,6 @@ define amdgpu_ps i128 @s_ssubsat_i128(i128 inreg %lhs, i128 inreg %rhs) {
 ; GFX6-NEXT:    v_cmp_eq_u64_e64 s[0:1], s[6:7], 0
 ; GFX6-NEXT:    s_cselect_b32 s4, 1, 0
 ; GFX6-NEXT:    s_or_b64 s[0:1], s[0:1], s[0:1]
-; GFX6-NEXT:    s_cselect_b32 s0, 1, 0
-; GFX6-NEXT:    s_and_b32 s0, s0, 1
-; GFX6-NEXT:    s_cmp_lg_u32 s0, 0
 ; GFX6-NEXT:    s_cselect_b32 s0, s3, s4
 ; GFX6-NEXT:    s_xor_b32 s4, s0, s2
 ; GFX6-NEXT:    s_ashr_i32 s0, s11, 31
@@ -5439,30 +5417,28 @@ define amdgpu_ps <4 x float> @ssubsat_i128_vs(i128 %lhs, i128 inreg %rhs) {
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    v_mov_b32_e32 v5, s1
 ; GFX6-NEXT:    v_subrev_i32_e32 v4, vcc, s0, v0
-; GFX6-NEXT:    v_cmp_gt_u64_e64 s[0:1], s[0:1], 0
 ; GFX6-NEXT:    v_subb_u32_e32 v5, vcc, v1, v5, vcc
-; GFX6-NEXT:    s_or_b64 s[0:1], s[0:1], s[0:1]
 ; GFX6-NEXT:    v_mov_b32_e32 v6, s2
-; GFX6-NEXT:    v_cmp_gt_i64_e64 s[0:1], s[2:3], 0
 ; GFX6-NEXT:    v_mov_b32_e32 v7, s3
 ; GFX6-NEXT:    v_subb_u32_e32 v6, vcc, v2, v6, vcc
 ; GFX6-NEXT:    v_subb_u32_e32 v7, vcc, v3, v7, vcc
-; GFX6-NEXT:    s_cselect_b32 s4, 1, 0
-; GFX6-NEXT:    s_or_b64 s[0:1], s[0:1], s[0:1]
 ; GFX6-NEXT:    v_cmp_lt_u64_e32 vcc, v[4:5], v[0:1]
-; GFX6-NEXT:    v_cmp_eq_u64_e64 s[0:1], s[2:3], 0
+; GFX6-NEXT:    v_cmp_gt_u64_e64 s[0:1], s[0:1], 0
 ; GFX6-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
 ; GFX6-NEXT:    v_cmp_lt_i64_e32 vcc, v[6:7], v[2:3]
-; GFX6-NEXT:    s_cselect_b32 s5, 1, 0
 ; GFX6-NEXT:    s_or_b64 s[0:1], s[0:1], s[0:1]
-; GFX6-NEXT:    s_cselect_b32 s0, 1, 0
+; GFX6-NEXT:    v_cmp_gt_i64_e64 s[0:1], s[2:3], 0
 ; GFX6-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
 ; GFX6-NEXT:    v_cmp_eq_u64_e32 vcc, v[6:7], v[2:3]
-; GFX6-NEXT:    s_and_b32 s0, s0, 1
-; GFX6-NEXT:    s_cmp_lg_u32 s0, 0
+; GFX6-NEXT:    s_cselect_b32 s4, 1, 0
+; GFX6-NEXT:    s_or_b64 s[0:1], s[0:1], s[0:1]
 ; GFX6-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
-; GFX6-NEXT:    s_cselect_b32 s0, s4, s5
-; GFX6-NEXT:    v_xor_b32_e32 v0, s0, v0
+; GFX6-NEXT:    s_cselect_b32 s0, 1, 0
+; GFX6-NEXT:    v_cmp_eq_u64_e64 vcc, s[2:3], 0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s4
+; GFX6-NEXT:    v_mov_b32_e32 v2, s0
+; GFX6-NEXT:    v_cndmask_b32_e32 v1, v2, v1, vcc
+; GFX6-NEXT:    v_xor_b32_e32 v0, v1, v0
 ; GFX6-NEXT:    v_ashrrev_i32_e32 v2, 31, v7
 ; GFX6-NEXT:    v_and_b32_e32 v0, 1, v0
 ; GFX6-NEXT:    v_add_i32_e32 v3, vcc, 0x80000000, v2
@@ -5496,9 +5472,8 @@ define amdgpu_ps <4 x float> @ssubsat_i128_vs(i128 %lhs, i128 inreg %rhs) {
 ; GFX8-NEXT:    s_cmp_eq_u64 s[2:3], 0
 ; GFX8-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
 ; GFX8-NEXT:    s_cselect_b32 s0, s4, s0
-; GFX8-NEXT:    v_xor_b32_e32 v0, s0, v0
 ; GFX8-NEXT:    v_ashrrev_i32_e32 v2, 31, v7
-; GFX8-NEXT:    v_and_b32_e32 v0, 1, v0
+; GFX8-NEXT:    v_xor_b32_e32 v0, s0, v0
 ; GFX8-NEXT:    v_add_u32_e32 v3, vcc, 0x80000000, v2
 ; GFX8-NEXT:    v_cmp_ne_u16_e32 vcc, 0, v0
 ; GFX8-NEXT:    v_cndmask_b32_e32 v0, v4, v2, vcc
@@ -5531,7 +5506,6 @@ define amdgpu_ps <4 x float> @ssubsat_i128_vs(i128 %lhs, i128 inreg %rhs) {
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
 ; GFX9-NEXT:    s_cselect_b32 s0, s4, s0
 ; GFX9-NEXT:    v_xor_b32_e32 v0, s0, v0
-; GFX9-NEXT:    v_and_b32_e32 v0, 1, v0
 ; GFX9-NEXT:    v_ashrrev_i32_e32 v2, 31, v7
 ; GFX9-NEXT:    v_add_u32_e32 v3, 0x80000000, v2
 ; GFX9-NEXT:    v_cmp_ne_u16_e32 vcc, 0, v0
@@ -5564,7 +5538,6 @@ define amdgpu_ps <4 x float> @ssubsat_i128_vs(i128 %lhs, i128 inreg %rhs) {
 ; GFX10-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc_lo
 ; GFX10-NEXT:    v_add_nc_u32_e32 v3, 0x80000000, v2
 ; GFX10-NEXT:    v_xor_b32_e32 v0, s0, v0
-; GFX10-NEXT:    v_and_b32_e32 v0, 1, v0
 ; GFX10-NEXT:    v_cmp_ne_u16_e32 vcc_lo, 0, v0
 ; GFX10-NEXT:    v_cndmask_b32_e32 v0, v4, v2, vcc_lo
 ; GFX10-NEXT:    v_cndmask_b32_e32 v1, v5, v2, vcc_lo
@@ -5594,7 +5567,6 @@ define amdgpu_ps <4 x float> @ssubsat_i128_vs(i128 %lhs, i128 inreg %rhs) {
 ; GFX11-NEXT:    s_cselect_b32 s0, s0, s1
 ; GFX11-NEXT:    v_dual_cndmask_b32 v0, v1, v0 :: v_dual_add_nc_u32 v3, 0x80000000, v2
 ; GFX11-NEXT:    v_xor_b32_e32 v0, s0, v0
-; GFX11-NEXT:    v_and_b32_e32 v0, 1, v0
 ; GFX11-NEXT:    v_cmp_ne_u16_e32 vcc_lo, 0, v0
 ; GFX11-NEXT:    v_dual_cndmask_b32 v0, v4, v2 :: v_dual_cndmask_b32 v3, v7, v3
 ; GFX11-NEXT:    v_cndmask_b32_e32 v1, v5, v2, vcc_lo
@@ -5895,10 +5867,7 @@ define amdgpu_ps <2 x i128> @s_ssubsat_v2i128(<2 x i128> inreg %lhs, <2 x i128>
 ; GFX6-NEXT:    v_cmp_eq_u64_e32 vcc, s[18:19], v[0:1]
 ; GFX6-NEXT:    s_cselect_b32 s2, 1, 0
 ; GFX6-NEXT:    s_or_b64 s[0:1], vcc, vcc
-; GFX6-NEXT:    s_cselect_b32 s0, 1, 0
-; GFX6-NEXT:    s_and_b32 s3, s0, 1
 ; GFX6-NEXT:    v_cmp_gt_u64_e64 s[0:1], s[8:9], 0
-; GFX6-NEXT:    s_cmp_lg_u32 s3, 0
 ; GFX6-NEXT:    s_cselect_b32 s2, s20, s2
 ; GFX6-NEXT:    s_or_b64 s[0:1], s[0:1], s[0:1]
 ; GFX6-NEXT:    v_cmp_gt_i64_e64 s[0:1], s[10:11], 0
@@ -5907,9 +5876,6 @@ define amdgpu_ps <2 x i128> @s_ssubsat_v2i128(<2 x i128> inreg %lhs, <2 x i128>
 ; GFX6-NEXT:    v_cmp_eq_u64_e64 s[0:1], s[10:11], 0
 ; GFX6-NEXT:    s_cselect_b32 s8, 1, 0
 ; GFX6-NEXT:    s_or_b64 s[0:1], s[0:1], s[0:1]
-; GFX6-NEXT:    s_cselect_b32 s0, 1, 0
-; GFX6-NEXT:    s_and_b32 s0, s0, 1
-; GFX6-NEXT:    s_cmp_lg_u32 s0, 0
 ; GFX6-NEXT:    s_cselect_b32 s0, s3, s8
 ; GFX6-NEXT:    s_xor_b32 s8, s0, s2
 ; GFX6-NEXT:    s_ashr_i32 s0, s19, 31
@@ -5936,10 +5902,7 @@ define amdgpu_ps <2 x i128> @s_ssubsat_v2i128(<2 x i128> inreg %lhs, <2 x i128>
 ; GFX6-NEXT:    v_cmp_eq_u64_e32 vcc, s[10:11], v[0:1]
 ; GFX6-NEXT:    s_cselect_b32 s6, 1, 0
 ; GFX6-NEXT:    s_or_b64 s[4:5], vcc, vcc
-; GFX6-NEXT:    s_cselect_b32 s4, 1, 0
-; GFX6-NEXT:    s_and_b32 s7, s4, 1
 ; GFX6-NEXT:    v_cmp_gt_u64_e64 s[4:5], s[12:13], 0
-; GFX6-NEXT:    s_cmp_lg_u32 s7, 0
 ; GFX6-NEXT:    s_cselect_b32 s6, s16, s6
 ; GFX6-NEXT:    s_or_b64 s[4:5], s[4:5], s[4:5]
 ; GFX6-NEXT:    v_cmp_gt_i64_e64 s[4:5], s[14:15], 0
@@ -5948,9 +5911,6 @@ define amdgpu_ps <2 x i128> @s_ssubsat_v2i128(<2 x i128> inreg %lhs, <2 x i128>
 ; GFX6-NEXT:    v_cmp_eq_u64_e64 s[4:5], s[14:15], 0
 ; GFX6-NEXT:    s_cselect_b32 s12, 1, 0
 ; GFX6-NEXT:    s_or_b64 s[4:5], s[4:5], s[4:5]
-; GFX6-NEXT:    s_cselect_b32 s4, 1, 0
-; GFX6-NEXT:    s_and_b32 s4, s4, 1
-; GFX6-NEXT:    s_cmp_lg_u32 s4, 0
 ; GFX6-NEXT:    s_cselect_b32 s4, s7, s12
 ; GFX6-NEXT:    s_xor_b32 s12, s4, s6
 ; GFX6-NEXT:    s_ashr_i32 s4, s11, 31
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/subo.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/subo.ll
index 904f33fbb924c..fa1f5472c3083 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/subo.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/subo.ll
@@ -704,7 +704,6 @@ define amdgpu_ps i64 @s_ssubo_i64(i64 inreg %a, i64 inreg %b) {
 ; GFX7-NEXT:    s_or_b64 s[0:1], s[0:1], s[0:1]
 ; GFX7-NEXT:    s_cselect_b32 s0, 1, 0
 ; GFX7-NEXT:    s_xor_b32 s0, s0, s6
-; GFX7-NEXT:    s_and_b32 s0, s0, 1
 ; GFX7-NEXT:    s_cmp_lg_u32 s0, 0
 ; GFX7-NEXT:    s_cselect_b32 s0, 1, 0
 ; GFX7-NEXT:    s_sub_u32 s0, s4, s0
@@ -724,7 +723,6 @@ define amdgpu_ps i64 @s_ssubo_i64(i64 inreg %a, i64 inreg %b) {
 ; GFX8-NEXT:    s_cmp_lg_u64 s[0:1], 0
 ; GFX8-NEXT:    s_cselect_b32 s0, 1, 0
 ; GFX8-NEXT:    s_xor_b32 s0, s0, s6
-; GFX8-NEXT:    s_and_b32 s0, s0, 1
 ; GFX8-NEXT:    s_cmp_lg_u32 s0, 0
 ; GFX8-NEXT:    s_cselect_b32 s0, 1, 0
 ; GFX8-NEXT:    s_sub_u32 s0, s4, s0
@@ -744,7 +742,6 @@ define amdgpu_ps i64 @s_ssubo_i64(i64 inreg %a, i64 inreg %b) {
 ; GFX9-NEXT:    s_cmp_lg_u64 s[0:1], 0
 ; GFX9-NEXT:    s_cselect_b32 s0, 1, 0
 ; GFX9-NEXT:    s_xor_b32 s0, s0, s6
-; GFX9-NEXT:    s_and_b32 s0, s0, 1
 ; GFX9-NEXT:    s_cmp_lg_u32 s0, 0
 ; GFX9-NEXT:    s_cselect_b32 s0, 1, 0
 ; GFX9-NEXT:    s_sub_u32 s0, s4, s0
diff --git a/llvm/test/CodeGen/AMDGPU/fptosi-sat-scalar.ll b/llvm/test/CodeGen/AMDGPU/fptosi-sat-scalar.ll
index 20ac23daebba2..0a8f19a452fef 100644
--- a/llvm/test/CodeGen/AMDGPU/fptosi-sat-scalar.ll
+++ b/llvm/test/CodeGen/AMDGPU/fptosi-sat-scalar.ll
@@ -84,8 +84,8 @@ define i8 @test_signed_i8_f32(float %f) nounwind {
 ; GFX12-GI-NEXT:    s_wait_bvhcnt 0x0
 ; GFX12-GI-NEXT:    s_wait_kmcnt 0x0
 ; GFX12-GI-NEXT:    v_cvt_i32_f32_e32 v0, v0
-; GFX12-GI-NEXT:    v_mov_b32_e32 v1, 0xffffff80
-; GFX12-GI-NEXT:    v_med3_i32 v0, v0, v1, 0x7f
+; GFX12-GI-NEXT:    v_mov_b32_e32 v1, 0x7f
+; GFX12-GI-NEXT:    v_med3_i32 v0, 0xffffff80, v0, v1
 ; GFX12-GI-NEXT:    s_setpc_b64 s[30:31]
     %x = call i8 @llvm.fptosi.sat.i8.f32(float %f)
     ret i8 %x
@@ -130,8 +130,8 @@ define i16 @test_signed_i16_f32(float %f) nounwind {
 ; GFX12-GI-NEXT:    s_wait_bvhcnt 0x0
 ; GFX12-GI-NEXT:    s_wait_kmcnt 0x0
 ; GFX12-GI-NEXT:    v_cvt_i32_f32_e32 v0, v0
-; GFX12-GI-NEXT:    v_mov_b32_e32 v1, 0xffff8000
-; GFX12-GI-NEXT:    v_med3_i32 v0, v0, v1, 0x7fff
+; GFX12-GI-NEXT:    v_mov_b32_e32 v1, 0x7fff
+; GFX12-GI-NEXT:    v_med3_i32 v0, 0xffff8000, v0, v1
 ; GFX12-GI-NEXT:    s_setpc_b64 s[30:31]
     %x = call i16 @llvm.fptosi.sat.i16.f32(float %f)
     ret i16 %x
@@ -367,8 +367,8 @@ define i8 @test_signed_i8_f64(double %f) nounwind {
 ; GFX12-GI-NEXT:    s_wait_bvhcnt 0x0
 ; GFX12-GI-NEXT:    s_wait_kmcnt 0x0
 ; GFX12-GI-NEXT:    v_cvt_i32_f64_e32 v0, v[0:1]
-; GFX12-GI-NEXT:    v_mov_b32_e32 v1, 0xffffff80
-; GFX12-GI-NEXT:    v_med3_i32 v0, v0, v1, 0x7f
+; GFX12-GI-NEXT:    v_mov_b32_e32 v1, 0x7f
+; GFX12-GI-NEXT:    v_med3_i32 v0, 0xffffff80, v0, v1
 ; GFX12-GI-NEXT:    s_setpc_b64 s[30:31]
     %x = call i8 @llvm.fptosi.sat.i8.f64(double %f)
     ret i8 %x
@@ -413,8 +413,8 @@ define i16 @test_signed_i16_f64(double %f) nounwind {
 ; GFX12-GI-NEXT:    s_wait_bvhcnt 0x0
 ; GFX12-GI-NEXT:    s_wait_kmcnt 0x0
 ; GFX12-GI-NEXT:    v_cvt_i32_f64_e32 v0, v[0:1]
-; GFX12-GI-NEXT:    v_mov_b32_e32 v1, 0xffff8000
-; GFX12-GI-NEXT:    v_med3_i32 v0, v0, v1, 0x7fff
+; GFX12-GI-NEXT:    v_mov_b32_e32 v1, 0x7fff
+; GFX12-GI-NEXT:    v_med3_i32 v0, 0xffff8000, v0, v1
 ; GFX12-GI-NEXT:    s_setpc_b64 s[30:31]
     %x = call i16 @llvm.fptosi.sat.i16.f64(double %f)
     ret i16 %x
@@ -697,9 +697,9 @@ define i8 @test_signed_i8_f16(half %f) nounwind {
 ; GFX12-GI-NEXT:    s_wait_bvhcnt 0x0
 ; GFX12-GI-NEXT:    s_wait_kmcnt 0x0
 ; GFX12-GI-NEXT:    v_cvt_f32_f16_e32 v0, v0.l
-; GFX12-GI-NEXT:    v_mov_b32_e32 v1, 0xffffff80
+; GFX12-GI-NEXT:    v_mov_b32_e32 v1, 0x7f
 ; GFX12-GI-NEXT:    v_cvt_i32_f32_e32 v0, v0
-; GFX12-GI-NEXT:    v_med3_i32 v0, v0, v1, 0x7f
+; GFX12-GI-NEXT:    v_med3_i32 v0, 0xffffff80, v0, v1
 ; GFX12-GI-NEXT:    s_setpc_b64 s[30:31]
     %x = call i8 @llvm.fptosi.sat.i8.f16(half %f)
     ret i8 %x
@@ -770,9 +770,9 @@ define i16 @test_signed_i16_f16(half %f) nounwind {
 ; GFX12-GI-NEXT:    s_wait_bvhcnt 0x0
 ; GFX12-GI-NEXT:    s_wait_kmcnt 0x0
 ; GFX12-GI-NEXT:    v_cvt_f32_f16_e32 v0, v0.l
-; GFX12-GI-NEXT:    v_mov_b32_e32 v1, 0xffff8000
+; GFX12-GI-NEXT:    v_mov_b32_e32 v1, 0x7fff
 ; GFX12-GI-NEXT:    v_cvt_i32_f32_e32 v0, v0
-; GFX12-GI-NEXT:    v_med3_i32 v0, v0, v1, 0x7fff
+; GFX12-GI-NEXT:    v_med3_i32 v0, 0xffff8000, v0, v1
 ; GFX12-GI-NEXT:    s_setpc_b64 s[30:31]
     %x = call i16 @llvm.fptosi.sat.i16.f16(half %f)
     ret i16 %x
diff --git a/llvm/test/CodeGen/AMDGPU/fptosi-sat-vector.ll b/llvm/test/CodeGen/AMDGPU/fptosi-sat-vector.ll
index 1bfc5798f15c9..5119d65025220 100644
--- a/llvm/test/CodeGen/AMDGPU/fptosi-sat-vector.ll
+++ b/llvm/test/CodeGen/AMDGPU/fptosi-sat-vector.ll
@@ -657,9 +657,9 @@ define <2 x i8> @test_signed_v2f64_v2i8(<2 x double> %f) {
 ; GFX12-GI-NEXT:    s_wait_kmcnt 0x0
 ; GFX12-GI-NEXT:    v_cvt_i32_f64_e32 v0, v[0:1]
 ; GFX12-GI-NEXT:    v_cvt_i32_f64_e32 v1, v[2:3]
-; GFX12-GI-NEXT:    v_mov_b32_e32 v2, 0xffffff80
-; GFX12-GI-NEXT:    v_med3_i32 v0, v0, v2, 0x7f
-; GFX12-GI-NEXT:    v_med3_i32 v1, v1, v2, 0x7f
+; GFX12-GI-NEXT:    v_mov_b32_e32 v2, 0x7f
+; GFX12-GI-NEXT:    v_med3_i32 v0, 0xffffff80, v0, v2
+; GFX12-GI-NEXT:    v_med3_i32 v1, 0xffffff80, v1, v2
 ; GFX12-GI-NEXT:    s_setpc_b64 s[30:31]
     %x = call <2 x i8> @llvm.fptosi.sat.v2f64.v2i8(<2 x double> %f)
     ret <2 x i8> %x
@@ -742,9 +742,9 @@ define <2 x i16> @test_signed_v2f64_v2i16(<2 x double> %f) {
 ; GFX12-GI-NEXT:    s_wait_kmcnt 0x0
 ; GFX12-GI-NEXT:    v_cvt_i32_f64_e32 v2, v[2:3]
 ; GFX12-GI-NEXT:    v_cvt_i32_f64_e32 v0, v[0:1]
-; GFX12-GI-NEXT:    v_mov_b32_e32 v1, 0xffff8000
-; GFX12-GI-NEXT:    v_med3_i32 v2, v2, v1, 0x7fff
-; GFX12-GI-NEXT:    v_med3_i32 v0, v0, v1, 0x7fff
+; GFX12-GI-NEXT:    v_mov_b32_e32 v1, 0x7fff
+; GFX12-GI-NEXT:    v_med3_i32 v2, 0xffff8000, v2, v1
+; GFX12-GI-NEXT:    v_med3_i32 v0, 0xffff8000, v0, v1
 ; GFX12-GI-NEXT:    v_mov_b16_e32 v0.h, v2.l
 ; GFX12-GI-NEXT:    s_setpc_b64 s[30:31]
     %x = call <2 x i16> @llvm.fptosi.sat.v2f64.v2i16(<2 x double> %f)
@@ -1252,15 +1252,15 @@ define <4 x i8> @test_signed_v4f16_v4i8(<4 x half> %f) {
 ; GFX12-GI-NEXT:    v_cvt_f32_f16_e32 v0, v0.h
 ; GFX12-GI-NEXT:    v_cvt_f32_f16_e32 v3, v1.l
 ; GFX12-GI-NEXT:    v_cvt_f32_f16_e32 v1, v1.h
-; GFX12-GI-NEXT:    v_mov_b32_e32 v4, 0xffffff80
+; GFX12-GI-NEXT:    v_mov_b32_e32 v4, 0x7f
 ; GFX12-GI-NEXT:    v_cvt_i32_f32_e32 v2, v2
 ; GFX12-GI-NEXT:    v_cvt_i32_f32_e32 v5, v0
 ; GFX12-GI-NEXT:    v_cvt_i32_f32_e32 v3, v3
 ; GFX12-GI-NEXT:    v_cvt_i32_f32_e32 v6, v1
-; GFX12-GI-NEXT:    v_med3_i32 v0, v2, v4, 0x7f
-; GFX12-GI-NEXT:    v_med3_i32 v1, v5, v4, 0x7f
-; GFX12-GI-NEXT:    v_med3_i32 v2, v3, v4, 0x7f
-; GFX12-GI-NEXT:    v_med3_i32 v3, v6, v4, 0x7f
+; GFX12-GI-NEXT:    v_med3_i32 v0, 0xffffff80, v2, v4
+; GFX12-GI-NEXT:    v_med3_i32 v1, 0xffffff80, v5, v4
+; GFX12-GI-NEXT:    v_med3_i32 v2, 0xffffff80, v3, v4
+; GFX12-GI-NEXT:    v_med3_i32 v3, 0xffffff80, v6, v4
 ; GFX12-GI-NEXT:    s_setpc_b64 s[30:31]
     %x = call <4 x i8> @llvm.fptosi.sat.v4f16.v4i8(<4 x half> %f)
     ret <4 x i8> %x
@@ -1394,15 +1394,15 @@ define <4 x i16> @test_signed_v4f16_v4i16(<4 x half> %f) {
 ; GFX12-GI-NEXT:    v_cvt_f32_f16_e32 v3, v1.h
 ; GFX12-GI-NEXT:    v_cvt_f32_f16_e32 v0, v0.l
 ; GFX12-GI-NEXT:    v_cvt_f32_f16_e32 v1, v1.l
-; GFX12-GI-NEXT:    v_mov_b32_e32 v4, 0xffff8000
+; GFX12-GI-NEXT:    v_mov_b32_e32 v4, 0x7fff
 ; GFX12-GI-NEXT:    v_cvt_i32_f32_e32 v2, v2
 ; GFX12-GI-NEXT:    v_cvt_i32_f32_e32 v3, v3
 ; GFX12-GI-NEXT:    v_cvt_i32_f32_e32 v0, v0
 ; GFX12-GI-NEXT:    v_cvt_i32_f32_e32 v1, v1
-; GFX12-GI-NEXT:    v_med3_i32 v2, v2, v4, 0x7fff
-; GFX12-GI-NEXT:    v_med3_i32 v3, v3, v4, 0x7fff
-; GFX12-GI-NEXT:    v_med3_i32 v0, v0, v4, 0x7fff
-; GFX12-GI-NEXT:    v_med3_i32 v1, v1, v4, 0x7fff
+; GFX12-GI-NEXT:    v_med3_i32 v2, 0xffff8000, v2, v4
+; GFX12-GI-NEXT:    v_med3_i32 v3, 0xffff8000, v3, v4
+; GFX12-GI-NEXT:    v_med3_i32 v0, 0xffff8000, v0, v4
+; GFX12-GI-NEXT:    v_med3_i32 v1, 0xffff8000, v1, v4
 ; GFX12-GI-NEXT:    v_mov_b16_e32 v0.h, v2.l
 ; GFX12-GI-NEXT:    v_mov_b16_e32 v1.h, v3.l
 ; GFX12-GI-NEXT:    s_setpc_b64 s[30:31]
@@ -2132,29 +2132,29 @@ define <8 x i8> @test_signed_v8f16_v8i8(<8 x half> %f) {
 ; GFX12-GI-NEXT:    v_cvt_f32_f16_e32 v4, v0.l
 ; GFX12-GI-NEXT:    v_cvt_f32_f16_e32 v0, v0.h
 ; GFX12-GI-NEXT:    v_cvt_f32_f16_e32 v5, v1.l
-; GFX12-GI-NEXT:    v_mov_b32_e32 v7, 0xffffff80
+; GFX12-GI-NEXT:    v_mov_b32_e32 v7, 0x7f
 ; GFX12-GI-NEXT:    v_cvt_f32_f16_e32 v9, v1.h
 ; GFX12-GI-NEXT:    v_cvt_i32_f32_e32 v4, v4
 ; GFX12-GI-NEXT:    v_cvt_i32_f32_e32 v6, v0
 ; GFX12-GI-NEXT:    v_cvt_i32_f32_e32 v5, v5
 ; GFX12-GI-NEXT:    v_cvt_f32_f16_e32 v10, v2.l
 ; GFX12-GI-NEXT:    v_cvt_f32_f16_e32 v2, v2.h
-; GFX12-GI-NEXT:    v_med3_i32 v0, v4, v7, 0x7f
+; GFX12-GI-NEXT:    v_med3_i32 v0, 0xffffff80, v4, v7
 ; GFX12-GI-NEXT:    v_cvt_f32_f16_e32 v4, v3.l
 ; GFX12-GI-NEXT:    v_cvt_f32_f16_e32 v3, v3.h
-; GFX12-GI-NEXT:    v_med3_i32 v1, v6, v7, 0x7f
-; GFX12-GI-NEXT:    v_med3_i32 v8, v5, v7, 0x7f
+; GFX12-GI-NEXT:    v_med3_i32 v1, 0xffffff80, v6, v7
+; GFX12-GI-NEXT:    v_med3_i32 v8, 0xffffff80, v5, v7
 ; GFX12-GI-NEXT:    v_cvt_i32_f32_e32 v5, v9
 ; GFX12-GI-NEXT:    v_cvt_i32_f32_e32 v6, v10
 ; GFX12-GI-NEXT:    v_cvt_i32_f32_e32 v2, v2
 ; GFX12-GI-NEXT:    v_cvt_i32_f32_e32 v9, v4
 ; GFX12-GI-NEXT:    v_cvt_i32_f32_e32 v10, v3
-; GFX12-GI-NEXT:    v_med3_i32 v3, v5, v7, 0x7f
-; GFX12-GI-NEXT:    v_med3_i32 v5, v2, v7, 0x7f
+; GFX12-GI-NEXT:    v_med3_i32 v3, 0xffffff80, v5, v7
+; GFX12-GI-NEXT:    v_med3_i32 v5, 0xffffff80, v2, v7
 ; GFX12-GI-NEXT:    v_mov_b32_e32 v2, v8
-; GFX12-GI-NEXT:    v_med3_i32 v4, v6, v7, 0x7f
-; GFX12-GI-NEXT:    v_med3_i32 v6, v9, v7, 0x7f
-; GFX12-GI-NEXT:    v_med3_i32 v7, v10, v7, 0x7f
+; GFX12-GI-NEXT:    v_med3_i32 v4, 0xffffff80, v6, v7
+; GFX12-GI-NEXT:    v_med3_i32 v6, 0xffffff80, v9, v7
+; GFX12-GI-NEXT:    v_med3_i32 v7, 0xffffff80, v10, v7
 ; GFX12-GI-NEXT:    s_setpc_b64 s[30:31]
     %x = call <8 x i8> @llvm.fptosi.sat.v8f16.v8i8(<8 x half> %f)
     ret <8 x i8> %x
@@ -2363,28 +2363,28 @@ define <8 x i16> @test_signed_v8f16_v8i16(<8 x half> %f) {
 ; GFX12-GI-NEXT:    v_cvt_f32_f16_e32 v5, v1.l
 ; GFX12-GI-NEXT:    v_cvt_f32_f16_e32 v4, v0.l
 ; GFX12-GI-NEXT:    v_cvt_f32_f16_e32 v0, v0.h
-; GFX12-GI-NEXT:    v_mov_b32_e32 v6, 0xffff8000
+; GFX12-GI-NEXT:    v_mov_b32_e32 v6, 0x7fff
 ; GFX12-GI-NEXT:    v_cvt_f32_f16_e32 v8, v1.h
 ; GFX12-GI-NEXT:    v_cvt_i32_f32_e32 v5, v5
 ; GFX12-GI-NEXT:    v_cvt_i32_f32_e32 v4, v4
 ; GFX12-GI-NEXT:    v_cvt_i32_f32_e32 v7, v0
 ; GFX12-GI-NEXT:    v_cvt_f32_f16_e32 v9, v2.l
 ; GFX12-GI-NEXT:    v_cvt_f32_f16_e32 v2, v2.h
-; GFX12-GI-NEXT:    v_med3_i32 v1, v5, v6, 0x7fff
+; GFX12-GI-NEXT:    v_med3_i32 v1, 0xffff8000, v5, v6
 ; GFX12-GI-NEXT:    v_cvt_f32_f16_e32 v5, v3.h
 ; GFX12-GI-NEXT:    v_cvt_f32_f16_e32 v3, v3.l
-; GFX12-GI-NEXT:    v_med3_i32 v0, v4, v6, 0x7fff
-; GFX12-GI-NEXT:    v_med3_i32 v4, v7, v6, 0x7fff
+; GFX12-GI-NEXT:    v_med3_i32 v0, 0xffff8000, v4, v6
+; GFX12-GI-NEXT:    v_med3_i32 v4, 0xffff8000, v7, v6
 ; GFX12-GI-NEXT:    v_cvt_i32_f32_e32 v7, v8
 ; GFX12-GI-NEXT:    v_cvt_i32_f32_e32 v8, v9
 ; GFX12-GI-NEXT:    v_cvt_i32_f32_e32 v9, v2
 ; GFX12-GI-NEXT:    v_cvt_i32_f32_e32 v5, v5
 ; GFX12-GI-NEXT:    v_cvt_i32_f32_e32 v3, v3
-; GFX12-GI-NEXT:    v_med3_i32 v7, v7, v6, 0x7fff
-; GFX12-GI-NEXT:    v_med3_i32 v2, v8, v6, 0x7fff
-; GFX12-GI-NEXT:    v_med3_i32 v8, v9, v6, 0x7fff
-; GFX12-GI-NEXT:    v_med3_i32 v5, v5, v6, 0x7fff
-; GFX12-GI-NEXT:    v_med3_i32 v3, v3, v6, 0x7fff
+; GFX12-GI-NEXT:    v_med3_i32 v7, 0xffff8000, v7, v6
+; GFX12-GI-NEXT:    v_med3_i32 v2, 0xffff8000, v8, v6
+; GFX12-GI-NEXT:    v_med3_i32 v8, 0xffff8000, v9, v6
+; GFX12-GI-NEXT:    v_med3_i32 v5, 0xffff8000, v5, v6
+; GFX12-GI-NEXT:    v_med3_i32 v3, 0xffff8000, v3, v6
 ; GFX12-GI-NEXT:    v_mov_b16_e32 v0.h, v4.l
 ; GFX12-GI-NEXT:    v_mov_b16_e32 v1.h, v7.l
 ; GFX12-GI-NEXT:    v_mov_b16_e32 v2.h, v8.l



More information about the llvm-branch-commits mailing list