[llvm] [AMDGPU] Switch V_CNDMASK operands to shrink it into VOP2 (PR #135162)

Mon Apr 14 04:50:18 PDT 2025

https://github.com/mihajlovicana updated https://github.com/llvm/llvm-project/pull/135162

>From 0a76e245c8c14f9aaa44a561b3ef101a7bfcd50e Mon Sep 17 00:00:00 2001
From: Ana Mihajlovic <Ana.Mihajlovic at amd.com>
Date: Thu, 10 Apr 2025 13:27:15 +0200
Subject: [PATCH 1/3] [AMDGPU] Switch V_CNDMASK operands to shrink it into VOP2

---
 .../Target/AMDGPU/SIShrinkInstructions.cpp    | 114 ++++++
 .../AMDGPU/GlobalISel/extractelement.ll       | 136 +++----
 .../test/CodeGen/AMDGPU/GlobalISel/saddsat.ll | 137 ++++---
 .../AMDGPU/GlobalISel/select-to-fmin-fmax.ll  |  28 +-
 .../test/CodeGen/AMDGPU/GlobalISel/uaddsat.ll |  18 +-
 .../test/CodeGen/AMDGPU/GlobalISel/usubsat.ll |  18 +-
 ...amdgpu-codegenprepare-fold-binop-select.ll |   4 +-
 llvm/test/CodeGen/AMDGPU/ctlz.ll              |  20 +-
 llvm/test/CodeGen/AMDGPU/ctlz_zero_undef.ll   |  18 +-
 llvm/test/CodeGen/AMDGPU/cttz.ll              |  16 +-
 llvm/test/CodeGen/AMDGPU/cttz_zero_undef.ll   |   4 +-
 .../CodeGen/AMDGPU/dagcombine-fmul-sel.ll     | 124 +++---
 llvm/test/CodeGen/AMDGPU/div_i128.ll          |  20 +-
 llvm/test/CodeGen/AMDGPU/div_v2i128.ll        |  80 ++--
 llvm/test/CodeGen/AMDGPU/fmul-to-ldexp.ll     |  16 +-
 llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll |  72 ++--
 llvm/test/CodeGen/AMDGPU/fptoi.i128.ll        |  36 +-
 .../CodeGen/AMDGPU/insert_vector_dynelt.ll    |  48 +--
 llvm/test/CodeGen/AMDGPU/itofp.i128.ll        |  36 +-
 llvm/test/CodeGen/AMDGPU/llvm.exp.ll          | 384 +++++++++---------
 llvm/test/CodeGen/AMDGPU/llvm.exp10.ll        | 384 +++++++++---------
 .../CodeGen/AMDGPU/private-memory-atomics.ll  |   6 +-
 .../CodeGen/AMDGPU/short-select-cndmask.ll    |  47 +++
 llvm/test/CodeGen/AMDGPU/sint_to_fp.f64.ll    |   4 +-
 llvm/test/CodeGen/AMDGPU/uint_to_fp.f64.ll    |   4 +-
 25 files changed, 967 insertions(+), 807 deletions(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/short-select-cndmask.ll

diff --git a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
index 73343e1c80f33..0b1638c25b9ae 100644
--- a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
+++ b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
@@ -51,6 +51,11 @@ class SIShrinkInstructions {
                     unsigned SubReg) const;
   bool instModifiesReg(const MachineInstr *MI, unsigned Reg,
                        unsigned SubReg) const;
+  bool trySwitchOperands(MachineInstr &MI, Register *OldVCC,
+                         Register *NewVCC) const;
+  bool shouldSwitchOperands(MachineRegisterInfo &MRI, MachineInstr &MI,
+                            const SIInstrInfo &TII) const;
+  unsigned getInverseCompareOpcode(MachineInstr &MI) const;
   TargetInstrInfo::RegSubRegPair getSubRegForIndex(Register Reg, unsigned Sub,
                                                    unsigned I) const;
   void dropInstructionKeepingImpDefs(MachineInstr &MI) const;
@@ -831,6 +836,109 @@ bool SIShrinkInstructions::tryReplaceDeadSDST(MachineInstr &MI) const {
   return true;
 }
 
+unsigned SIShrinkInstructions::getInverseCompareOpcode(MachineInstr &MI) const {
+  switch (MI.getOpcode()) {
+  // u32: each listed pair is an exact logical inverse of the other.
+  case AMDGPU::V_CMP_EQ_U32_e64:
+    return AMDGPU::V_CMP_NE_U32_e64;
+  case AMDGPU::V_CMP_NE_U32_e64:
+    return AMDGPU::V_CMP_EQ_U32_e64;
+  case AMDGPU::V_CMP_GE_U32_e64:
+    return AMDGPU::V_CMP_LT_U32_e64;
+  case AMDGPU::V_CMP_LE_U32_e64:
+    return AMDGPU::V_CMP_GT_U32_e64;
+  case AMDGPU::V_CMP_GT_U32_e64:
+    return AMDGPU::V_CMP_LE_U32_e64;
+  case AMDGPU::V_CMP_LT_U32_e64:
+    return AMDGPU::V_CMP_GE_U32_e64;
+  // f32: use the negated (N*) forms; ordered opposites differ on NaN inputs.
+  case AMDGPU::V_CMP_EQ_F32_e64:
+    return AMDGPU::V_CMP_NEQ_F32_e64;
+  case AMDGPU::V_CMP_NEQ_F32_e64:
+    return AMDGPU::V_CMP_EQ_F32_e64;
+  case AMDGPU::V_CMP_GE_F32_e64:
+    return AMDGPU::V_CMP_NGE_F32_e64;
+  case AMDGPU::V_CMP_LE_F32_e64:
+    return AMDGPU::V_CMP_NLE_F32_e64;
+  case AMDGPU::V_CMP_GT_F32_e64:
+    return AMDGPU::V_CMP_NGT_F32_e64;
+  case AMDGPU::V_CMP_LT_F32_e64:
+    return AMDGPU::V_CMP_NLT_F32_e64;
+  default:
+    return 0; // 0 tells the caller that no inverse opcode is known
+  }
+}
+
+bool SIShrinkInstructions::shouldSwitchOperands(MachineRegisterInfo &MRI,
+                                                MachineInstr &MI,
+                                                const SIInstrInfo &TII) const {
+  auto allUses = MRI.use_nodbg_operands(MI.getOperand(5).getReg());
+  unsigned Count = 0; // uses with an immediate src1 and a non-immediate src0
+  // Scan every non-debug use of MI's operand-5 register; stop at a blocker.
+  for (auto &Use : allUses) {
+    if (Use.getParent()->getOpcode() != AMDGPU::V_CNDMASK_B32_e64)
+      return false; // a user that is not V_CNDMASK_B32_e64 vetoes the switch
+    MachineOperand *Src0 =
+        TII.getNamedOperand(*Use.getParent(), AMDGPU::OpName::src0);
+    MachineOperand *Src1 =
+        TII.getNamedOperand(*Use.getParent(), AMDGPU::OpName::src1);
+
+    auto Src0Imm = Src0->isImm();
+    auto Src1Imm = Src1->isImm();
+
+    if (!Src1Imm && Src0Imm)
+      return false; // immediate only in src0: reject the switch
+    if (Src1Imm && !Src0Imm)
+      Count++;
+  }
+  return (Count >= 1); // true only if some use had imm src1 / non-imm src0
+}
+
+// Swaps MI's two sources onto an inverted compare; OldVCC/NewVCC cache it.
+bool SIShrinkInstructions::trySwitchOperands(MachineInstr &MI, Register *OldVCC,
+                                             Register *NewVCC) const {
+  const DebugLoc &DL = MI.getDebugLoc();
+  auto Reg = MI.getOperand(5).getReg();
+  if (!Reg.isVirtual())
+    return false; // only virtual registers in operand 5 are handled
+
+  if (*OldVCC != Reg) { // no inverted compare remembered for Reg yet
+    MachineInstr *DefMI = MRI->getVRegDef(Reg);
+    if (DefMI) {
+      unsigned Opcode = getInverseCompareOpcode(*DefMI);
+      if (Opcode &&
+          SIShrinkInstructions::shouldSwitchOperands(*MRI, MI, *TII)) {
+        auto cmpDL = DefMI->getDebugLoc();
+        *NewVCC = MRI->createVirtualRegister(MRI->getRegClass(Reg));
+        *OldVCC = Reg; // remember the pair so later users of Reg reuse NewVCC
+        MachineInstrBuilder InverseCompare = BuildMI(
+            *DefMI->getParent(), DefMI, cmpDL, TII->get(Opcode), *NewVCC);
+        InverseCompare->setFlags(DefMI->getFlags());
+        // Copy DefMI's explicit operands after operand 0; DefMI is kept.
+        unsigned OpNum = DefMI->getNumExplicitOperands();
+        for (unsigned i = 1; i < OpNum; i++) {
+          MachineOperand Op = DefMI->getOperand(i);
+          InverseCompare.add(Op);
+          if (Op.isReg() && Op.isKill()) // DefMI still reads Op afterwards
+            InverseCompare->getOperand(i).setIsKill(false);
+        }
+      }
+    }
+  }
+  if (*OldVCC == Reg) { // an inverted compare writing *NewVCC exists for Reg
+    BuildMI(*MI.getParent(), MI, DL, TII->get(AMDGPU::V_CNDMASK_B32_e64),
+            MI.getOperand(0).getReg())
+        .add(MI.getOperand(3)) // operands 3,4 and 1,2 trade places
+        .add(MI.getOperand(4))
+        .add(MI.getOperand(1))
+        .add(MI.getOperand(2))
+        .addReg(*NewVCC);
+    MI.eraseFromParent(); // MI is replaced by the instruction built above
+    return true;
+  }
+  return false;
+}
+
 bool SIShrinkInstructions::run(MachineFunction &MF) {
 
   this->MF = &MF;
@@ -842,6 +950,8 @@ bool SIShrinkInstructions::run(MachineFunction &MF) {
   unsigned VCCReg = ST->isWave32() ? AMDGPU::VCC_LO : AMDGPU::VCC;
 
   std::vector<unsigned> I1Defs;
+  Register OldVCC = AMDGPU::NoRegister;
+  Register NewVCC = AMDGPU::NoRegister;
 
   for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
                                                   BI != BE; ++BI) {
@@ -973,6 +1083,10 @@ bool SIShrinkInstructions::run(MachineFunction &MF) {
         continue;
       }
 
+      if (MI.getOpcode() == AMDGPU::V_CNDMASK_B32_e64 &&
+          trySwitchOperands(MI, &OldVCC, &NewVCC))
+        MRI->setRegAllocationHint(NewVCC, 0, VCCReg);
+
       // If there is no chance we will shrink it and use VCC as sdst to get
       // a 32 bit form try to replace dead sdst with NULL.
       if (TII->isVOP3(MI.getOpcode())) {
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll
index 31a229a908142..8921950af5f8b 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll
@@ -13,9 +13,9 @@ define float @dyn_extract_v8f32_const_s_v(i32 %sel) {
 ; GCN-NEXT:    v_cndmask_b32_e64 v6, 1.0, 2.0, vcc
 ; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v0
 ; GCN-NEXT:    v_cndmask_b32_e32 v1, v6, v1, vcc
-; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v0
+; GCN-NEXT:    v_cmp_ne_u32_e32 vcc, 3, v0
 ; GCN-NEXT:    v_mov_b32_e32 v2, 0x40a00000
-; GCN-NEXT:    v_cndmask_b32_e64 v1, v1, 4.0, vcc
+; GCN-NEXT:    v_cndmask_b32_e32 v1, 4.0, v1, vcc
 ; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v0
 ; GCN-NEXT:    v_mov_b32_e32 v3, 0x40c00000
 ; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
@@ -34,18 +34,18 @@ define float @dyn_extract_v8f32_const_s_v(i32 %sel) {
 ; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
 ; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v1, 1.0, 2.0, vcc_lo
-; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v0
-; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v1, v1, 0x40400000, vcc_lo
-; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v0
-; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v1, v1, 4.0, vcc_lo
-; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v0
-; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v1, v1, 0x40a00000, vcc_lo
-; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v0
-; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v1, v1, 0x40c00000, vcc_lo
-; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v0
-; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v1, v1, 0x40e00000, vcc_lo
-; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v0
-; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v0, v1, 0x41000000, vcc_lo
+; GFX10PLUS-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 2, v0
+; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v1, 0x40400000, v1, vcc_lo
+; GFX10PLUS-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 3, v0
+; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v1, 4.0, v1, vcc_lo
+; GFX10PLUS-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 4, v0
+; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v1, 0x40a00000, v1, vcc_lo
+; GFX10PLUS-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 5, v0
+; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v1, 0x40c00000, v1, vcc_lo
+; GFX10PLUS-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 6, v0
+; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v1, 0x40e00000, v1, vcc_lo
+; GFX10PLUS-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 7, v0
+; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, 0x41000000, v1, vcc_lo
 ; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
 entry:
   %ext = extractelement <8 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>, i32 %sel
@@ -3385,9 +3385,9 @@ define float @dyn_extract_v15f32_const_s_v(i32 %sel) {
 ; GCN-NEXT:    v_cndmask_b32_e64 v13, 1.0, 2.0, vcc
 ; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v0
 ; GCN-NEXT:    v_cndmask_b32_e32 v1, v13, v1, vcc
-; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v0
+; GCN-NEXT:    v_cmp_ne_u32_e32 vcc, 3, v0
 ; GCN-NEXT:    v_mov_b32_e32 v2, 0x40a00000
-; GCN-NEXT:    v_cndmask_b32_e64 v1, v1, 4.0, vcc
+; GCN-NEXT:    v_cndmask_b32_e32 v1, 4.0, v1, vcc
 ; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v0
 ; GCN-NEXT:    v_mov_b32_e32 v3, 0x40c00000
 ; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
@@ -3429,32 +3429,32 @@ define float @dyn_extract_v15f32_const_s_v(i32 %sel) {
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
 ; GFX10-NEXT:    v_cndmask_b32_e64 v1, 1.0, 2.0, vcc_lo
-; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v0
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, 0x40400000, vcc_lo
-; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v0
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, 4.0, vcc_lo
-; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v0
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, 0x40a00000, vcc_lo
-; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v0
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, 0x40c00000, vcc_lo
-; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v0
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, 0x40e00000, vcc_lo
-; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v0
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, 0x41000000, vcc_lo
-; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 8, v0
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, 0x41100000, vcc_lo
-; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 9, v0
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, 0x41200000, vcc_lo
-; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 10, v0
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, 0x41300000, vcc_lo
-; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 11, v0
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, 0x41400000, vcc_lo
-; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 12, v0
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, 0x41500000, vcc_lo
-; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 13, v0
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, 0x41600000, vcc_lo
-; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 14, v0
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, 0x41700000, vcc_lo
+; GFX10-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 2, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x40400000, v1, vcc_lo
+; GFX10-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 3, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, 4.0, v1, vcc_lo
+; GFX10-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 4, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x40a00000, v1, vcc_lo
+; GFX10-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 5, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x40c00000, v1, vcc_lo
+; GFX10-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 6, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x40e00000, v1, vcc_lo
+; GFX10-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 7, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x41000000, v1, vcc_lo
+; GFX10-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 8, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x41100000, v1, vcc_lo
+; GFX10-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 9, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x41200000, v1, vcc_lo
+; GFX10-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 10, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x41300000, v1, vcc_lo
+; GFX10-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 11, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x41400000, v1, vcc_lo
+; GFX10-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 12, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x41500000, v1, vcc_lo
+; GFX10-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 13, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x41600000, v1, vcc_lo
+; GFX10-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 14, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x41700000, v1, vcc_lo
 ; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 15, v0
 ; GFX10-NEXT:    v_cndmask_b32_e64 v0, v1, s4, vcc_lo
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
@@ -3464,32 +3464,32 @@ define float @dyn_extract_v15f32_const_s_v(i32 %sel) {
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
 ; GFX11-NEXT:    v_cndmask_b32_e64 v1, 1.0, 2.0, vcc_lo
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v0
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v1, 0x40400000, vcc_lo
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v0
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v1, 4.0, vcc_lo
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v0
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v1, 0x40a00000, vcc_lo
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v0
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v1, 0x40c00000, vcc_lo
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v0
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v1, 0x40e00000, vcc_lo
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v0
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v1, 0x41000000, vcc_lo
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 8, v0
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v1, 0x41100000, vcc_lo
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 9, v0
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v1, 0x41200000, vcc_lo
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 10, v0
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v1, 0x41300000, vcc_lo
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 11, v0
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v1, 0x41400000, vcc_lo
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 12, v0
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v1, 0x41500000, vcc_lo
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 13, v0
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v1, 0x41600000, vcc_lo
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 14, v0
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v1, 0x41700000, vcc_lo
+; GFX11-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 2, v0
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x40400000, v1, vcc_lo
+; GFX11-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 3, v0
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, 4.0, v1, vcc_lo
+; GFX11-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 4, v0
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x40a00000, v1, vcc_lo
+; GFX11-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 5, v0
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x40c00000, v1, vcc_lo
+; GFX11-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 6, v0
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x40e00000, v1, vcc_lo
+; GFX11-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 7, v0
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x41000000, v1, vcc_lo
+; GFX11-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 8, v0
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x41100000, v1, vcc_lo
+; GFX11-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 9, v0
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x41200000, v1, vcc_lo
+; GFX11-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 10, v0
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x41300000, v1, vcc_lo
+; GFX11-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 11, v0
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x41400000, v1, vcc_lo
+; GFX11-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 12, v0
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x41500000, v1, vcc_lo
+; GFX11-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 13, v0
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x41600000, v1, vcc_lo
+; GFX11-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 14, v0
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x41700000, v1, vcc_lo
 ; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 15, v0
 ; GFX11-NEXT:    v_cndmask_b32_e64 v0, v1, s0, vcc_lo
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll
index 723ad5646c0a3..03b713f6866a0 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll
@@ -5155,8 +5155,8 @@ define amdgpu_ps i128 @s_saddsat_i128(i128 inreg %lhs, i128 inreg %rhs) {
 ; GFX8-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
 ; GFX8-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s[0:1]
 ; GFX8-NEXT:    s_and_b32 s0, 1, s2
-; GFX8-NEXT:    v_cmp_ne_u32_e64 s[0:1], 0, s0
-; GFX8-NEXT:    v_cndmask_b32_e64 v1, v1, 0, s[0:1]
+; GFX8-NEXT:    v_cmp_eq_u32_e64 vcc, 0, s0
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; GFX8-NEXT:    v_xor_b32_e32 v0, v1, v0
 ; GFX8-NEXT:    s_ashr_i32 s0, s9, 31
 ; GFX8-NEXT:    v_and_b32_e32 v0, 1, v0
@@ -5202,8 +5202,8 @@ define amdgpu_ps i128 @s_saddsat_i128(i128 inreg %lhs, i128 inreg %rhs) {
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s[0:1]
 ; GFX9-NEXT:    s_and_b32 s0, 1, s2
-; GFX9-NEXT:    v_cmp_ne_u32_e64 s[0:1], 0, s0
-; GFX9-NEXT:    v_cndmask_b32_e64 v1, v1, 0, s[0:1]
+; GFX9-NEXT:    v_cmp_eq_u32_e64 vcc, 0, s0
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; GFX9-NEXT:    v_xor_b32_e32 v0, v1, v0
 ; GFX9-NEXT:    s_ashr_i32 s0, s9, 31
 ; GFX9-NEXT:    v_and_b32_e32 v0, 1, v0
@@ -5241,16 +5241,16 @@ define amdgpu_ps i128 @s_saddsat_i128(i128 inreg %lhs, i128 inreg %rhs) {
 ; GFX10-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s0
 ; GFX10-NEXT:    s_and_b32 s0, 1, s10
 ; GFX10-NEXT:    s_cmp_eq_u64 s[6:7], 0
-; GFX10-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s2
-; GFX10-NEXT:    s_cselect_b32 s1, 1, 0
 ; GFX10-NEXT:    v_cmp_ne_u32_e64 vcc_lo, 0, s0
+; GFX10-NEXT:    s_cselect_b32 s1, 1, 0
+; GFX10-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s2
 ; GFX10-NEXT:    s_and_b32 s1, 1, s1
-; GFX10-NEXT:    v_cmp_ne_u32_e64 s0, 0, s1
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v2, 0, s0
-; GFX10-NEXT:    v_mov_b32_e32 v2, s5
 ; GFX10-NEXT:    s_ashr_i32 s0, s9, 31
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_eq_u32_e64 vcc_lo, 0, s1
 ; GFX10-NEXT:    s_add_i32 s1, s0, 0x80000000
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0, v2, vcc_lo
+; GFX10-NEXT:    v_mov_b32_e32 v2, s5
 ; GFX10-NEXT:    v_xor_b32_e32 v0, v1, v0
 ; GFX10-NEXT:    v_mov_b32_e32 v1, s4
 ; GFX10-NEXT:    v_and_b32_e32 v0, 1, v0
@@ -5282,16 +5282,15 @@ define amdgpu_ps i128 @s_saddsat_i128(i128 inreg %lhs, i128 inreg %rhs) {
 ; GFX11-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s0
 ; GFX11-NEXT:    s_and_b32 s0, 1, s10
 ; GFX11-NEXT:    s_cmp_eq_u64 s[6:7], 0
-; GFX11-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s2
-; GFX11-NEXT:    s_cselect_b32 s1, 1, 0
 ; GFX11-NEXT:    v_cmp_ne_u32_e64 vcc_lo, 0, s0
+; GFX11-NEXT:    s_cselect_b32 s1, 1, 0
+; GFX11-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s2
 ; GFX11-NEXT:    s_and_b32 s1, 1, s1
-; GFX11-NEXT:    v_cmp_ne_u32_e64 s0, 0, s1
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v2, 0, s0
-; GFX11-NEXT:    v_mov_b32_e32 v2, s5
 ; GFX11-NEXT:    s_ashr_i32 s0, s9, 31
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc_lo
+; GFX11-NEXT:    v_cmp_eq_u32_e64 vcc_lo, 0, s1
 ; GFX11-NEXT:    s_add_i32 s1, s0, 0x80000000
+; GFX11-NEXT:    v_dual_cndmask_b32 v1, 0, v2 :: v_dual_mov_b32 v2, s5
 ; GFX11-NEXT:    v_xor_b32_e32 v0, v1, v0
 ; GFX11-NEXT:    v_dual_mov_b32 v1, s4 :: v_dual_and_b32 v0, 1, v0
 ; GFX11-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
@@ -5511,8 +5510,8 @@ define amdgpu_ps <4 x float> @saddsat_i128_vs(i128 %lhs, i128 inreg %rhs) {
 ; GFX8-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
 ; GFX8-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s[0:1]
 ; GFX8-NEXT:    s_and_b32 s0, 1, s4
-; GFX8-NEXT:    v_cmp_ne_u32_e64 s[0:1], 0, s0
-; GFX8-NEXT:    v_cndmask_b32_e64 v1, v1, 0, s[0:1]
+; GFX8-NEXT:    v_cmp_eq_u32_e64 vcc, 0, s0
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; GFX8-NEXT:    v_xor_b32_e32 v0, v1, v0
 ; GFX8-NEXT:    v_ashrrev_i32_e32 v2, 31, v7
 ; GFX8-NEXT:    v_bfrev_b32_e32 v1, 1
@@ -5545,8 +5544,8 @@ define amdgpu_ps <4 x float> @saddsat_i128_vs(i128 %lhs, i128 inreg %rhs) {
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s[0:1]
 ; GFX9-NEXT:    s_and_b32 s0, 1, s4
-; GFX9-NEXT:    v_cmp_ne_u32_e64 s[0:1], 0, s0
-; GFX9-NEXT:    v_cndmask_b32_e64 v1, v1, 0, s[0:1]
+; GFX9-NEXT:    v_cmp_eq_u32_e64 vcc, 0, s0
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; GFX9-NEXT:    v_xor_b32_e32 v0, v1, v0
 ; GFX9-NEXT:    v_ashrrev_i32_e32 v2, 31, v7
 ; GFX9-NEXT:    v_and_b32_e32 v0, 1, v0
@@ -5572,13 +5571,13 @@ define amdgpu_ps <4 x float> @saddsat_i128_vs(i128 %lhs, i128 inreg %rhs) {
 ; GFX10-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
 ; GFX10-NEXT:    v_cmp_lt_i64_e32 vcc_lo, v[6:7], v[2:3]
 ; GFX10-NEXT:    v_cndmask_b32_e64 v8, 0, 1, s1
-; GFX10-NEXT:    v_cmp_ne_u32_e64 s0, 0, s0
 ; GFX10-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc_lo
 ; GFX10-NEXT:    v_cmp_eq_u64_e32 vcc_lo, v[6:7], v[2:3]
 ; GFX10-NEXT:    v_ashrrev_i32_e32 v2, 31, v7
 ; GFX10-NEXT:    v_add_nc_u32_e32 v3, 0x80000000, v2
 ; GFX10-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v8, 0, s0
+; GFX10-NEXT:    v_cmp_eq_u32_e64 vcc_lo, 0, s0
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0, v8, vcc_lo
 ; GFX10-NEXT:    v_xor_b32_e32 v0, v1, v0
 ; GFX10-NEXT:    v_and_b32_e32 v0, 1, v0
 ; GFX10-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
@@ -5602,18 +5601,18 @@ define amdgpu_ps <4 x float> @saddsat_i128_vs(i128 %lhs, i128 inreg %rhs) {
 ; GFX11-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
 ; GFX11-NEXT:    v_cmp_lt_i64_e32 vcc_lo, v[6:7], v[2:3]
 ; GFX11-NEXT:    v_cndmask_b32_e64 v8, 0, 1, s1
-; GFX11-NEXT:    v_cmp_ne_u32_e64 s0, 0, s0
 ; GFX11-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc_lo
 ; GFX11-NEXT:    v_cmp_eq_u64_e32 vcc_lo, v[6:7], v[2:3]
 ; GFX11-NEXT:    v_ashrrev_i32_e32 v2, 31, v7
 ; GFX11-NEXT:    v_dual_cndmask_b32 v0, v1, v0 :: v_dual_add_nc_u32 v3, 0x80000000, v2
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v8, 0, s0
+; GFX11-NEXT:    v_cmp_eq_u32_e64 vcc_lo, 0, s0
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0, v8, vcc_lo
 ; GFX11-NEXT:    v_xor_b32_e32 v0, v1, v0
 ; GFX11-NEXT:    v_and_b32_e32 v0, 1, v0
 ; GFX11-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
 ; GFX11-NEXT:    v_cndmask_b32_e32 v1, v5, v2, vcc_lo
-; GFX11-NEXT:    v_dual_cndmask_b32 v0, v4, v2 :: v_dual_cndmask_b32 v3, v7, v3
-; GFX11-NEXT:    v_cndmask_b32_e32 v2, v6, v2, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, v4, v2, vcc_lo
+; GFX11-NEXT:    v_dual_cndmask_b32 v2, v6, v2 :: v_dual_cndmask_b32 v3, v7, v3
 ; GFX11-NEXT:    ; return to shader part epilog
   %result = call i128 @llvm.sadd.sat.i128(i128 %lhs, i128 %rhs)
   %cast = bitcast i128 %result to <4 x float>
@@ -5982,8 +5981,8 @@ define amdgpu_ps <2 x i128> @s_saddsat_v2i128(<2 x i128> inreg %lhs, <2 x i128>
 ; GFX8-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
 ; GFX8-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s[0:1]
 ; GFX8-NEXT:    s_and_b32 s0, 1, s2
-; GFX8-NEXT:    v_cmp_ne_u32_e64 s[0:1], 0, s0
-; GFX8-NEXT:    v_cndmask_b32_e64 v1, v1, 0, s[0:1]
+; GFX8-NEXT:    v_cmp_eq_u32_e64 vcc, 0, s0
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; GFX8-NEXT:    v_xor_b32_e32 v0, v1, v0
 ; GFX8-NEXT:    s_ashr_i32 s0, s17, 31
 ; GFX8-NEXT:    v_and_b32_e32 v0, 1, v0
@@ -6021,8 +6020,8 @@ define amdgpu_ps <2 x i128> @s_saddsat_v2i128(<2 x i128> inreg %lhs, <2 x i128>
 ; GFX8-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
 ; GFX8-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s[4:5]
 ; GFX8-NEXT:    s_and_b32 s4, 1, s6
-; GFX8-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, s4
-; GFX8-NEXT:    v_cndmask_b32_e64 v1, v1, 0, s[4:5]
+; GFX8-NEXT:    v_cmp_eq_u32_e64 vcc, 0, s4
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; GFX8-NEXT:    v_xor_b32_e32 v0, v1, v0
 ; GFX8-NEXT:    s_ashr_i32 s4, s3, 31
 ; GFX8-NEXT:    v_and_b32_e32 v0, 1, v0
@@ -6072,8 +6071,8 @@ define amdgpu_ps <2 x i128> @s_saddsat_v2i128(<2 x i128> inreg %lhs, <2 x i128>
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s[0:1]
 ; GFX9-NEXT:    s_and_b32 s0, 1, s2
-; GFX9-NEXT:    v_cmp_ne_u32_e64 s[0:1], 0, s0
-; GFX9-NEXT:    v_cndmask_b32_e64 v1, v1, 0, s[0:1]
+; GFX9-NEXT:    v_cmp_eq_u32_e64 vcc, 0, s0
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; GFX9-NEXT:    v_xor_b32_e32 v0, v1, v0
 ; GFX9-NEXT:    s_ashr_i32 s0, s17, 31
 ; GFX9-NEXT:    v_and_b32_e32 v0, 1, v0
@@ -6111,8 +6110,8 @@ define amdgpu_ps <2 x i128> @s_saddsat_v2i128(<2 x i128> inreg %lhs, <2 x i128>
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s[4:5]
 ; GFX9-NEXT:    s_and_b32 s4, 1, s6
-; GFX9-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, s4
-; GFX9-NEXT:    v_cndmask_b32_e64 v1, v1, 0, s[4:5]
+; GFX9-NEXT:    v_cmp_eq_u32_e64 vcc, 0, s4
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; GFX9-NEXT:    v_xor_b32_e32 v0, v1, v0
 ; GFX9-NEXT:    s_ashr_i32 s4, s3, 31
 ; GFX9-NEXT:    v_and_b32_e32 v0, 1, v0
@@ -6154,53 +6153,53 @@ define amdgpu_ps <2 x i128> @s_saddsat_v2i128(<2 x i128> inreg %lhs, <2 x i128>
 ; GFX10-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s0
 ; GFX10-NEXT:    s_and_b32 s0, 1, s18
 ; GFX10-NEXT:    s_cmp_eq_u64 s[10:11], 0
-; GFX10-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s2
-; GFX10-NEXT:    s_cselect_b32 s1, 1, 0
 ; GFX10-NEXT:    v_cmp_ne_u32_e64 vcc_lo, 0, s0
-; GFX10-NEXT:    s_and_b32 s1, 1, s1
+; GFX10-NEXT:    s_cselect_b32 s1, 1, 0
 ; GFX10-NEXT:    s_ashr_i32 s10, s17, 31
-; GFX10-NEXT:    v_cmp_ne_u32_e64 s0, 0, s1
-; GFX10-NEXT:    s_add_i32 s11, s10, 0x80000000
+; GFX10-NEXT:    s_and_b32 s1, 1, s1
+; GFX10-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s2
 ; GFX10-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v2, 0, s0
+; GFX10-NEXT:    v_cmp_eq_u32_e64 vcc_lo, 0, s1
+; GFX10-NEXT:    s_add_i32 s11, s10, 0x80000000
 ; GFX10-NEXT:    s_add_u32 s0, s4, s12
 ; GFX10-NEXT:    s_addc_u32 s1, s5, s13
 ; GFX10-NEXT:    s_addc_u32 s2, s6, s14
 ; GFX10-NEXT:    v_cmp_lt_u64_e64 s4, s[0:1], s[4:5]
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0, v2, vcc_lo
 ; GFX10-NEXT:    s_addc_u32 s3, s7, s15
-; GFX10-NEXT:    v_xor_b32_e32 v0, v1, v0
-; GFX10-NEXT:    s_cmp_eq_u64 s[2:3], s[6:7]
 ; GFX10-NEXT:    v_mov_b32_e32 v5, s0
-; GFX10-NEXT:    s_cselect_b32 s12, 1, 0
+; GFX10-NEXT:    s_cmp_eq_u64 s[2:3], s[6:7]
+; GFX10-NEXT:    v_mov_b32_e32 v6, s1
+; GFX10-NEXT:    v_xor_b32_e32 v0, v1, v0
 ; GFX10-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s4
 ; GFX10-NEXT:    v_cmp_lt_i64_e64 s4, s[2:3], s[6:7]
+; GFX10-NEXT:    s_cselect_b32 s12, 1, 0
 ; GFX10-NEXT:    v_cmp_lt_i64_e64 s6, s[14:15], 0
 ; GFX10-NEXT:    v_and_b32_e32 v0, 1, v0
-; GFX10-NEXT:    v_mov_b32_e32 v6, s1
 ; GFX10-NEXT:    v_mov_b32_e32 v7, s3
 ; GFX10-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s4
 ; GFX10-NEXT:    s_and_b32 s4, 1, s12
 ; GFX10-NEXT:    s_cmp_eq_u64 s[14:15], 0
-; GFX10-NEXT:    v_cndmask_b32_e64 v3, 0, 1, s6
-; GFX10-NEXT:    s_cselect_b32 s5, 1, 0
 ; GFX10-NEXT:    v_cmp_ne_u32_e64 vcc_lo, 0, s4
+; GFX10-NEXT:    s_cselect_b32 s5, 1, 0
+; GFX10-NEXT:    v_cndmask_b32_e64 v3, 0, 1, s6
 ; GFX10-NEXT:    s_and_b32 s5, 1, s5
-; GFX10-NEXT:    v_cmp_ne_u32_e64 s4, 0, s5
+; GFX10-NEXT:    s_ashr_i32 s4, s3, 31
 ; GFX10-NEXT:    v_cndmask_b32_e32 v1, v2, v1, vcc_lo
+; GFX10-NEXT:    v_cmp_eq_u32_e64 vcc_lo, 0, s5
+; GFX10-NEXT:    s_add_i32 s0, s4, 0x80000000
+; GFX10-NEXT:    v_cndmask_b32_e32 v2, 0, v3, vcc_lo
+; GFX10-NEXT:    v_mov_b32_e32 v3, s8
 ; GFX10-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
 ; GFX10-NEXT:    v_mov_b32_e32 v0, s16
-; GFX10-NEXT:    v_cndmask_b32_e64 v2, v3, 0, s4
-; GFX10-NEXT:    v_mov_b32_e32 v3, s8
-; GFX10-NEXT:    s_ashr_i32 s4, s3, 31
-; GFX10-NEXT:    v_cndmask_b32_e64 v4, v4, s10, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v0, s10, vcc_lo
 ; GFX10-NEXT:    v_xor_b32_e32 v1, v2, v1
 ; GFX10-NEXT:    v_mov_b32_e32 v2, s17
 ; GFX10-NEXT:    v_cndmask_b32_e64 v3, v3, s10, vcc_lo
-; GFX10-NEXT:    s_add_i32 s0, s4, 0x80000000
-; GFX10-NEXT:    v_readfirstlane_b32 s1, v4
+; GFX10-NEXT:    v_cndmask_b32_e64 v4, v4, s10, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e64 v0, v0, s10, vcc_lo
 ; GFX10-NEXT:    v_and_b32_e32 v1, 1, v1
 ; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, s11, vcc_lo
+; GFX10-NEXT:    v_readfirstlane_b32 s1, v4
 ; GFX10-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v1
 ; GFX10-NEXT:    v_mov_b32_e32 v1, s2
 ; GFX10-NEXT:    v_readfirstlane_b32 s2, v0
@@ -6231,42 +6230,43 @@ define amdgpu_ps <2 x i128> @s_saddsat_v2i128(<2 x i128> inreg %lhs, <2 x i128>
 ; GFX11-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s0
 ; GFX11-NEXT:    s_and_b32 s0, 1, s18
 ; GFX11-NEXT:    s_cmp_eq_u64 s[10:11], 0
-; GFX11-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s2
-; GFX11-NEXT:    s_cselect_b32 s1, 1, 0
 ; GFX11-NEXT:    v_cmp_ne_u32_e64 vcc_lo, 0, s0
-; GFX11-NEXT:    s_and_b32 s1, 1, s1
+; GFX11-NEXT:    s_cselect_b32 s1, 1, 0
 ; GFX11-NEXT:    s_ashr_i32 s10, s17, 31
-; GFX11-NEXT:    v_cmp_ne_u32_e64 s0, 0, s1
-; GFX11-NEXT:    s_add_i32 s11, s10, 0x80000000
+; GFX11-NEXT:    s_and_b32 s1, 1, s1
+; GFX11-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s2
 ; GFX11-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v2, 0, s0
+; GFX11-NEXT:    v_cmp_eq_u32_e64 vcc_lo, 0, s1
+; GFX11-NEXT:    s_add_i32 s11, s10, 0x80000000
 ; GFX11-NEXT:    s_add_u32 s0, s4, s12
 ; GFX11-NEXT:    s_addc_u32 s1, s5, s13
 ; GFX11-NEXT:    s_addc_u32 s2, s6, s14
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0, v2, vcc_lo
 ; GFX11-NEXT:    v_cmp_lt_u64_e64 s4, s[0:1], s[4:5]
 ; GFX11-NEXT:    s_addc_u32 s3, s7, s15
+; GFX11-NEXT:    v_dual_mov_b32 v6, s1 :: v_dual_mov_b32 v7, s3
 ; GFX11-NEXT:    v_xor_b32_e32 v0, v1, v0
 ; GFX11-NEXT:    s_cmp_eq_u64 s[2:3], s[6:7]
-; GFX11-NEXT:    v_dual_mov_b32 v6, s1 :: v_dual_mov_b32 v7, s3
 ; GFX11-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s4
 ; GFX11-NEXT:    v_cmp_lt_i64_e64 s4, s[2:3], s[6:7]
 ; GFX11-NEXT:    s_cselect_b32 s12, 1, 0
 ; GFX11-NEXT:    v_cmp_lt_i64_e64 s6, s[14:15], 0
-; GFX11-NEXT:    v_dual_mov_b32 v5, s0 :: v_dual_and_b32 v0, 1, v0
+; GFX11-NEXT:    v_mov_b32_e32 v5, s0
 ; GFX11-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s4
 ; GFX11-NEXT:    s_and_b32 s4, 1, s12
 ; GFX11-NEXT:    s_cmp_eq_u64 s[14:15], 0
-; GFX11-NEXT:    v_cndmask_b32_e64 v3, 0, 1, s6
-; GFX11-NEXT:    s_cselect_b32 s5, 1, 0
 ; GFX11-NEXT:    v_cmp_ne_u32_e64 vcc_lo, 0, s4
+; GFX11-NEXT:    s_cselect_b32 s5, 1, 0
+; GFX11-NEXT:    v_and_b32_e32 v0, 1, v0
 ; GFX11-NEXT:    s_and_b32 s5, 1, s5
-; GFX11-NEXT:    v_cmp_ne_u32_e64 s4, 0, s5
+; GFX11-NEXT:    v_cndmask_b32_e64 v3, 0, 1, s6
 ; GFX11-NEXT:    v_cndmask_b32_e32 v1, v2, v1, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v2, v3, 0, s4
-; GFX11-NEXT:    v_mov_b32_e32 v3, s8
+; GFX11-NEXT:    v_cmp_eq_u32_e64 vcc_lo, 0, s5
+; GFX11-NEXT:    s_ashr_i32 s4, s3, 31
+; GFX11-NEXT:    s_add_i32 s0, s4, 0x80000000
+; GFX11-NEXT:    v_dual_cndmask_b32 v2, 0, v3 :: v_dual_mov_b32 v3, s8
 ; GFX11-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
 ; GFX11-NEXT:    v_mov_b32_e32 v0, s16
-; GFX11-NEXT:    s_ashr_i32 s4, s3, 31
 ; GFX11-NEXT:    v_xor_b32_e32 v1, v2, v1
 ; GFX11-NEXT:    v_mov_b32_e32 v4, s9
 ; GFX11-NEXT:    v_mov_b32_e32 v2, s17
@@ -6275,7 +6275,6 @@ define amdgpu_ps <2 x i128> @s_saddsat_v2i128(<2 x i128> inreg %lhs, <2 x i128>
 ; GFX11-NEXT:    v_and_b32_e32 v1, 1, v1
 ; GFX11-NEXT:    v_cndmask_b32_e64 v4, v4, s10, vcc_lo
 ; GFX11-NEXT:    v_cndmask_b32_e64 v2, v2, s11, vcc_lo
-; GFX11-NEXT:    s_add_i32 s0, s4, 0x80000000
 ; GFX11-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v1
 ; GFX11-NEXT:    v_mov_b32_e32 v1, s2
 ; GFX11-NEXT:    v_readfirstlane_b32 s1, v4
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/select-to-fmin-fmax.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/select-to-fmin-fmax.ll
index ee3bf96111994..1991afa83870a 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/select-to-fmin-fmax.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/select-to-fmin-fmax.ll
@@ -18,8 +18,8 @@ define float @test_s32(float %a) #0 {
 ; GCN-LABEL: test_s32:
 ; GCN:       ; %bb.0: ; %entry
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    v_cmp_gt_f32_e32 vcc, 0, v0
-; GCN-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
+; GCN-NEXT:    v_cmp_le_f32_e32 vcc, 0, v0
+; GCN-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
 entry:
   %fcmp = fcmp olt float %a, 0.0
@@ -111,10 +111,10 @@ define <2 x float> @test_v2s32(<2 x float> %a) #0 {
 ; GCN-LABEL: test_v2s32:
 ; GCN:       ; %bb.0: ; %entry
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    v_cmp_gt_f32_e32 vcc, 0, v0
-; GCN-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
-; GCN-NEXT:    v_cmp_gt_f32_e32 vcc, 0, v1
-; GCN-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
+; GCN-NEXT:    v_cmp_le_f32_e32 vcc, 0, v0
+; GCN-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
+; GCN-NEXT:    v_cmp_le_f32_e32 vcc, 0, v1
+; GCN-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
 entry:
   %fcmp = fcmp olt <2 x float> %a, zeroinitializer
@@ -126,14 +126,14 @@ define <4 x float> @test_v4s32(<4 x float> %a) #0 {
 ; GCN-LABEL: test_v4s32:
 ; GCN:       ; %bb.0: ; %entry
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    v_cmp_gt_f32_e32 vcc, 0, v0
-; GCN-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
-; GCN-NEXT:    v_cmp_gt_f32_e32 vcc, 0, v1
-; GCN-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
-; GCN-NEXT:    v_cmp_gt_f32_e32 vcc, 0, v2
-; GCN-NEXT:    v_cndmask_b32_e64 v2, v2, 0, vcc
-; GCN-NEXT:    v_cmp_gt_f32_e32 vcc, 0, v3
-; GCN-NEXT:    v_cndmask_b32_e64 v3, v3, 0, vcc
+; GCN-NEXT:    v_cmp_le_f32_e32 vcc, 0, v0
+; GCN-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
+; GCN-NEXT:    v_cmp_le_f32_e32 vcc, 0, v1
+; GCN-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
+; GCN-NEXT:    v_cmp_le_f32_e32 vcc, 0, v2
+; GCN-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
+; GCN-NEXT:    v_cmp_le_f32_e32 vcc, 0, v3
+; GCN-NEXT:    v_cndmask_b32_e32 v3, 0, v3, vcc
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
 entry:
   %fcmp = fcmp olt <4 x float> %a, zeroinitializer
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/uaddsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/uaddsat.ll
index d9158e3558395..536504747c971 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/uaddsat.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/uaddsat.ll
@@ -2835,9 +2835,9 @@ define i48 @v_uaddsat_i48(i48 %lhs, i48 %rhs) {
 ; GFX6-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
 ; GFX6-NEXT:    v_or_b32_e32 v0, v0, v3
-; GFX6-NEXT:    v_cmp_ne_u32_e32 vcc, v1, v2
-; GFX6-NEXT:    v_cndmask_b32_e64 v0, v0, -1, vcc
-; GFX6-NEXT:    v_cndmask_b32_e64 v1, v2, -1, vcc
+; GFX6-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX6-NEXT:    v_cndmask_b32_e32 v0, -1, v0, vcc
+; GFX6-NEXT:    v_cndmask_b32_e32 v1, -1, v2, vcc
 ; GFX6-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_uaddsat_i48:
@@ -2944,10 +2944,10 @@ define amdgpu_ps <2 x float> @uaddsat_i48_sv(i48 inreg %lhs, i48 %rhs) {
 ; GFX6-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
 ; GFX6-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
-; GFX6-NEXT:    v_cmp_ne_u32_e32 vcc, v1, v2
+; GFX6-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v2
 ; GFX6-NEXT:    v_or_b32_e32 v0, v0, v3
-; GFX6-NEXT:    v_cndmask_b32_e64 v1, v2, -1, vcc
-; GFX6-NEXT:    v_cndmask_b32_e64 v0, v0, -1, vcc
+; GFX6-NEXT:    v_cndmask_b32_e32 v1, -1, v2, vcc
+; GFX6-NEXT:    v_cndmask_b32_e32 v0, -1, v0, vcc
 ; GFX6-NEXT:    v_and_b32_e32 v1, 0xffff, v1
 ; GFX6-NEXT:    ; return to shader part epilog
 ;
@@ -3003,10 +3003,10 @@ define amdgpu_ps <2 x float> @uaddsat_i48_vs(i48 %lhs, i48 inreg %rhs) {
 ; GFX6-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
 ; GFX6-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
-; GFX6-NEXT:    v_cmp_ne_u32_e32 vcc, v1, v2
+; GFX6-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v2
 ; GFX6-NEXT:    v_or_b32_e32 v0, v0, v3
-; GFX6-NEXT:    v_cndmask_b32_e64 v1, v2, -1, vcc
-; GFX6-NEXT:    v_cndmask_b32_e64 v0, v0, -1, vcc
+; GFX6-NEXT:    v_cndmask_b32_e32 v1, -1, v2, vcc
+; GFX6-NEXT:    v_cndmask_b32_e32 v0, -1, v0, vcc
 ; GFX6-NEXT:    v_and_b32_e32 v1, 0xffff, v1
 ; GFX6-NEXT:    ; return to shader part epilog
 ;
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/usubsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/usubsat.ll
index 1fd139b06417f..1944d1577ae29 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/usubsat.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/usubsat.ll
@@ -2705,9 +2705,9 @@ define i48 @v_usubsat_i48(i48 %lhs, i48 %rhs) {
 ; GFX6-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
 ; GFX6-NEXT:    v_or_b32_e32 v0, v0, v3
-; GFX6-NEXT:    v_cmp_ne_u32_e32 vcc, v1, v2
-; GFX6-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
-; GFX6-NEXT:    v_cndmask_b32_e64 v1, v2, 0, vcc
+; GFX6-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX6-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
+; GFX6-NEXT:    v_cndmask_b32_e32 v1, 0, v2, vcc
 ; GFX6-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_usubsat_i48:
@@ -2815,9 +2815,9 @@ define amdgpu_ps <2 x float> @usubsat_i48_sv(i48 inreg %lhs, i48 %rhs) {
 ; GFX6-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
 ; GFX6-NEXT:    v_or_b32_e32 v0, v0, v3
-; GFX6-NEXT:    v_cmp_ne_u32_e32 vcc, v1, v2
-; GFX6-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
-; GFX6-NEXT:    v_cndmask_b32_e64 v1, v2, 0, vcc
+; GFX6-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX6-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
+; GFX6-NEXT:    v_cndmask_b32_e32 v1, 0, v2, vcc
 ; GFX6-NEXT:    ; return to shader part epilog
 ;
 ; GFX8-LABEL: usubsat_i48_sv:
@@ -2873,9 +2873,9 @@ define amdgpu_ps <2 x float> @usubsat_i48_vs(i48 %lhs, i48 inreg %rhs) {
 ; GFX6-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
 ; GFX6-NEXT:    v_or_b32_e32 v0, v0, v3
-; GFX6-NEXT:    v_cmp_ne_u32_e32 vcc, v1, v2
-; GFX6-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
-; GFX6-NEXT:    v_cndmask_b32_e64 v1, v2, 0, vcc
+; GFX6-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX6-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
+; GFX6-NEXT:    v_cndmask_b32_e32 v1, 0, v2, vcc
 ; GFX6-NEXT:    ; return to shader part epilog
 ;
 ; GFX8-LABEL: usubsat_i48_vs:
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fold-binop-select.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fold-binop-select.ll
index 7fdc012d4f1b5..7d24f0f6243a1 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fold-binop-select.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fold-binop-select.ll
@@ -213,10 +213,10 @@ define i32 @select_sdiv_lhs_opaque_const1_i32(i1 %cond) {
 ; GCN-NEXT:    s_addc_u32 s5, s5, gv@gotpcrel32@hi+12
 ; GCN-NEXT:    s_load_dword s4, s[4:5], 0x0
 ; GCN-NEXT:    v_and_b32_e32 v0, 1, v0
-; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
+; GCN-NEXT:    v_cmp_ne_u32_e32 vcc, 1, v0
 ; GCN-NEXT:    s_waitcnt lgkmcnt(0)
 ; GCN-NEXT:    v_mov_b32_e32 v1, s4
-; GCN-NEXT:    v_cndmask_b32_e64 v0, v1, 5, vcc
+; GCN-NEXT:    v_cndmask_b32_e32 v0, 5, v1, vcc
 ; GCN-NEXT:    v_sub_u32_e32 v1, vcc, 0, v0
 ; GCN-NEXT:    v_max_i32_e32 v1, v0, v1
 ; GCN-NEXT:    v_cvt_f32_u32_e32 v2, v1
diff --git a/llvm/test/CodeGen/AMDGPU/ctlz.ll b/llvm/test/CodeGen/AMDGPU/ctlz.ll
index b4d450a90d595..179c0bd1fdfc8 100644
--- a/llvm/test/CodeGen/AMDGPU/ctlz.ll
+++ b/llvm/test/CodeGen/AMDGPU/ctlz.ll
@@ -1099,9 +1099,9 @@ define amdgpu_kernel void @v_ctlz_i32_sel_eq_neg1(ptr addrspace(1) noalias %out,
 ; GFX10-GISEL-NEXT:    global_load_dword v0, v0, s[2:3]
 ; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-GISEL-NEXT:    v_ffbh_u32_e32 v1, v0
-; GFX10-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX10-GISEL-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
 ; GFX10-GISEL-NEXT:    v_min_u32_e32 v1, 32, v1
-; GFX10-GISEL-NEXT:    v_cndmask_b32_e64 v0, v1, -1, vcc_lo
+; GFX10-GISEL-NEXT:    v_cndmask_b32_e32 v0, -1, v1, vcc_lo
 ; GFX10-GISEL-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX10-GISEL-NEXT:    global_store_dword v1, v0, s[0:1]
 ; GFX10-GISEL-NEXT:    s_endpgm
@@ -1327,8 +1327,8 @@ define amdgpu_kernel void @v_ctlz_i32_sel_eq_bitwidth(ptr addrspace(1) noalias %
 ; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-GISEL-NEXT:    v_ffbh_u32_e32 v0, v0
 ; GFX10-GISEL-NEXT:    v_min_u32_e32 v0, 32, v0
-; GFX10-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 32, v0
-; GFX10-GISEL-NEXT:    v_cndmask_b32_e64 v0, v0, -1, vcc_lo
+; GFX10-GISEL-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 32, v0
+; GFX10-GISEL-NEXT:    v_cndmask_b32_e32 v0, -1, v0, vcc_lo
 ; GFX10-GISEL-NEXT:    global_store_dword v1, v0, s[0:1]
 ; GFX10-GISEL-NEXT:    s_endpgm
 ;
@@ -1564,10 +1564,10 @@ define amdgpu_kernel void @v_ctlz_i32_sel_ne_bitwidth(ptr addrspace(1) noalias %
 ; GFX10-GISEL-NEXT:    global_load_ubyte v0, v[0:1], off
 ; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-GISEL-NEXT:    v_ffbh_u32_e32 v1, v0
-; GFX10-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX10-GISEL-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
 ; GFX10-GISEL-NEXT:    v_min_u32_e32 v1, 32, v1
 ; GFX10-GISEL-NEXT:    v_add_nc_u16 v1, 0xffe8, v1
-; GFX10-GISEL-NEXT:    v_cndmask_b32_e64 v0, v1, 0xffff, vcc_lo
+; GFX10-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0xffff, v1, vcc_lo
 ; GFX10-GISEL-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX10-GISEL-NEXT:    global_store_byte v1, v0, s[0:1]
 ; GFX10-GISEL-NEXT:    s_endpgm
@@ -1684,11 +1684,11 @@ define amdgpu_kernel void @v_ctlz_i32_sel_ne_bitwidth(ptr addrspace(1) noalias %
 ; GFX10-GISEL-NEXT:    global_load_ushort v1, v0, s[2:3]
 ; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-GISEL-NEXT:    v_ffbh_u32_e32 v2, v1
-; GFX10-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX10-GISEL-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v1
 ; GFX10-GISEL-NEXT:    v_min_u32_e32 v2, 32, v2
 ; GFX10-GISEL-NEXT:    v_add_nc_u32_e32 v2, -16, v2
 ; GFX10-GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v2
-; GFX10-GISEL-NEXT:    v_cndmask_b32_e64 v1, v2, 0xffff, vcc_lo
+; GFX10-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0xffff, v2, vcc_lo
 ; GFX10-GISEL-NEXT:    global_store_short v0, v1, s[0:1]
 ; GFX10-GISEL-NEXT:    s_endpgm
 ;
@@ -1805,10 +1805,10 @@ define amdgpu_kernel void @v_ctlz_i7_sel_eq_neg1(ptr addrspace(1) noalias %out,
 ; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-GISEL-NEXT:    v_and_b32_e32 v0, 0x7f, v0
 ; GFX10-GISEL-NEXT:    v_ffbh_u32_e32 v1, v0
-; GFX10-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX10-GISEL-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
 ; GFX10-GISEL-NEXT:    v_min_u32_e32 v1, 32, v1
 ; GFX10-GISEL-NEXT:    v_add_nc_u16 v1, 0xffe7, v1
-; GFX10-GISEL-NEXT:    v_cndmask_b32_e64 v0, v1, 0x7f, vcc_lo
+; GFX10-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0x7f, v1, vcc_lo
 ; GFX10-GISEL-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX10-GISEL-NEXT:    v_and_b32_e32 v0, 0x7f, v0
 ; GFX10-GISEL-NEXT:    global_store_byte v1, v0, s[0:1]
diff --git a/llvm/test/CodeGen/AMDGPU/ctlz_zero_undef.ll b/llvm/test/CodeGen/AMDGPU/ctlz_zero_undef.ll
index 65ee228b64c6a..765ca10b09f79 100644
--- a/llvm/test/CodeGen/AMDGPU/ctlz_zero_undef.ll
+++ b/llvm/test/CodeGen/AMDGPU/ctlz_zero_undef.ll
@@ -1542,8 +1542,8 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_eq_neg1(ptr addrspace(1) no
 ; GFX9-GISEL-NEXT:    global_load_dword v0, v0, s[2:3]
 ; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-GISEL-NEXT:    v_ffbh_u32_e32 v1, v0
-; GFX9-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
-; GFX9-GISEL-NEXT:    v_cndmask_b32_e64 v0, v1, -1, vcc
+; GFX9-GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
+; GFX9-GISEL-NEXT:    v_cndmask_b32_e32 v0, -1, v1, vcc
 ; GFX9-GISEL-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX9-GISEL-NEXT:    global_store_dword v1, v0, s[0:1]
 ; GFX9-GISEL-NEXT:    s_endpgm
@@ -1809,9 +1809,9 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_eq_neg1_two_use(ptr addrspa
 ; GFX9-GISEL-NEXT:    global_load_dword v0, v0, s[2:3]
 ; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-GISEL-NEXT:    v_ffbh_u32_e32 v2, v0
-; GFX9-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
-; GFX9-GISEL-NEXT:    v_cndmask_b32_e64 v0, v2, -1, vcc
-; GFX9-GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
+; GFX9-GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
+; GFX9-GISEL-NEXT:    v_cndmask_b32_e32 v0, -1, v2, vcc
+; GFX9-GISEL-NEXT:    v_cndmask_b32_e64 v2, 1, 0, vcc
 ; GFX9-GISEL-NEXT:    global_store_dword v1, v0, s[0:1]
 ; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-GISEL-NEXT:    global_store_byte v[0:1], v2, off
@@ -1897,8 +1897,8 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_eq_0(ptr addrspace(1) noali
 ; GFX9-GISEL-NEXT:    global_load_dword v0, v0, s[2:3]
 ; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-GISEL-NEXT:    v_ffbh_u32_e32 v1, v0
-; GFX9-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
-; GFX9-GISEL-NEXT:    v_cndmask_b32_e64 v0, v1, 0, vcc
+; GFX9-GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
+; GFX9-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
 ; GFX9-GISEL-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX9-GISEL-NEXT:    global_store_dword v1, v0, s[0:1]
 ; GFX9-GISEL-NEXT:    s_endpgm
@@ -2066,8 +2066,8 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_eq_cmp_non0(ptr addrspace(1
 ; GFX9-GISEL-NEXT:    global_load_dword v0, v0, s[2:3]
 ; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-GISEL-NEXT:    v_ffbh_u32_e32 v1, v0
-; GFX9-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX9-GISEL-NEXT:    v_cndmask_b32_e64 v0, v1, 0, vcc
+; GFX9-GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 1, v0
+; GFX9-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
 ; GFX9-GISEL-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX9-GISEL-NEXT:    global_store_dword v1, v0, s[0:1]
 ; GFX9-GISEL-NEXT:    s_endpgm
diff --git a/llvm/test/CodeGen/AMDGPU/cttz.ll b/llvm/test/CodeGen/AMDGPU/cttz.ll
index f0c278a67c8bc..cc128d0d02dad 100644
--- a/llvm/test/CodeGen/AMDGPU/cttz.ll
+++ b/llvm/test/CodeGen/AMDGPU/cttz.ll
@@ -951,9 +951,9 @@ define amdgpu_kernel void @v_cttz_i32_sel_eq_neg1(ptr addrspace(1) noalias %out,
 ; GFX10-GISEL-NEXT:    global_load_dword v0, v0, s[2:3]
 ; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-GISEL-NEXT:    v_ffbl_b32_e32 v1, v0
-; GFX10-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX10-GISEL-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
 ; GFX10-GISEL-NEXT:    v_min_u32_e32 v1, 32, v1
-; GFX10-GISEL-NEXT:    v_cndmask_b32_e64 v0, v1, -1, vcc_lo
+; GFX10-GISEL-NEXT:    v_cndmask_b32_e32 v0, -1, v1, vcc_lo
 ; GFX10-GISEL-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX10-GISEL-NEXT:    global_store_dword v1, v0, s[0:1]
 ; GFX10-GISEL-NEXT:    s_endpgm
@@ -1153,8 +1153,8 @@ define amdgpu_kernel void @v_cttz_i32_sel_eq_bitwidth(ptr addrspace(1) noalias %
 ; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-GISEL-NEXT:    v_ffbl_b32_e32 v0, v0
 ; GFX10-GISEL-NEXT:    v_min_u32_e32 v0, 32, v0
-; GFX10-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 32, v0
-; GFX10-GISEL-NEXT:    v_cndmask_b32_e64 v0, v0, -1, vcc_lo
+; GFX10-GISEL-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 32, v0
+; GFX10-GISEL-NEXT:    v_cndmask_b32_e32 v0, -1, v0, vcc_lo
 ; GFX10-GISEL-NEXT:    global_store_dword v1, v0, s[0:1]
 ; GFX10-GISEL-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -1463,10 +1463,10 @@ define amdgpu_kernel void @v_cttz_i32_sel_ne_bitwidth(ptr addrspace(1) noalias %
 ; GFX10-GISEL-NEXT:    global_load_ushort v1, v0, s[2:3]
 ; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-GISEL-NEXT:    v_or_b32_e32 v2, 0x10000, v1
-; GFX10-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX10-GISEL-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v1
 ; GFX10-GISEL-NEXT:    v_ffbl_b32_e32 v2, v2
 ; GFX10-GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v2
-; GFX10-GISEL-NEXT:    v_cndmask_b32_e64 v1, v2, 0xffff, vcc_lo
+; GFX10-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0xffff, v2, vcc_lo
 ; GFX10-GISEL-NEXT:    global_store_short v0, v1, s[0:1]
 ; GFX10-GISEL-NEXT:    s_endpgm
   %val = load i16, ptr addrspace(1) %valptr
@@ -1567,8 +1567,8 @@ define amdgpu_kernel void @v_cttz_i7_sel_eq_neg1(ptr addrspace(1) noalias %out,
 ; GFX10-GISEL-NEXT:    v_or_b32_e32 v1, 0x80, v0
 ; GFX10-GISEL-NEXT:    v_and_b32_e32 v0, 0x7f, v0
 ; GFX10-GISEL-NEXT:    v_ffbl_b32_e32 v1, v1
-; GFX10-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX10-GISEL-NEXT:    v_cndmask_b32_e64 v0, v1, 0x7f, vcc_lo
+; GFX10-GISEL-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX10-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0x7f, v1, vcc_lo
 ; GFX10-GISEL-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX10-GISEL-NEXT:    v_and_b32_e32 v0, 0x7f, v0
 ; GFX10-GISEL-NEXT:    global_store_byte v1, v0, s[0:1]
diff --git a/llvm/test/CodeGen/AMDGPU/cttz_zero_undef.ll b/llvm/test/CodeGen/AMDGPU/cttz_zero_undef.ll
index 777f363fedf9a..8c88c97c327c3 100644
--- a/llvm/test/CodeGen/AMDGPU/cttz_zero_undef.ll
+++ b/llvm/test/CodeGen/AMDGPU/cttz_zero_undef.ll
@@ -1170,8 +1170,8 @@ define amdgpu_kernel void @v_cttz_i32_sel_eq_neg1(ptr addrspace(1) noalias %out,
 ; GFX9-GISEL-NEXT:    v_or3_b32 v1, v2, v3, v1
 ; GFX9-GISEL-NEXT:    v_ffbl_b32_e32 v2, v1
 ; GFX9-GISEL-NEXT:    v_min_u32_e32 v2, 32, v2
-; GFX9-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
-; GFX9-GISEL-NEXT:    v_cndmask_b32_e64 v1, v2, -1, vcc
+; GFX9-GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v1
+; GFX9-GISEL-NEXT:    v_cndmask_b32_e32 v1, -1, v2, vcc
 ; GFX9-GISEL-NEXT:    global_store_dword v0, v1, s[0:1]
 ; GFX9-GISEL-NEXT:    s_endpgm
   %val = load i32, ptr addrspace(1) %arrayidx, align 1
diff --git a/llvm/test/CodeGen/AMDGPU/dagcombine-fmul-sel.ll b/llvm/test/CodeGen/AMDGPU/dagcombine-fmul-sel.ll
index 993f162921663..4fe4992430870 100644
--- a/llvm/test/CodeGen/AMDGPU/dagcombine-fmul-sel.ll
+++ b/llvm/test/CodeGen/AMDGPU/dagcombine-fmul-sel.ll
@@ -323,8 +323,8 @@ define float @fmul_select_f32_test6(float %x, i32 %bool.arg1, i32 %bool.arg2) {
 ; GFX10-GISEL:       ; %bb.0:
 ; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-GISEL-NEXT:    v_mov_b32_e32 v3, 0x41000000
-; GFX10-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, v1, v2
-; GFX10-GISEL-NEXT:    v_cndmask_b32_e64 v1, v3, 0xc0400000, vcc_lo
+; GFX10-GISEL-NEXT:    v_cmp_ne_u32_e32 vcc_lo, v1, v2
+; GFX10-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0xc0400000, v3, vcc_lo
 ; GFX10-GISEL-NEXT:    v_mul_f32_e32 v0, v0, v1
 ; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -342,9 +342,9 @@ define float @fmul_select_f32_test6(float %x, i32 %bool.arg1, i32 %bool.arg2) {
 ; GFX11-GISEL:       ; %bb.0:
 ; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-GISEL-NEXT:    v_mov_b32_e32 v3, 0x41000000
-; GFX11-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX11-GISEL-NEXT:    v_cmp_ne_u32_e32 vcc_lo, v1, v2
 ; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT:    v_cndmask_b32_e64 v1, v3, 0xc0400000, vcc_lo
+; GFX11-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0xc0400000, v3, vcc_lo
 ; GFX11-GISEL-NEXT:    v_mul_f32_e32 v0, v0, v1
 ; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
   %bool = icmp eq i32 %bool.arg1, %bool.arg2
@@ -358,8 +358,8 @@ define float @fmul_select_f32_test7_sel_log2val_pos59_pos92(float %x, i32 %bool.
 ; GFX7:       ; %bb.0:
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX7-NEXT:    v_mov_b32_e32 v3, 0x5c
-; GFX7-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v2
-; GFX7-NEXT:    v_cndmask_b32_e64 v1, v3, 59, vcc
+; GFX7-NEXT:    v_cmp_ne_u32_e32 vcc, v1, v2
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, 59, v3, vcc
 ; GFX7-NEXT:    v_ldexp_f32_e32 v0, v0, v1
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -367,8 +367,8 @@ define float @fmul_select_f32_test7_sel_log2val_pos59_pos92(float %x, i32 %bool.
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    v_mov_b32_e32 v3, 0x5c
-; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v2
-; GFX9-NEXT:    v_cndmask_b32_e64 v1, v3, 59, vcc
+; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, v1, v2
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, 59, v3, vcc
 ; GFX9-NEXT:    v_ldexp_f32 v0, v0, v1
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -448,8 +448,8 @@ define float @fmul_select_f32_test8(float %x, i32 %bool.arg1, i32 %bool.arg2) {
 ; GFX10-GISEL:       ; %bb.0:
 ; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-GISEL-NEXT:    v_mov_b32_e32 v3, 0xc1000000
-; GFX10-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, v1, v2
-; GFX10-GISEL-NEXT:    v_cndmask_b32_e64 v1, v3, 0x41800000, vcc_lo
+; GFX10-GISEL-NEXT:    v_cmp_ne_u32_e32 vcc_lo, v1, v2
+; GFX10-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0x41800000, v3, vcc_lo
 ; GFX10-GISEL-NEXT:    v_mul_f32_e32 v0, v0, v1
 ; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -467,9 +467,9 @@ define float @fmul_select_f32_test8(float %x, i32 %bool.arg1, i32 %bool.arg2) {
 ; GFX11-GISEL:       ; %bb.0:
 ; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-GISEL-NEXT:    v_mov_b32_e32 v3, 0xc1000000
-; GFX11-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX11-GISEL-NEXT:    v_cmp_ne_u32_e32 vcc_lo, v1, v2
 ; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT:    v_cndmask_b32_e64 v1, v3, 0x41800000, vcc_lo
+; GFX11-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0x41800000, v3, vcc_lo
 ; GFX11-GISEL-NEXT:    v_mul_f32_e32 v0, v0, v1
 ; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
   %bool = icmp eq i32 %bool.arg1, %bool.arg2
@@ -653,8 +653,8 @@ define float @fmul_select_f32_test12_sel_log2val_neg48_pos68(float %x, i32 %bool
 ; GFX10-GISEL:       ; %bb.0:
 ; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-GISEL-NEXT:    v_mov_b32_e32 v3, 0x44
-; GFX10-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, v1, v2
-; GFX10-GISEL-NEXT:    v_cndmask_b32_e64 v1, v3, 0xffffffd0, vcc_lo
+; GFX10-GISEL-NEXT:    v_cmp_ne_u32_e32 vcc_lo, v1, v2
+; GFX10-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0xffffffd0, v3, vcc_lo
 ; GFX10-GISEL-NEXT:    v_ldexp_f32 v0, v0, v1
 ; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -672,9 +672,9 @@ define float @fmul_select_f32_test12_sel_log2val_neg48_pos68(float %x, i32 %bool
 ; GFX11-GISEL:       ; %bb.0:
 ; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-GISEL-NEXT:    v_mov_b32_e32 v3, 0x44
-; GFX11-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX11-GISEL-NEXT:    v_cmp_ne_u32_e32 vcc_lo, v1, v2
 ; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT:    v_cndmask_b32_e64 v1, v3, 0xffffffd0, vcc_lo
+; GFX11-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0xffffffd0, v3, vcc_lo
 ; GFX11-GISEL-NEXT:    v_ldexp_f32 v0, v0, v1
 ; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
   %bool = icmp eq i32 %bool.arg1, %bool.arg2
@@ -946,8 +946,8 @@ define double @fmul_select_f64_test7(double %x, i32 %bool.arg1, i32 %bool.arg2)
 ; GFX7-SDAG:       ; %bb.0:
 ; GFX7-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX7-SDAG-NEXT:    v_mov_b32_e32 v4, 0xbff00000
-; GFX7-SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
-; GFX7-SDAG-NEXT:    v_cndmask_b32_e64 v3, v4, 2.0, vcc
+; GFX7-SDAG-NEXT:    v_cmp_ne_u32_e32 vcc, v2, v3
+; GFX7-SDAG-NEXT:    v_cndmask_b32_e32 v3, 2.0, v4, vcc
 ; GFX7-SDAG-NEXT:    v_mov_b32_e32 v2, 0
 ; GFX7-SDAG-NEXT:    v_mul_f64 v[0:1], v[0:1], v[2:3]
 ; GFX7-SDAG-NEXT:    s_setpc_b64 s[30:31]
@@ -956,9 +956,9 @@ define double @fmul_select_f64_test7(double %x, i32 %bool.arg1, i32 %bool.arg2)
 ; GFX7-GISEL:       ; %bb.0:
 ; GFX7-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX7-GISEL-NEXT:    v_mov_b32_e32 v5, 0xbff00000
-; GFX7-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX7-GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, v2, v3
 ; GFX7-GISEL-NEXT:    v_mov_b32_e32 v4, 0
-; GFX7-GISEL-NEXT:    v_cndmask_b32_e64 v5, v5, 2.0, vcc
+; GFX7-GISEL-NEXT:    v_cndmask_b32_e32 v5, 2.0, v5, vcc
 ; GFX7-GISEL-NEXT:    v_mul_f64 v[0:1], v[0:1], v[4:5]
 ; GFX7-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -966,8 +966,8 @@ define double @fmul_select_f64_test7(double %x, i32 %bool.arg1, i32 %bool.arg2)
 ; GFX9-SDAG:       ; %bb.0:
 ; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-SDAG-NEXT:    v_mov_b32_e32 v4, 0xbff00000
-; GFX9-SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
-; GFX9-SDAG-NEXT:    v_cndmask_b32_e64 v3, v4, 2.0, vcc
+; GFX9-SDAG-NEXT:    v_cmp_ne_u32_e32 vcc, v2, v3
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v3, 2.0, v4, vcc
 ; GFX9-SDAG-NEXT:    v_mov_b32_e32 v2, 0
 ; GFX9-SDAG-NEXT:    v_mul_f64 v[0:1], v[0:1], v[2:3]
 ; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
@@ -976,9 +976,9 @@ define double @fmul_select_f64_test7(double %x, i32 %bool.arg1, i32 %bool.arg2)
 ; GFX9-GISEL:       ; %bb.0:
 ; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-GISEL-NEXT:    v_mov_b32_e32 v5, 0xbff00000
-; GFX9-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX9-GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, v2, v3
 ; GFX9-GISEL-NEXT:    v_mov_b32_e32 v4, 0
-; GFX9-GISEL-NEXT:    v_cndmask_b32_e64 v5, v5, 2.0, vcc
+; GFX9-GISEL-NEXT:    v_cndmask_b32_e32 v5, 2.0, v5, vcc
 ; GFX9-GISEL-NEXT:    v_mul_f64 v[0:1], v[0:1], v[4:5]
 ; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -1170,9 +1170,9 @@ define <2 x double> @fmul_select_v2f64_test10(<2 x double> %x, <2 x i32> %bool.a
 ; GFX10-GISEL:       ; %bb.0:
 ; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-GISEL-NEXT:    v_mov_b32_e32 v9, 0xbff00000
-; GFX10-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, v4, v6
+; GFX10-GISEL-NEXT:    v_cmp_ne_u32_e32 vcc_lo, v4, v6
 ; GFX10-GISEL-NEXT:    v_mov_b32_e32 v8, 0
-; GFX10-GISEL-NEXT:    v_cndmask_b32_e64 v9, v9, 0x3fe00000, vcc_lo
+; GFX10-GISEL-NEXT:    v_cndmask_b32_e32 v9, 0x3fe00000, v9, vcc_lo
 ; GFX10-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, v5, v7
 ; GFX10-GISEL-NEXT:    v_mul_f64 v[0:1], v[0:1], v[8:9]
 ; GFX10-GISEL-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc_lo
@@ -1197,9 +1197,9 @@ define <2 x double> @fmul_select_v2f64_test10(<2 x double> %x, <2 x i32> %bool.a
 ; GFX11-GISEL:       ; %bb.0:
 ; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-GISEL-NEXT:    v_dual_mov_b32 v9, 0xbff00000 :: v_dual_mov_b32 v8, 0
-; GFX11-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, v4, v6
+; GFX11-GISEL-NEXT:    v_cmp_ne_u32_e32 vcc_lo, v4, v6
 ; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-GISEL-NEXT:    v_cndmask_b32_e64 v9, v9, 0x3fe00000, vcc_lo
+; GFX11-GISEL-NEXT:    v_cndmask_b32_e32 v9, 0x3fe00000, v9, vcc_lo
 ; GFX11-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, v5, v7
 ; GFX11-GISEL-NEXT:    v_mul_f64 v[0:1], v[0:1], v[8:9]
 ; GFX11-GISEL-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc_lo
@@ -1217,8 +1217,8 @@ define double @fmul_select_f64_test11(double %x, i32 %bool.arg1, i32 %bool.arg2)
 ; GFX7-SDAG:       ; %bb.0:
 ; GFX7-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX7-SDAG-NEXT:    v_bfrev_b32_e32 v4, 1
-; GFX7-SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
-; GFX7-SDAG-NEXT:    v_cndmask_b32_e64 v3, v4, -2.0, vcc
+; GFX7-SDAG-NEXT:    v_cmp_ne_u32_e32 vcc, v2, v3
+; GFX7-SDAG-NEXT:    v_cndmask_b32_e32 v3, -2.0, v4, vcc
 ; GFX7-SDAG-NEXT:    v_mov_b32_e32 v2, 0
 ; GFX7-SDAG-NEXT:    v_mul_f64 v[0:1], v[0:1], v[2:3]
 ; GFX7-SDAG-NEXT:    s_setpc_b64 s[30:31]
@@ -1227,9 +1227,9 @@ define double @fmul_select_f64_test11(double %x, i32 %bool.arg1, i32 %bool.arg2)
 ; GFX7-GISEL:       ; %bb.0:
 ; GFX7-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX7-GISEL-NEXT:    v_bfrev_b32_e32 v5, 1
-; GFX7-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX7-GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, v2, v3
 ; GFX7-GISEL-NEXT:    v_mov_b32_e32 v4, 0
-; GFX7-GISEL-NEXT:    v_cndmask_b32_e64 v5, v5, -2.0, vcc
+; GFX7-GISEL-NEXT:    v_cndmask_b32_e32 v5, -2.0, v5, vcc
 ; GFX7-GISEL-NEXT:    v_mul_f64 v[0:1], v[0:1], v[4:5]
 ; GFX7-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -1237,8 +1237,8 @@ define double @fmul_select_f64_test11(double %x, i32 %bool.arg1, i32 %bool.arg2)
 ; GFX9-SDAG:       ; %bb.0:
 ; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-SDAG-NEXT:    v_bfrev_b32_e32 v4, 1
-; GFX9-SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
-; GFX9-SDAG-NEXT:    v_cndmask_b32_e64 v3, v4, -2.0, vcc
+; GFX9-SDAG-NEXT:    v_cmp_ne_u32_e32 vcc, v2, v3
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v3, -2.0, v4, vcc
 ; GFX9-SDAG-NEXT:    v_mov_b32_e32 v2, 0
 ; GFX9-SDAG-NEXT:    v_mul_f64 v[0:1], v[0:1], v[2:3]
 ; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
@@ -1247,9 +1247,9 @@ define double @fmul_select_f64_test11(double %x, i32 %bool.arg1, i32 %bool.arg2)
 ; GFX9-GISEL:       ; %bb.0:
 ; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-GISEL-NEXT:    v_bfrev_b32_e32 v5, 1
-; GFX9-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX9-GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, v2, v3
 ; GFX9-GISEL-NEXT:    v_mov_b32_e32 v4, 0
-; GFX9-GISEL-NEXT:    v_cndmask_b32_e64 v5, v5, -2.0, vcc
+; GFX9-GISEL-NEXT:    v_cndmask_b32_e32 v5, -2.0, v5, vcc
 ; GFX9-GISEL-NEXT:    v_mul_f64 v[0:1], v[0:1], v[4:5]
 ; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -1282,9 +1282,9 @@ define double @fmul_select_f64_test12(double %x, i32 %bool.arg1, i32 %bool.arg2)
 ; GFX7:       ; %bb.0:
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX7-NEXT:    v_bfrev_b32_e32 v5, 1
-; GFX7-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX7-NEXT:    v_cmp_ne_u32_e32 vcc, v2, v3
 ; GFX7-NEXT:    v_mov_b32_e32 v4, 0
-; GFX7-NEXT:    v_cndmask_b32_e64 v5, v5, 0, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v5, 0, v5, vcc
 ; GFX7-NEXT:    v_mul_f64 v[0:1], v[0:1], v[4:5]
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -1292,9 +1292,9 @@ define double @fmul_select_f64_test12(double %x, i32 %bool.arg1, i32 %bool.arg2)
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    v_bfrev_b32_e32 v5, 1
-; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, v2, v3
 ; GFX9-NEXT:    v_mov_b32_e32 v4, 0
-; GFX9-NEXT:    v_cndmask_b32_e64 v5, v5, 0, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v5, 0, v5, vcc
 ; GFX9-NEXT:    v_mul_f64 v[0:1], v[0:1], v[4:5]
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -1327,9 +1327,9 @@ define double @fmul_select_f64_test13(double %x, i32 %bool.arg1, i32 %bool.arg2)
 ; GFX7:       ; %bb.0:
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX7-NEXT:    v_mov_b32_e32 v5, 0x40300000
-; GFX7-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX7-NEXT:    v_cmp_ne_u32_e32 vcc, v2, v3
 ; GFX7-NEXT:    v_mov_b32_e32 v4, 0
-; GFX7-NEXT:    v_cndmask_b32_e64 v5, v5, 0, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v5, 0, v5, vcc
 ; GFX7-NEXT:    v_mul_f64 v[0:1], v[0:1], v[4:5]
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -1337,9 +1337,9 @@ define double @fmul_select_f64_test13(double %x, i32 %bool.arg1, i32 %bool.arg2)
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    v_mov_b32_e32 v5, 0x40300000
-; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, v2, v3
 ; GFX9-NEXT:    v_mov_b32_e32 v4, 0
-; GFX9-NEXT:    v_cndmask_b32_e64 v5, v5, 0, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v5, 0, v5, vcc
 ; GFX9-NEXT:    v_mul_f64 v[0:1], v[0:1], v[4:5]
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -1421,8 +1421,8 @@ define double @fmul_select_f64_test14_sel_log2val_pos92_neg27(double %x, i32 %bo
 ; GFX10-GISEL:       ; %bb.0:
 ; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-GISEL-NEXT:    v_not_b32_e32 v4, 26
-; GFX10-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, v2, v3
-; GFX10-GISEL-NEXT:    v_cndmask_b32_e64 v2, v4, 0x5c, vcc_lo
+; GFX10-GISEL-NEXT:    v_cmp_ne_u32_e32 vcc_lo, v2, v3
+; GFX10-GISEL-NEXT:    v_cndmask_b32_e32 v2, 0x5c, v4, vcc_lo
 ; GFX10-GISEL-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
 ; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -1440,9 +1440,9 @@ define double @fmul_select_f64_test14_sel_log2val_pos92_neg27(double %x, i32 %bo
 ; GFX11-GISEL:       ; %bb.0:
 ; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-GISEL-NEXT:    v_not_b32_e32 v4, 26
-; GFX11-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, v2, v3
+; GFX11-GISEL-NEXT:    v_cmp_ne_u32_e32 vcc_lo, v2, v3
 ; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT:    v_cndmask_b32_e64 v2, v4, 0x5c, vcc_lo
+; GFX11-GISEL-NEXT:    v_cndmask_b32_e32 v2, 0x5c, v4, vcc_lo
 ; GFX11-GISEL-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
 ; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
   %bool = icmp eq i32 %bool.arg1, %bool.arg2
@@ -1505,8 +1505,8 @@ define double @fmul_select_f64_test15_sel_log2val_neg42_neg33(double %x, i32 %bo
 ; GFX10-GISEL:       ; %bb.0:
 ; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-GISEL-NEXT:    v_not_b32_e32 v4, 32
-; GFX10-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, v2, v3
-; GFX10-GISEL-NEXT:    v_cndmask_b32_e64 v2, v4, 0xffffffd6, vcc_lo
+; GFX10-GISEL-NEXT:    v_cmp_ne_u32_e32 vcc_lo, v2, v3
+; GFX10-GISEL-NEXT:    v_cndmask_b32_e32 v2, 0xffffffd6, v4, vcc_lo
 ; GFX10-GISEL-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
 ; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -1524,9 +1524,9 @@ define double @fmul_select_f64_test15_sel_log2val_neg42_neg33(double %x, i32 %bo
 ; GFX11-GISEL:       ; %bb.0:
 ; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-GISEL-NEXT:    v_not_b32_e32 v4, 32
-; GFX11-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, v2, v3
+; GFX11-GISEL-NEXT:    v_cmp_ne_u32_e32 vcc_lo, v2, v3
 ; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT:    v_cndmask_b32_e64 v2, v4, 0xffffffd6, vcc_lo
+; GFX11-GISEL-NEXT:    v_cndmask_b32_e32 v2, 0xffffffd6, v4, vcc_lo
 ; GFX11-GISEL-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
 ; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
   %bool = icmp eq i32 %bool.arg1, %bool.arg2
@@ -2073,8 +2073,8 @@ define half @fmul_select_f16_test6(half %x, i32 %bool.arg1, i32 %bool.arg2) {
 ; GFX10-GISEL:       ; %bb.0:
 ; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-GISEL-NEXT:    v_mov_b32_e32 v3, 0x4200
-; GFX10-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, v1, v2
-; GFX10-GISEL-NEXT:    v_cndmask_b32_e64 v1, v3, 0xc800, vcc_lo
+; GFX10-GISEL-NEXT:    v_cmp_ne_u32_e32 vcc_lo, v1, v2
+; GFX10-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0xc800, v3, vcc_lo
 ; GFX10-GISEL-NEXT:    v_mul_f16_e32 v0, v0, v1
 ; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -2092,9 +2092,9 @@ define half @fmul_select_f16_test6(half %x, i32 %bool.arg1, i32 %bool.arg2) {
 ; GFX11-GISEL:       ; %bb.0:
 ; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-GISEL-NEXT:    v_mov_b32_e32 v3, 0x4200
-; GFX11-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX11-GISEL-NEXT:    v_cmp_ne_u32_e32 vcc_lo, v1, v2
 ; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT:    v_cndmask_b32_e64 v1, v3, 0xc800, vcc_lo
+; GFX11-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0xc800, v3, vcc_lo
 ; GFX11-GISEL-NEXT:    v_mul_f16_e32 v0, v0, v1
 ; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
   %bool = icmp eq i32 %bool.arg1, %bool.arg2
@@ -2161,8 +2161,8 @@ define half @fmul_select_f16_test7(half %x, i32 %bool.arg1, i32 %bool.arg2) {
 ; GFX10-GISEL:       ; %bb.0:
 ; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-GISEL-NEXT:    v_mov_b32_e32 v3, 0xc400
-; GFX10-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, v1, v2
-; GFX10-GISEL-NEXT:    v_cndmask_b32_e64 v1, v3, 0x4800, vcc_lo
+; GFX10-GISEL-NEXT:    v_cmp_ne_u32_e32 vcc_lo, v1, v2
+; GFX10-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0x4800, v3, vcc_lo
 ; GFX10-GISEL-NEXT:    v_mul_f16_e32 v0, v0, v1
 ; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -2180,9 +2180,9 @@ define half @fmul_select_f16_test7(half %x, i32 %bool.arg1, i32 %bool.arg2) {
 ; GFX11-GISEL:       ; %bb.0:
 ; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-GISEL-NEXT:    v_mov_b32_e32 v3, 0xc400
-; GFX11-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX11-GISEL-NEXT:    v_cmp_ne_u32_e32 vcc_lo, v1, v2
 ; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT:    v_cndmask_b32_e64 v1, v3, 0x4800, vcc_lo
+; GFX11-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0x4800, v3, vcc_lo
 ; GFX11-GISEL-NEXT:    v_mul_f16_e32 v0, v0, v1
 ; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
   %bool = icmp eq i32 %bool.arg1, %bool.arg2
@@ -2876,8 +2876,8 @@ define bfloat @fmul_select_bf16_test5(bfloat %x, i32 %bool.arg1, i32 %bool.arg2)
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX7-NEXT:    v_mul_f32_e32 v0, 1.0, v0
 ; GFX7-NEXT:    v_mov_b32_e32 v3, 0x41000000
-; GFX7-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v2
-; GFX7-NEXT:    v_cndmask_b32_e64 v1, v3, 2.0, vcc
+; GFX7-NEXT:    v_cmp_ne_u32_e32 vcc, v1, v2
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, 2.0, v3, vcc
 ; GFX7-NEXT:    v_and_b32_e32 v0, 0xffff0000, v0
 ; GFX7-NEXT:    v_mul_f32_e32 v0, v0, v1
 ; GFX7-NEXT:    v_and_b32_e32 v0, 0xffff0000, v0
diff --git a/llvm/test/CodeGen/AMDGPU/div_i128.ll b/llvm/test/CodeGen/AMDGPU/div_i128.ll
index 06c0417211809..efd633d21dba1 100644
--- a/llvm/test/CodeGen/AMDGPU/div_i128.ll
+++ b/llvm/test/CodeGen/AMDGPU/div_i128.ll
@@ -1287,11 +1287,11 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-G-NEXT:    v_xor_b32_e32 v6, 0x7f, v0
 ; GFX9-G-NEXT:    v_or_b32_e32 v14, v6, v2
 ; GFX9-G-NEXT:    v_and_b32_e32 v6, 1, v20
-; GFX9-G-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v6
-; GFX9-G-NEXT:    v_cndmask_b32_e64 v6, v8, 0, vcc
-; GFX9-G-NEXT:    v_cndmask_b32_e64 v7, v9, 0, vcc
-; GFX9-G-NEXT:    v_cndmask_b32_e64 v12, v10, 0, vcc
-; GFX9-G-NEXT:    v_cndmask_b32_e64 v13, v11, 0, vcc
+; GFX9-G-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v6
+; GFX9-G-NEXT:    v_cndmask_b32_e32 v6, 0, v8, vcc
+; GFX9-G-NEXT:    v_cndmask_b32_e32 v7, 0, v9, vcc
+; GFX9-G-NEXT:    v_cndmask_b32_e32 v12, 0, v10, vcc
+; GFX9-G-NEXT:    v_cndmask_b32_e32 v13, 0, v11, vcc
 ; GFX9-G-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[14:15]
 ; GFX9-G-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
 ; GFX9-G-NEXT:    v_or_b32_e32 v14, v20, v14
@@ -3414,11 +3414,11 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-G-NEXT:    v_xor_b32_e32 v8, 0x7f, v12
 ; GFX9-G-NEXT:    v_or_b32_e32 v16, v8, v14
 ; GFX9-G-NEXT:    v_and_b32_e32 v8, 1, v18
-; GFX9-G-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v8
-; GFX9-G-NEXT:    v_cndmask_b32_e64 v10, v0, 0, vcc
-; GFX9-G-NEXT:    v_cndmask_b32_e64 v11, v1, 0, vcc
-; GFX9-G-NEXT:    v_cndmask_b32_e64 v8, v2, 0, vcc
-; GFX9-G-NEXT:    v_cndmask_b32_e64 v9, v3, 0, vcc
+; GFX9-G-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v8
+; GFX9-G-NEXT:    v_cndmask_b32_e32 v10, 0, v0, vcc
+; GFX9-G-NEXT:    v_cndmask_b32_e32 v11, 0, v1, vcc
+; GFX9-G-NEXT:    v_cndmask_b32_e32 v8, 0, v2, vcc
+; GFX9-G-NEXT:    v_cndmask_b32_e32 v9, 0, v3, vcc
 ; GFX9-G-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[16:17]
 ; GFX9-G-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
 ; GFX9-G-NEXT:    v_or_b32_e32 v16, v18, v16
diff --git a/llvm/test/CodeGen/AMDGPU/div_v2i128.ll b/llvm/test/CodeGen/AMDGPU/div_v2i128.ll
index 77b78f1f8a333..07d7276e3b944 100644
--- a/llvm/test/CodeGen/AMDGPU/div_v2i128.ll
+++ b/llvm/test/CodeGen/AMDGPU/div_v2i128.ll
@@ -495,13 +495,13 @@ define <2 x i128> @v_sdiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GISEL-NEXT:    v_or_b32_e32 v8, v9, v8
 ; GISEL-NEXT:    v_and_b32_e32 v9, 1, v9
 ; GISEL-NEXT:    v_and_b32_e32 v8, 1, v8
-; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v9
-; GISEL-NEXT:    v_cndmask_b32_e64 v22, v18, 0, vcc
+; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v9
+; GISEL-NEXT:    v_cndmask_b32_e32 v22, 0, v18, vcc
 ; GISEL-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v8
-; GISEL-NEXT:    v_cndmask_b32_e64 v8, v20, 0, vcc
-; GISEL-NEXT:    v_cndmask_b32_e64 v9, v21, 0, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v8, 0, v20, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v9, 0, v21, vcc
 ; GISEL-NEXT:    s_xor_b64 s[4:5], s[4:5], -1
-; GISEL-NEXT:    v_cndmask_b32_e64 v23, v19, 0, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v23, 0, v19, vcc
 ; GISEL-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
 ; GISEL-NEXT:    s_cbranch_execz .LBB0_6
 ; GISEL-NEXT:  ; %bb.1: ; %udiv-bb15
@@ -685,12 +685,12 @@ define <2 x i128> @v_sdiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GISEL-NEXT:    v_or_b32_e32 v11, v14, v15
 ; GISEL-NEXT:    v_and_b32_e32 v14, 1, v11
 ; GISEL-NEXT:    v_or_b32_e32 v10, v11, v10
-; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v14
-; GISEL-NEXT:    v_cndmask_b32_e64 v14, v6, 0, vcc
+; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v14
+; GISEL-NEXT:    v_cndmask_b32_e32 v14, 0, v6, vcc
 ; GISEL-NEXT:    v_and_b32_e32 v16, 1, v10
-; GISEL-NEXT:    v_cndmask_b32_e64 v15, v7, 0, vcc
-; GISEL-NEXT:    v_cndmask_b32_e64 v10, v12, 0, vcc
-; GISEL-NEXT:    v_cndmask_b32_e64 v11, v13, 0, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v15, 0, v7, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v10, 0, v12, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v11, 0, v13, vcc
 ; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v16
 ; GISEL-NEXT:    s_xor_b64 s[4:5], vcc, -1
 ; GISEL-NEXT:    s_and_saveexec_b64 s[12:13], s[4:5]
@@ -1251,13 +1251,13 @@ define <2 x i128> @v_udiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GISEL-NEXT:    v_or_b32_e32 v2, v3, v2
 ; GISEL-NEXT:    v_and_b32_e32 v3, 1, v3
 ; GISEL-NEXT:    v_and_b32_e32 v2, 1, v2
-; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v3
-; GISEL-NEXT:    v_cndmask_b32_e64 v18, v0, 0, vcc
+; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v3
+; GISEL-NEXT:    v_cndmask_b32_e32 v18, 0, v0, vcc
 ; GISEL-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v2
-; GISEL-NEXT:    v_cndmask_b32_e64 v2, v16, 0, vcc
-; GISEL-NEXT:    v_cndmask_b32_e64 v3, v17, 0, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v2, 0, v16, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v3, 0, v17, vcc
 ; GISEL-NEXT:    s_xor_b64 s[4:5], s[4:5], -1
-; GISEL-NEXT:    v_cndmask_b32_e64 v19, v1, 0, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v19, 0, v1, vcc
 ; GISEL-NEXT:    s_and_saveexec_b64 s[12:13], s[4:5]
 ; GISEL-NEXT:    s_cbranch_execz .LBB1_6
 ; GISEL-NEXT:  ; %bb.1: ; %udiv-bb15
@@ -1423,12 +1423,12 @@ define <2 x i128> @v_udiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GISEL-NEXT:    v_or_b32_e32 v9, v20, v10
 ; GISEL-NEXT:    v_and_b32_e32 v10, 1, v9
 ; GISEL-NEXT:    v_or_b32_e32 v8, v9, v8
-; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v10
-; GISEL-NEXT:    v_cndmask_b32_e64 v10, v4, 0, vcc
+; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v10
+; GISEL-NEXT:    v_cndmask_b32_e32 v10, 0, v4, vcc
 ; GISEL-NEXT:    v_and_b32_e32 v20, 1, v8
-; GISEL-NEXT:    v_cndmask_b32_e64 v11, v5, 0, vcc
-; GISEL-NEXT:    v_cndmask_b32_e64 v8, v6, 0, vcc
-; GISEL-NEXT:    v_cndmask_b32_e64 v9, v7, 0, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v11, 0, v5, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v8, 0, v6, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v9, 0, v7, vcc
 ; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v20
 ; GISEL-NEXT:    s_xor_b64 s[4:5], vcc, -1
 ; GISEL-NEXT:    s_and_saveexec_b64 s[12:13], s[4:5]
@@ -2093,13 +2093,13 @@ define <2 x i128> @v_srem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GISEL-NEXT:    v_or_b32_e32 v18, v19, v18
 ; GISEL-NEXT:    v_and_b32_e32 v19, 1, v19
 ; GISEL-NEXT:    v_and_b32_e32 v18, 1, v18
-; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v19
-; GISEL-NEXT:    v_cndmask_b32_e64 v31, v16, 0, vcc
+; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v19
+; GISEL-NEXT:    v_cndmask_b32_e32 v31, 0, v16, vcc
 ; GISEL-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v18
-; GISEL-NEXT:    v_cndmask_b32_e64 v18, v8, 0, vcc
-; GISEL-NEXT:    v_cndmask_b32_e64 v19, v9, 0, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v18, 0, v8, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v19, 0, v9, vcc
 ; GISEL-NEXT:    s_xor_b64 s[4:5], s[4:5], -1
-; GISEL-NEXT:    v_cndmask_b32_e64 v32, v17, 0, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v32, 0, v17, vcc
 ; GISEL-NEXT:    s_and_saveexec_b64 s[12:13], s[4:5]
 ; GISEL-NEXT:    s_cbranch_execz .LBB2_6
 ; GISEL-NEXT:  ; %bb.1: ; %udiv-bb15
@@ -2283,12 +2283,12 @@ define <2 x i128> @v_srem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GISEL-NEXT:    v_or_b32_e32 v3, v20, v21
 ; GISEL-NEXT:    v_and_b32_e32 v20, 1, v3
 ; GISEL-NEXT:    v_or_b32_e32 v2, v3, v2
-; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v20
-; GISEL-NEXT:    v_cndmask_b32_e64 v20, v12, 0, vcc
+; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v20
+; GISEL-NEXT:    v_cndmask_b32_e32 v20, 0, v12, vcc
 ; GISEL-NEXT:    v_and_b32_e32 v22, 1, v2
-; GISEL-NEXT:    v_cndmask_b32_e64 v21, v13, 0, vcc
-; GISEL-NEXT:    v_cndmask_b32_e64 v2, v6, 0, vcc
-; GISEL-NEXT:    v_cndmask_b32_e64 v3, v7, 0, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v21, 0, v13, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v2, 0, v6, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v3, 0, v7, vcc
 ; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v22
 ; GISEL-NEXT:    s_xor_b64 s[4:5], vcc, -1
 ; GISEL-NEXT:    s_and_saveexec_b64 s[12:13], s[4:5]
@@ -2920,13 +2920,13 @@ define <2 x i128> @v_urem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GISEL-NEXT:    v_or_b32_e32 v20, v21, v20
 ; GISEL-NEXT:    v_and_b32_e32 v21, 1, v21
 ; GISEL-NEXT:    v_and_b32_e32 v20, 1, v20
-; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v21
-; GISEL-NEXT:    v_cndmask_b32_e64 v32, v0, 0, vcc
+; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v21
+; GISEL-NEXT:    v_cndmask_b32_e32 v32, 0, v0, vcc
 ; GISEL-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v20
-; GISEL-NEXT:    v_cndmask_b32_e64 v20, v2, 0, vcc
-; GISEL-NEXT:    v_cndmask_b32_e64 v21, v3, 0, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v20, 0, v2, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v21, 0, v3, vcc
 ; GISEL-NEXT:    s_xor_b64 s[4:5], s[4:5], -1
-; GISEL-NEXT:    v_cndmask_b32_e64 v33, v1, 0, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v33, 0, v1, vcc
 ; GISEL-NEXT:    s_and_saveexec_b64 s[12:13], s[4:5]
 ; GISEL-NEXT:    s_cbranch_execz .LBB3_6
 ; GISEL-NEXT:  ; %bb.1: ; %udiv-bb15
@@ -3092,12 +3092,12 @@ define <2 x i128> @v_urem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GISEL-NEXT:    v_or_b32_e32 v19, v26, v24
 ; GISEL-NEXT:    v_and_b32_e32 v24, 1, v19
 ; GISEL-NEXT:    v_or_b32_e32 v18, v19, v18
-; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v24
-; GISEL-NEXT:    v_cndmask_b32_e64 v24, v4, 0, vcc
+; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v24
+; GISEL-NEXT:    v_cndmask_b32_e32 v24, 0, v4, vcc
 ; GISEL-NEXT:    v_and_b32_e32 v26, 1, v18
-; GISEL-NEXT:    v_cndmask_b32_e64 v25, v5, 0, vcc
-; GISEL-NEXT:    v_cndmask_b32_e64 v18, v6, 0, vcc
-; GISEL-NEXT:    v_cndmask_b32_e64 v19, v7, 0, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v25, 0, v5, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v18, 0, v6, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v19, 0, v7, vcc
 ; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v26
 ; GISEL-NEXT:    s_xor_b64 s[4:5], vcc, -1
 ; GISEL-NEXT:    s_and_saveexec_b64 s[12:13], s[4:5]
diff --git a/llvm/test/CodeGen/AMDGPU/fmul-to-ldexp.ll b/llvm/test/CodeGen/AMDGPU/fmul-to-ldexp.ll
index 8c91acd5ae024..33594db233729 100644
--- a/llvm/test/CodeGen/AMDGPU/fmul-to-ldexp.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmul-to-ldexp.ll
@@ -3573,8 +3573,8 @@ define float @v_contract_mul_add_f32_select_1_64(i32 %arg, float %x, float %y) {
 ; GFX9-SDAG:       ; %bb.0:
 ; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-SDAG-NEXT:    v_mov_b32_e32 v3, 0x42800000
-; GFX9-SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
-; GFX9-SDAG-NEXT:    v_cndmask_b32_e64 v0, v3, 1.0, vcc
+; GFX9-SDAG-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v0, 1.0, v3, vcc
 ; GFX9-SDAG-NEXT:    v_fma_f32 v0, v1, v0, v2
 ; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -3691,8 +3691,8 @@ define float @v_contract_mul_add_f32_select_n1_n64(i32 %arg, float %x, float %y)
 ; GFX9-SDAG:       ; %bb.0:
 ; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-SDAG-NEXT:    v_mov_b32_e32 v3, 0xc2800000
-; GFX9-SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
-; GFX9-SDAG-NEXT:    v_cndmask_b32_e64 v0, v3, -1.0, vcc
+; GFX9-SDAG-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v0, -1.0, v3, vcc
 ; GFX9-SDAG-NEXT:    v_fma_f32 v0, v1, v0, v2
 ; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -3897,8 +3897,8 @@ define float @v_contract_mul_add_f32_select_4_128(i32 %arg, float %x, float %y)
 ; GFX9-SDAG:       ; %bb.0:
 ; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-SDAG-NEXT:    v_mov_b32_e32 v3, 0x43000000
-; GFX9-SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
-; GFX9-SDAG-NEXT:    v_cndmask_b32_e64 v0, v3, 4.0, vcc
+; GFX9-SDAG-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v0, 4.0, v3, vcc
 ; GFX9-SDAG-NEXT:    v_fma_f32 v0, v1, v0, v2
 ; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -4545,8 +4545,8 @@ define double @v_contract_mul_add_f64_select_2_4(i32 %arg, double %x, double %y)
 ; GFX9-SDAG:       ; %bb.0:
 ; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-SDAG-NEXT:    v_mov_b32_e32 v5, 0x40100000
-; GFX9-SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
-; GFX9-SDAG-NEXT:    v_cndmask_b32_e64 v6, v5, 2.0, vcc
+; GFX9-SDAG-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
+; GFX9-SDAG-NEXT:    v_cndmask_b32_e32 v6, 2.0, v5, vcc
 ; GFX9-SDAG-NEXT:    v_mov_b32_e32 v5, 0
 ; GFX9-SDAG-NEXT:    v_fma_f64 v[0:1], v[1:2], v[5:6], v[3:4]
 ; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
diff --git a/llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll b/llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll
index 14f7cbcd0f438..7f51cbec5dc4e 100644
--- a/llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll
+++ b/llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll
@@ -2836,9 +2836,9 @@ define float @v_fneg_select_infloop_regression_f32(float %arg, i1 %arg1) {
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GCN-NEXT:    v_and_b32_e32 v1, 1, v1
-; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v1
-; GCN-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
-; GCN-NEXT:    v_cndmask_b32_e64 v0, -v0, 0, vcc
+; GCN-NEXT:    v_cmp_ne_u32_e32 vcc, 1, v1
+; GCN-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
+; GCN-NEXT:    v_cndmask_b32_e64 v0, 0, -v0, vcc
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
   %i = select i1 %arg1, float 0.0, float %arg
   %i2 = fneg float %i
@@ -2897,9 +2897,9 @@ define float @v_fneg_select_infloop_regression_inline_imm_f32(float %arg, i1 %ar
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GCN-NEXT:    v_and_b32_e32 v1, 1, v1
-; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v1
-; GCN-NEXT:    v_cndmask_b32_e64 v0, v0, 2.0, vcc
-; GCN-NEXT:    v_cndmask_b32_e64 v0, -v0, 2.0, vcc
+; GCN-NEXT:    v_cmp_ne_u32_e32 vcc, 1, v1
+; GCN-NEXT:    v_cndmask_b32_e32 v0, 2.0, v0, vcc
+; GCN-NEXT:    v_cndmask_b32_e64 v0, 2.0, -v0, vcc
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
   %i = select i1 %arg1, float 2.0, float %arg
   %i2 = fneg float %i
@@ -2958,9 +2958,9 @@ define float @v_fneg_select_infloop_regression_neg_inline_imm_f32(float %arg, i1
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GCN-NEXT:    v_and_b32_e32 v1, 1, v1
-; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v1
-; GCN-NEXT:    v_cndmask_b32_e64 v0, v0, -2.0, vcc
-; GCN-NEXT:    v_cndmask_b32_e64 v0, -v0, -2.0, vcc
+; GCN-NEXT:    v_cmp_ne_u32_e32 vcc, 1, v1
+; GCN-NEXT:    v_cndmask_b32_e32 v0, -2.0, v0, vcc
+; GCN-NEXT:    v_cndmask_b32_e64 v0, -2.0, -v0, vcc
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
   %i = select i1 %arg1, float -2.0, float %arg
   %i2 = fneg float %i
@@ -3066,10 +3066,10 @@ define double @v_fneg_select_infloop_regression_f64(double %arg, i1 %arg1) {
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GCN-NEXT:    v_and_b32_e32 v2, 1, v2
 ; GCN-NEXT:    v_bfrev_b32_e32 v3, 1
-; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v2
-; GCN-NEXT:    v_cndmask_b32_e64 v1, -v1, v3, vcc
-; GCN-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
-; GCN-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
+; GCN-NEXT:    v_cmp_ne_u32_e32 vcc, 1, v2
+; GCN-NEXT:    v_cndmask_b32_e64 v1, v3, -v1, vcc
+; GCN-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
+; GCN-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
   %i = select i1 %arg1, double 0.0, double %arg
   %i2 = fneg double %i
@@ -3122,20 +3122,20 @@ define half @v_fneg_select_infloop_regression_f16(half %arg, i1 %arg1) {
 ; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
 ; SI-NEXT:    v_and_b32_e32 v1, 1, v1
-; SI-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v1
+; SI-NEXT:    v_cmp_ne_u32_e32 vcc, 1, v1
 ; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
-; SI-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
-; SI-NEXT:    v_cndmask_b32_e64 v0, -v0, 0, vcc
+; SI-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
+; SI-NEXT:    v_cndmask_b32_e64 v0, 0, -v0, vcc
 ; SI-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; VI-LABEL: v_fneg_select_infloop_regression_f16:
 ; VI:       ; %bb.0:
 ; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; VI-NEXT:    v_and_b32_e32 v1, 1, v1
-; VI-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v1
-; VI-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
+; VI-NEXT:    v_cmp_ne_u32_e32 vcc, 1, v1
+; VI-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
 ; VI-NEXT:    v_xor_b32_e32 v0, 0x8000, v0
-; VI-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
+; VI-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
 ; VI-NEXT:    s_setpc_b64 s[30:31]
   %i = select i1 %arg1, half 0.0, half %arg
   %i2 = fneg half %i
@@ -3189,10 +3189,10 @@ define <2 x half> @v_fneg_select_infloop_regression_v2f16(<2 x half> %arg, i1 %a
 ; SI-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
 ; SI-NEXT:    v_or_b32_e32 v0, v0, v1
 ; SI-NEXT:    v_and_b32_e32 v1, 1, v2
-; SI-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v1
-; SI-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
+; SI-NEXT:    v_cmp_ne_u32_e32 vcc, 1, v1
+; SI-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
 ; SI-NEXT:    v_xor_b32_e32 v0, 0x80008000, v0
-; SI-NEXT:    v_cndmask_b32_e64 v1, v0, 0, vcc
+; SI-NEXT:    v_cndmask_b32_e32 v1, 0, v0, vcc
 ; SI-NEXT:    v_cvt_f32_f16_e32 v0, v1
 ; SI-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
 ; SI-NEXT:    v_cvt_f32_f16_e32 v1, v1
@@ -3202,10 +3202,10 @@ define <2 x half> @v_fneg_select_infloop_regression_v2f16(<2 x half> %arg, i1 %a
 ; VI:       ; %bb.0:
 ; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; VI-NEXT:    v_and_b32_e32 v1, 1, v1
-; VI-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v1
-; VI-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
+; VI-NEXT:    v_cmp_ne_u32_e32 vcc, 1, v1
+; VI-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
 ; VI-NEXT:    v_xor_b32_e32 v0, 0x80008000, v0
-; VI-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
+; VI-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
 ; VI-NEXT:    s_setpc_b64 s[30:31]
   %i = select i1 %arg1, <2 x half> zeroinitializer, <2 x half> %arg
   %i2 = fneg <2 x half> %i
@@ -3264,11 +3264,11 @@ define <2 x float> @v_fneg_select_infloop_regression_v2f32(<2 x float> %arg, i1
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GCN-NEXT:    v_and_b32_e32 v2, 1, v2
 ; GCN-NEXT:    v_bfrev_b32_e32 v3, 1
-; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v2
-; GCN-NEXT:    v_cndmask_b32_e64 v1, -v1, v3, vcc
-; GCN-NEXT:    v_cndmask_b32_e64 v0, -v0, v3, vcc
-; GCN-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
-; GCN-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
+; GCN-NEXT:    v_cmp_ne_u32_e32 vcc, 1, v2
+; GCN-NEXT:    v_cndmask_b32_e64 v1, v3, -v1, vcc
+; GCN-NEXT:    v_cndmask_b32_e64 v0, v3, -v0, vcc
+; GCN-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
+; GCN-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
   %i = select i1 %arg1, <2 x float> zeroinitializer, <2 x float> %arg
   %i2 = fneg <2 x float> %i
@@ -3316,9 +3316,9 @@ define float @v_fabs_select_infloop_regression_f32(float %arg, i1 %arg1) {
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GCN-NEXT:    v_and_b32_e32 v1, 1, v1
-; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v1
-; GCN-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
-; GCN-NEXT:    v_cndmask_b32_e64 v0, |v0|, 0, vcc
+; GCN-NEXT:    v_cmp_ne_u32_e32 vcc, 1, v1
+; GCN-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
+; GCN-NEXT:    v_cndmask_b32_e64 v0, 0, |v0|, vcc
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
   %i = select i1 %arg1, float 0.0, float %arg
   %i2 = call float @llvm.fabs.f32(float %i)
@@ -3367,9 +3367,9 @@ define float @v_fneg_fabs_select_infloop_regression(float %arg, i1 %arg1) {
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GCN-NEXT:    v_and_b32_e32 v1, 1, v1
-; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v1
-; GCN-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
-; GCN-NEXT:    v_cndmask_b32_e64 v0, -|v0|, 0, vcc
+; GCN-NEXT:    v_cmp_ne_u32_e32 vcc, 1, v1
+; GCN-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
+; GCN-NEXT:    v_cndmask_b32_e64 v0, 0, -|v0|, vcc
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
   %i = select i1 %arg1, float 0.0, float %arg
   %i2 = call float @llvm.fabs.f32(float %i)
diff --git a/llvm/test/CodeGen/AMDGPU/fptoi.i128.ll b/llvm/test/CodeGen/AMDGPU/fptoi.i128.ll
index 3465c782bd700..0ff60af86135b 100644
--- a/llvm/test/CodeGen/AMDGPU/fptoi.i128.ll
+++ b/llvm/test/CodeGen/AMDGPU/fptoi.i128.ll
@@ -241,10 +241,10 @@ define i128 @fptosi_f64_to_i128(double %x) {
 ; GISEL-NEXT:    v_sub_u32_e32 v2, 64, v7
 ; GISEL-NEXT:    v_lshrrev_b64 v[2:3], v2, v[4:5]
 ; GISEL-NEXT:    v_lshlrev_b64 v[4:5], v6, v[4:5]
-; GISEL-NEXT:    v_cmp_eq_u32_e64 s[6:7], 0, v7
+; GISEL-NEXT:    v_cmp_ne_u32_e64 s[6:7], 0, v7
 ; GISEL-NEXT:    v_mad_u64_u32 v[6:7], s[8:9], v11, v9, v[0:1]
 ; GISEL-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
-; GISEL-NEXT:    v_cndmask_b32_e64 v12, v2, 0, s[6:7]
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, v2, s[6:7]
 ; GISEL-NEXT:    v_mad_u64_u32 v[6:7], s[8:9], v12, v8, v[6:7]
 ; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[8:9], v10, v8, 0
 ; GISEL-NEXT:    v_mov_b32_e32 v2, v6
@@ -256,7 +256,7 @@ define i128 @fptosi_f64_to_i128(double %x) {
 ; GISEL-NEXT:    v_addc_co_u32_e64 v6, s[10:11], v7, v6, s[10:11]
 ; GISEL-NEXT:    v_addc_co_u32_e64 v4, s[8:9], v6, v4, s[8:9]
 ; GISEL-NEXT:    v_mad_u64_u32 v[6:7], s[8:9], v12, v9, v[4:5]
-; GISEL-NEXT:    v_cndmask_b32_e64 v3, v3, 0, s[6:7]
+; GISEL-NEXT:    v_cndmask_b32_e64 v3, 0, v3, s[6:7]
 ; GISEL-NEXT:    ; implicit-def: $vgpr9
 ; GISEL-NEXT:    v_mad_u64_u32 v[3:4], s[6:7], v3, v8, v[6:7]
 ; GISEL-NEXT:    ; implicit-def: $vgpr6
@@ -605,10 +605,10 @@ define i128 @fptoui_f64_to_i128(double %x) {
 ; GISEL-NEXT:    v_sub_u32_e32 v2, 64, v7
 ; GISEL-NEXT:    v_lshrrev_b64 v[2:3], v2, v[4:5]
 ; GISEL-NEXT:    v_lshlrev_b64 v[4:5], v6, v[4:5]
-; GISEL-NEXT:    v_cmp_eq_u32_e64 s[6:7], 0, v7
+; GISEL-NEXT:    v_cmp_ne_u32_e64 s[6:7], 0, v7
 ; GISEL-NEXT:    v_mad_u64_u32 v[6:7], s[8:9], v11, v9, v[0:1]
 ; GISEL-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
-; GISEL-NEXT:    v_cndmask_b32_e64 v12, v2, 0, s[6:7]
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, v2, s[6:7]
 ; GISEL-NEXT:    v_mad_u64_u32 v[6:7], s[8:9], v12, v8, v[6:7]
 ; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[8:9], v10, v8, 0
 ; GISEL-NEXT:    v_mov_b32_e32 v2, v6
@@ -620,7 +620,7 @@ define i128 @fptoui_f64_to_i128(double %x) {
 ; GISEL-NEXT:    v_addc_co_u32_e64 v6, s[10:11], v7, v6, s[10:11]
 ; GISEL-NEXT:    v_addc_co_u32_e64 v4, s[8:9], v6, v4, s[8:9]
 ; GISEL-NEXT:    v_mad_u64_u32 v[6:7], s[8:9], v12, v9, v[4:5]
-; GISEL-NEXT:    v_cndmask_b32_e64 v3, v3, 0, s[6:7]
+; GISEL-NEXT:    v_cndmask_b32_e64 v3, 0, v3, s[6:7]
 ; GISEL-NEXT:    ; implicit-def: $vgpr9
 ; GISEL-NEXT:    v_mad_u64_u32 v[3:4], s[6:7], v3, v8, v[6:7]
 ; GISEL-NEXT:    ; implicit-def: $vgpr6
@@ -962,10 +962,10 @@ define i128 @fptosi_f32_to_i128(float %x) {
 ; GISEL-NEXT:    v_sub_u32_e32 v2, 64, v7
 ; GISEL-NEXT:    v_lshrrev_b64 v[2:3], v2, v[4:5]
 ; GISEL-NEXT:    v_lshlrev_b64 v[4:5], v6, v[4:5]
-; GISEL-NEXT:    v_cmp_eq_u32_e64 s[6:7], 0, v7
+; GISEL-NEXT:    v_cmp_ne_u32_e64 s[6:7], 0, v7
 ; GISEL-NEXT:    v_mad_u64_u32 v[6:7], s[8:9], v11, v8, v[0:1]
 ; GISEL-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
-; GISEL-NEXT:    v_cndmask_b32_e64 v12, v2, 0, s[6:7]
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, v2, s[6:7]
 ; GISEL-NEXT:    v_mad_u64_u32 v[6:7], s[8:9], v12, v9, v[6:7]
 ; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[8:9], v10, v9, 0
 ; GISEL-NEXT:    v_mov_b32_e32 v2, v6
@@ -977,7 +977,7 @@ define i128 @fptosi_f32_to_i128(float %x) {
 ; GISEL-NEXT:    v_addc_co_u32_e64 v6, s[10:11], v7, v6, s[10:11]
 ; GISEL-NEXT:    v_addc_co_u32_e64 v4, s[8:9], v6, v4, s[8:9]
 ; GISEL-NEXT:    v_mad_u64_u32 v[6:7], s[8:9], v12, v8, v[4:5]
-; GISEL-NEXT:    v_cndmask_b32_e64 v3, v3, 0, s[6:7]
+; GISEL-NEXT:    v_cndmask_b32_e64 v3, 0, v3, s[6:7]
 ; GISEL-NEXT:    ; implicit-def: $vgpr8
 ; GISEL-NEXT:    v_mad_u64_u32 v[3:4], s[6:7], v3, v9, v[6:7]
 ; GISEL-NEXT:    ; implicit-def: $vgpr6
@@ -1313,10 +1313,10 @@ define i128 @fptoui_f32_to_i128(float %x) {
 ; GISEL-NEXT:    v_sub_u32_e32 v2, 64, v7
 ; GISEL-NEXT:    v_lshrrev_b64 v[2:3], v2, v[4:5]
 ; GISEL-NEXT:    v_lshlrev_b64 v[4:5], v6, v[4:5]
-; GISEL-NEXT:    v_cmp_eq_u32_e64 s[6:7], 0, v7
+; GISEL-NEXT:    v_cmp_ne_u32_e64 s[6:7], 0, v7
 ; GISEL-NEXT:    v_mad_u64_u32 v[6:7], s[8:9], v11, v8, v[0:1]
 ; GISEL-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
-; GISEL-NEXT:    v_cndmask_b32_e64 v12, v2, 0, s[6:7]
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, v2, s[6:7]
 ; GISEL-NEXT:    v_mad_u64_u32 v[6:7], s[8:9], v12, v9, v[6:7]
 ; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[8:9], v10, v9, 0
 ; GISEL-NEXT:    v_mov_b32_e32 v2, v6
@@ -1328,7 +1328,7 @@ define i128 @fptoui_f32_to_i128(float %x) {
 ; GISEL-NEXT:    v_addc_co_u32_e64 v6, s[10:11], v7, v6, s[10:11]
 ; GISEL-NEXT:    v_addc_co_u32_e64 v4, s[8:9], v6, v4, s[8:9]
 ; GISEL-NEXT:    v_mad_u64_u32 v[6:7], s[8:9], v12, v8, v[4:5]
-; GISEL-NEXT:    v_cndmask_b32_e64 v3, v3, 0, s[6:7]
+; GISEL-NEXT:    v_cndmask_b32_e64 v3, 0, v3, s[6:7]
 ; GISEL-NEXT:    ; implicit-def: $vgpr8
 ; GISEL-NEXT:    v_mad_u64_u32 v[3:4], s[6:7], v3, v9, v[6:7]
 ; GISEL-NEXT:    ; implicit-def: $vgpr6
@@ -1692,8 +1692,8 @@ define i128 @fptosi_bf16_to_i128(bfloat %x) {
 ; GISEL-NEXT:    v_lshlrev_b64 v[4:5], v4, v[6:7]
 ; GISEL-NEXT:    v_mad_u64_u32 v[6:7], s[8:9], v12, v9, v[0:1]
 ; GISEL-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
-; GISEL-NEXT:    v_cmp_eq_u32_e64 s[6:7], 0, v10
-; GISEL-NEXT:    v_cndmask_b32_e64 v10, v2, 0, s[6:7]
+; GISEL-NEXT:    v_cmp_ne_u32_e64 s[6:7], 0, v10
+; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, v2, s[6:7]
 ; GISEL-NEXT:    v_mad_u64_u32 v[6:7], s[8:9], v10, v8, v[6:7]
 ; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[8:9], v11, v8, 0
 ; GISEL-NEXT:    v_mov_b32_e32 v2, v6
@@ -1705,7 +1705,7 @@ define i128 @fptosi_bf16_to_i128(bfloat %x) {
 ; GISEL-NEXT:    v_addc_co_u32_e64 v6, s[10:11], v7, v6, s[10:11]
 ; GISEL-NEXT:    v_addc_co_u32_e64 v4, s[8:9], v6, v4, s[8:9]
 ; GISEL-NEXT:    v_mad_u64_u32 v[6:7], s[8:9], v10, v9, v[4:5]
-; GISEL-NEXT:    v_cndmask_b32_e64 v3, v3, 0, s[6:7]
+; GISEL-NEXT:    v_cndmask_b32_e64 v3, 0, v3, s[6:7]
 ; GISEL-NEXT:    ; implicit-def: $vgpr5
 ; GISEL-NEXT:    v_mad_u64_u32 v[3:4], s[6:7], v3, v8, v[6:7]
 ; GISEL-NEXT:    ; implicit-def: $vgpr6_vgpr7
@@ -2039,8 +2039,8 @@ define i128 @fptoui_bf16_to_i128(bfloat %x) {
 ; GISEL-NEXT:    v_lshlrev_b64 v[4:5], v4, v[6:7]
 ; GISEL-NEXT:    v_mad_u64_u32 v[6:7], s[8:9], v12, v9, v[0:1]
 ; GISEL-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
-; GISEL-NEXT:    v_cmp_eq_u32_e64 s[6:7], 0, v10
-; GISEL-NEXT:    v_cndmask_b32_e64 v10, v2, 0, s[6:7]
+; GISEL-NEXT:    v_cmp_ne_u32_e64 s[6:7], 0, v10
+; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, v2, s[6:7]
 ; GISEL-NEXT:    v_mad_u64_u32 v[6:7], s[8:9], v10, v8, v[6:7]
 ; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[8:9], v11, v8, 0
 ; GISEL-NEXT:    v_mov_b32_e32 v2, v6
@@ -2052,7 +2052,7 @@ define i128 @fptoui_bf16_to_i128(bfloat %x) {
 ; GISEL-NEXT:    v_addc_co_u32_e64 v6, s[10:11], v7, v6, s[10:11]
 ; GISEL-NEXT:    v_addc_co_u32_e64 v4, s[8:9], v6, v4, s[8:9]
 ; GISEL-NEXT:    v_mad_u64_u32 v[6:7], s[8:9], v10, v9, v[4:5]
-; GISEL-NEXT:    v_cndmask_b32_e64 v3, v3, 0, s[6:7]
+; GISEL-NEXT:    v_cndmask_b32_e64 v3, 0, v3, s[6:7]
 ; GISEL-NEXT:    ; implicit-def: $vgpr5
 ; GISEL-NEXT:    v_mad_u64_u32 v[3:4], s[6:7], v3, v8, v[6:7]
 ; GISEL-NEXT:    ; implicit-def: $vgpr6_vgpr7
diff --git a/llvm/test/CodeGen/AMDGPU/insert_vector_dynelt.ll b/llvm/test/CodeGen/AMDGPU/insert_vector_dynelt.ll
index 4b9da7b49e997..46c868b4559b4 100644
--- a/llvm/test/CodeGen/AMDGPU/insert_vector_dynelt.ll
+++ b/llvm/test/CodeGen/AMDGPU/insert_vector_dynelt.ll
@@ -1920,31 +1920,31 @@ define <8 x double> @double8_inselt_vec(<8 x double> %vec, i32 %sel) {
 ; GCN-LABEL: double8_inselt_vec:
 ; GCN:       ; %bb.0: ; %entry
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v16
+; GCN-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v16
 ; GCN-NEXT:    v_mov_b32_e32 v17, 0x3ff00000
-; GCN-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
-; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v17, vcc
-; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v16
-; GCN-NEXT:    v_cndmask_b32_e64 v2, v2, 0, vcc
-; GCN-NEXT:    v_cndmask_b32_e32 v3, v3, v17, vcc
-; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v16
-; GCN-NEXT:    v_cndmask_b32_e64 v4, v4, 0, vcc
-; GCN-NEXT:    v_cndmask_b32_e32 v5, v5, v17, vcc
-; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v16
-; GCN-NEXT:    v_cndmask_b32_e64 v6, v6, 0, vcc
-; GCN-NEXT:    v_cndmask_b32_e32 v7, v7, v17, vcc
-; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v16
-; GCN-NEXT:    v_cndmask_b32_e64 v8, v8, 0, vcc
-; GCN-NEXT:    v_cndmask_b32_e32 v9, v9, v17, vcc
-; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v16
-; GCN-NEXT:    v_cndmask_b32_e64 v10, v10, 0, vcc
-; GCN-NEXT:    v_cndmask_b32_e32 v11, v11, v17, vcc
-; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v16
-; GCN-NEXT:    v_cndmask_b32_e64 v12, v12, 0, vcc
-; GCN-NEXT:    v_cndmask_b32_e32 v13, v13, v17, vcc
-; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 7, v16
-; GCN-NEXT:    v_cndmask_b32_e64 v14, v14, 0, vcc
-; GCN-NEXT:    v_cndmask_b32_e32 v15, v15, v17, vcc
+; GCN-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
+; GCN-NEXT:    v_cndmask_b32_e32 v1, v17, v1, vcc
+; GCN-NEXT:    v_cmp_ne_u32_e32 vcc, 1, v16
+; GCN-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
+; GCN-NEXT:    v_cndmask_b32_e32 v3, v17, v3, vcc
+; GCN-NEXT:    v_cmp_ne_u32_e32 vcc, 2, v16
+; GCN-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
+; GCN-NEXT:    v_cndmask_b32_e32 v5, v17, v5, vcc
+; GCN-NEXT:    v_cmp_ne_u32_e32 vcc, 3, v16
+; GCN-NEXT:    v_cndmask_b32_e32 v6, 0, v6, vcc
+; GCN-NEXT:    v_cndmask_b32_e32 v7, v17, v7, vcc
+; GCN-NEXT:    v_cmp_ne_u32_e32 vcc, 4, v16
+; GCN-NEXT:    v_cndmask_b32_e32 v8, 0, v8, vcc
+; GCN-NEXT:    v_cndmask_b32_e32 v9, v17, v9, vcc
+; GCN-NEXT:    v_cmp_ne_u32_e32 vcc, 5, v16
+; GCN-NEXT:    v_cndmask_b32_e32 v10, 0, v10, vcc
+; GCN-NEXT:    v_cndmask_b32_e32 v11, v17, v11, vcc
+; GCN-NEXT:    v_cmp_ne_u32_e32 vcc, 6, v16
+; GCN-NEXT:    v_cndmask_b32_e32 v12, 0, v12, vcc
+; GCN-NEXT:    v_cndmask_b32_e32 v13, v17, v13, vcc
+; GCN-NEXT:    v_cmp_ne_u32_e32 vcc, 7, v16
+; GCN-NEXT:    v_cndmask_b32_e32 v14, 0, v14, vcc
+; GCN-NEXT:    v_cndmask_b32_e32 v15, v17, v15, vcc
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
 entry:
   %v = insertelement <8 x double> %vec, double 1.000000e+00, i32 %sel
diff --git a/llvm/test/CodeGen/AMDGPU/itofp.i128.ll b/llvm/test/CodeGen/AMDGPU/itofp.i128.ll
index c316ec71863d0..6bfeda6a1a9e5 100644
--- a/llvm/test/CodeGen/AMDGPU/itofp.i128.ll
+++ b/llvm/test/CodeGen/AMDGPU/itofp.i128.ll
@@ -214,11 +214,11 @@ define float @sitofp_i128_to_f32(i128 %x) {
 ; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v14
 ; GISEL-NEXT:    v_cndmask_b32_e32 v5, v11, v15, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e32 v11, v12, v16, vcc
-; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v14
+; GISEL-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v14
 ; GISEL-NEXT:    v_cndmask_b32_e32 v9, 0, v9, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e32 v10, 0, v10, vcc
-; GISEL-NEXT:    v_cndmask_b32_e64 v5, v5, -1, s[4:5]
-; GISEL-NEXT:    v_cndmask_b32_e64 v11, v11, -1, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e64 v5, -1, v5, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, -1, v11, s[4:5]
 ; GISEL-NEXT:    v_and_b32_e32 v2, v9, v2
 ; GISEL-NEXT:    v_and_b32_e32 v3, v10, v3
 ; GISEL-NEXT:    v_and_or_b32 v0, v5, v0, v2
@@ -459,11 +459,11 @@ define float @uitofp_i128_to_f32(i128 %x) {
 ; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v13
 ; GISEL-NEXT:    v_cndmask_b32_e32 v5, v10, v14, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e32 v10, v11, v15, vcc
-; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v13
+; GISEL-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v13
 ; GISEL-NEXT:    v_cndmask_b32_e32 v8, 0, v8, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e32 v9, 0, v9, vcc
-; GISEL-NEXT:    v_cndmask_b32_e64 v5, v5, -1, s[4:5]
-; GISEL-NEXT:    v_cndmask_b32_e64 v10, v10, -1, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e64 v5, -1, v5, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e64 v10, -1, v10, s[4:5]
 ; GISEL-NEXT:    v_and_b32_e32 v2, v8, v2
 ; GISEL-NEXT:    v_and_b32_e32 v3, v9, v3
 ; GISEL-NEXT:    v_and_or_b32 v0, v5, v0, v2
@@ -746,11 +746,11 @@ define double @sitofp_i128_to_f64(i128 %x) {
 ; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v15
 ; GISEL-NEXT:    v_cndmask_b32_e32 v9, v12, v16, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e32 v12, v13, v17, vcc
-; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v15
+; GISEL-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v15
 ; GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
-; GISEL-NEXT:    v_cndmask_b32_e64 v9, v9, -1, s[4:5]
-; GISEL-NEXT:    v_cndmask_b32_e64 v12, v12, -1, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e64 v9, -1, v9, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, -1, v12, s[4:5]
 ; GISEL-NEXT:    v_and_b32_e32 v0, v0, v4
 ; GISEL-NEXT:    v_and_b32_e32 v1, v1, v5
 ; GISEL-NEXT:    v_and_or_b32 v0, v9, v2, v0
@@ -1023,11 +1023,11 @@ define double @uitofp_i128_to_f64(i128 %x) {
 ; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v15
 ; GISEL-NEXT:    v_cndmask_b32_e32 v8, v12, v16, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e32 v12, v13, v17, vcc
-; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v15
+; GISEL-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v15
 ; GISEL-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e32 v5, 0, v5, vcc
-; GISEL-NEXT:    v_cndmask_b32_e64 v8, v8, -1, s[4:5]
-; GISEL-NEXT:    v_cndmask_b32_e64 v12, v12, -1, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e64 v8, -1, v8, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, -1, v12, s[4:5]
 ; GISEL-NEXT:    v_and_b32_e32 v2, v4, v2
 ; GISEL-NEXT:    v_and_b32_e32 v3, v5, v3
 ; GISEL-NEXT:    v_and_or_b32 v0, v8, v0, v2
@@ -1305,11 +1305,11 @@ define half @sitofp_i128_to_f16(i128 %x) {
 ; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v14
 ; GISEL-NEXT:    v_cndmask_b32_e32 v5, v11, v15, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e32 v11, v12, v16, vcc
-; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v14
+; GISEL-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v14
 ; GISEL-NEXT:    v_cndmask_b32_e32 v9, 0, v9, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e32 v10, 0, v10, vcc
-; GISEL-NEXT:    v_cndmask_b32_e64 v5, v5, -1, s[4:5]
-; GISEL-NEXT:    v_cndmask_b32_e64 v11, v11, -1, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e64 v5, -1, v5, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, -1, v11, s[4:5]
 ; GISEL-NEXT:    v_and_b32_e32 v2, v9, v2
 ; GISEL-NEXT:    v_and_b32_e32 v3, v10, v3
 ; GISEL-NEXT:    v_and_or_b32 v0, v5, v0, v2
@@ -1552,11 +1552,11 @@ define half @uitofp_i128_to_f16(i128 %x) {
 ; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v13
 ; GISEL-NEXT:    v_cndmask_b32_e32 v5, v10, v14, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e32 v10, v11, v15, vcc
-; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v13
+; GISEL-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v13
 ; GISEL-NEXT:    v_cndmask_b32_e32 v8, 0, v8, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e32 v9, 0, v9, vcc
-; GISEL-NEXT:    v_cndmask_b32_e64 v5, v5, -1, s[4:5]
-; GISEL-NEXT:    v_cndmask_b32_e64 v10, v10, -1, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e64 v5, -1, v5, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e64 v10, -1, v10, s[4:5]
 ; GISEL-NEXT:    v_and_b32_e32 v2, v8, v2
 ; GISEL-NEXT:    v_and_b32_e32 v3, v9, v3
 ; GISEL-NEXT:    v_and_or_b32 v0, v5, v0, v2
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.exp.ll b/llvm/test/CodeGen/AMDGPU/llvm.exp.ll
index fdccacf372dfa..30806565da7df 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.exp.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.exp.ll
@@ -69,9 +69,9 @@ define amdgpu_kernel void @s_exp_f32(ptr addrspace(1) %out, float %in) {
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x7f800000
 ; VI-GISEL-NEXT:    v_ldexp_f32 v0, v0, v1
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v1, 0xc2ce8ed0
-; VI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, s2, v1
+; VI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s2, v1
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v1, 0x42b17218
-; VI-GISEL-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
+; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
 ; VI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, s2, v1
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v2, v0, v2, vcc
 ; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
@@ -123,11 +123,11 @@ define amdgpu_kernel void @s_exp_f32(ptr addrspace(1) %out, float %in) {
 ; GFX900-GISEL-NEXT:    v_cvt_i32_f32_e32 v1, v3
 ; GFX900-GISEL-NEXT:    v_exp_f32_e32 v0, v0
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
-; GFX900-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, s2, v2
+; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s2, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0x7f800000
 ; GFX900-GISEL-NEXT:    v_ldexp_f32 v0, v0, v1
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v1, 0x42b17218
-; GFX900-GISEL-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
+; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
 ; GFX900-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, s2, v1
 ; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v1, 0
@@ -179,11 +179,11 @@ define amdgpu_kernel void @s_exp_f32(ptr addrspace(1) %out, float %in) {
 ; SI-GISEL-NEXT:    v_cvt_i32_f32_e32 v1, v3
 ; SI-GISEL-NEXT:    v_exp_f32_e32 v0, v0
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
-; SI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, s2, v2
+; SI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s2, v2
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x7f800000
 ; SI-GISEL-NEXT:    v_ldexp_f32_e32 v0, v0, v1
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v1, 0x42b17218
-; SI-GISEL-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
+; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
 ; SI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, s2, v1
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; SI-GISEL-NEXT:    s_mov_b32 s2, -1
@@ -424,15 +424,15 @@ define amdgpu_kernel void @s_exp_v2f32(ptr addrspace(1) %out, <2 x float> %in) {
 ; VI-GISEL-NEXT:    v_exp_f32_e32 v5, v0
 ; VI-GISEL-NEXT:    v_ldexp_f32 v2, v2, v3
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v3, 0xc2ce8ed0
-; VI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, s2, v3
+; VI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s2, v3
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v4, 0x42b17218
-; VI-GISEL-NEXT:    v_cndmask_b32_e64 v2, v2, 0, vcc
+; VI-GISEL-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v6, 0x7f800000
 ; VI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, s2, v4
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v2, v6, vcc
 ; VI-GISEL-NEXT:    v_ldexp_f32 v1, v5, v1
-; VI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, s3, v3
-; VI-GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
+; VI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s3, v3
+; VI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; VI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, s3, v4
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v3, s1
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v6, vcc
@@ -504,14 +504,14 @@ define amdgpu_kernel void @s_exp_v2f32(ptr addrspace(1) %out, <2 x float> %in) {
 ; GFX900-GISEL-NEXT:    v_exp_f32_e32 v5, v0
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v4, 0xc2ce8ed0
 ; GFX900-GISEL-NEXT:    v_ldexp_f32 v2, v2, v3
-; GFX900-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, s2, v4
+; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s2, v4
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v3, 0x42b17218
-; GFX900-GISEL-NEXT:    v_cndmask_b32_e64 v2, v2, 0, vcc
+; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
 ; GFX900-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, s2, v3
 ; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v0, v2, v6, vcc
 ; GFX900-GISEL-NEXT:    v_ldexp_f32 v1, v5, v1
-; GFX900-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, s3, v4
-; GFX900-GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
+; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s3, v4
+; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; GFX900-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, s3, v3
 ; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v6, vcc
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0
@@ -585,14 +585,14 @@ define amdgpu_kernel void @s_exp_v2f32(ptr addrspace(1) %out, <2 x float> %in) {
 ; SI-GISEL-NEXT:    v_exp_f32_e32 v5, v0
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v4, 0xc2ce8ed0
 ; SI-GISEL-NEXT:    v_ldexp_f32_e32 v2, v2, v3
-; SI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, s2, v4
+; SI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s2, v4
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x42b17218
-; SI-GISEL-NEXT:    v_cndmask_b32_e64 v2, v2, 0, vcc
+; SI-GISEL-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
 ; SI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, s2, v3
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v2, v6, vcc
 ; SI-GISEL-NEXT:    v_ldexp_f32_e32 v1, v5, v1
-; SI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, s3, v4
-; SI-GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
+; SI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s3, v4
+; SI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; SI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, s3, v3
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v6, vcc
 ; SI-GISEL-NEXT:    s_mov_b32 s2, -1
@@ -960,9 +960,9 @@ define amdgpu_kernel void @s_exp_v3f32(ptr addrspace(1) %out, <3 x float> %in) {
 ; VI-GISEL-NEXT:    v_exp_f32_e32 v5, v5
 ; VI-GISEL-NEXT:    v_ldexp_f32 v0, v0, v3
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v3, 0xc2ce8ed0
-; VI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, s0, v3
+; VI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s0, v3
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v4, 0x42b17218
-; VI-GISEL-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
+; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
 ; VI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, s0, v4
 ; VI-GISEL-NEXT:    s_and_b32 s0, s2, 0xfffff000
 ; VI-GISEL-NEXT:    v_ldexp_f32 v5, v5, v6
@@ -981,13 +981,13 @@ define amdgpu_kernel void @s_exp_v3f32(ptr addrspace(1) %out, <3 x float> %in) {
 ; VI-GISEL-NEXT:    v_exp_f32_e32 v6, v1
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v7, 0x7f800000
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc
-; VI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, s1, v3
-; VI-GISEL-NEXT:    v_cndmask_b32_e64 v1, v5, 0, vcc
+; VI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s1, v3
+; VI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v5, vcc
 ; VI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, s1, v4
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
 ; VI-GISEL-NEXT:    v_ldexp_f32 v2, v6, v2
-; VI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, s2, v3
-; VI-GISEL-NEXT:    v_cndmask_b32_e64 v2, v2, 0, vcc
+; VI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s2, v3
+; VI-GISEL-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
 ; VI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, s2, v4
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v3, s4
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v2, v2, v7, vcc
@@ -1080,21 +1080,21 @@ define amdgpu_kernel void @s_exp_v3f32(ptr addrspace(1) %out, <3 x float> %in) {
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v4, 0xc2ce8ed0
 ; GFX900-GISEL-NEXT:    v_add_f32_e32 v1, v6, v1
 ; GFX900-GISEL-NEXT:    v_ldexp_f32 v0, v0, v3
-; GFX900-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, s0, v4
+; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s0, v4
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v3, 0x42b17218
 ; GFX900-GISEL-NEXT:    v_cvt_i32_f32_e32 v2, v2
 ; GFX900-GISEL-NEXT:    v_exp_f32_e32 v6, v1
-; GFX900-GISEL-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
+; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v7, 0x7f800000
 ; GFX900-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, s0, v3
 ; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc
-; GFX900-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, s1, v4
-; GFX900-GISEL-NEXT:    v_cndmask_b32_e64 v1, v5, 0, vcc
+; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s1, v4
+; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v5, vcc
 ; GFX900-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, s1, v3
 ; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
 ; GFX900-GISEL-NEXT:    v_ldexp_f32 v2, v6, v2
-; GFX900-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, s2, v4
-; GFX900-GISEL-NEXT:    v_cndmask_b32_e64 v2, v2, 0, vcc
+; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s2, v4
+; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
 ; GFX900-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, s2, v3
 ; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v2, v2, v7, vcc
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v3, 0
@@ -1189,21 +1189,21 @@ define amdgpu_kernel void @s_exp_v3f32(ptr addrspace(1) %out, <3 x float> %in) {
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v4, 0xc2ce8ed0
 ; SI-GISEL-NEXT:    v_add_f32_e32 v1, v6, v1
 ; SI-GISEL-NEXT:    v_ldexp_f32_e32 v0, v0, v3
-; SI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, s0, v4
+; SI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s0, v4
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x42b17218
 ; SI-GISEL-NEXT:    v_cvt_i32_f32_e32 v2, v2
 ; SI-GISEL-NEXT:    v_exp_f32_e32 v6, v1
-; SI-GISEL-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
+; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v7, 0x7f800000
 ; SI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, s0, v3
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc
-; SI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, s1, v4
-; SI-GISEL-NEXT:    v_cndmask_b32_e64 v1, v5, 0, vcc
+; SI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s1, v4
+; SI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v5, vcc
 ; SI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, s1, v3
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
 ; SI-GISEL-NEXT:    v_ldexp_f32_e32 v2, v6, v2
-; SI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, s2, v4
-; SI-GISEL-NEXT:    v_cndmask_b32_e64 v2, v2, 0, vcc
+; SI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s2, v4
+; SI-GISEL-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
 ; SI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, s2, v3
 ; SI-GISEL-NEXT:    s_mov_b32 s7, 0xf000
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v2, v2, v7, vcc
@@ -1719,8 +1719,8 @@ define amdgpu_kernel void @s_exp_v4f32(ptr addrspace(1) %out, <4 x float> %in) {
 ; VI-GISEL-NEXT:    v_add_f32_e32 v1, v6, v1
 ; VI-GISEL-NEXT:    v_cvt_i32_f32_e32 v6, v7
 ; VI-GISEL-NEXT:    v_exp_f32_e32 v1, v1
-; VI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, s0, v4
-; VI-GISEL-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
+; VI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s0, v4
+; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
 ; VI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, s0, v5
 ; VI-GISEL-NEXT:    s_and_b32 s0, s2, 0xfffff000
 ; VI-GISEL-NEXT:    v_ldexp_f32 v1, v1, v6
@@ -1752,19 +1752,19 @@ define amdgpu_kernel void @s_exp_v4f32(ptr addrspace(1) %out, <4 x float> %in) {
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v7, 0x7f800000
 ; VI-GISEL-NEXT:    v_add_f32_e32 v2, v2, v3
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc
-; VI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, s1, v4
+; VI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s1, v4
 ; VI-GISEL-NEXT:    v_cvt_i32_f32_e32 v3, v8
 ; VI-GISEL-NEXT:    v_exp_f32_e32 v8, v2
-; VI-GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
+; VI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; VI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, s1, v5
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
-; VI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, s2, v4
-; VI-GISEL-NEXT:    v_cndmask_b32_e64 v2, v6, 0, vcc
+; VI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s2, v4
+; VI-GISEL-NEXT:    v_cndmask_b32_e32 v2, 0, v6, vcc
 ; VI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, s2, v5
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v2, v2, v7, vcc
 ; VI-GISEL-NEXT:    v_ldexp_f32 v3, v8, v3
-; VI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, s3, v4
-; VI-GISEL-NEXT:    v_cndmask_b32_e64 v3, v3, 0, vcc
+; VI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s3, v4
+; VI-GISEL-NEXT:    v_cndmask_b32_e32 v3, 0, v3, vcc
 ; VI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, s3, v5
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v4, s4
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v3, v3, v7, vcc
@@ -1855,7 +1855,7 @@ define amdgpu_kernel void @s_exp_v4f32(ptr addrspace(1) %out, <4 x float> %in) {
 ; GFX900-GISEL-NEXT:    v_cvt_i32_f32_e32 v1, v4
 ; GFX900-GISEL-NEXT:    v_exp_f32_e32 v0, v0
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v4, 0xc2ce8ed0
-; GFX900-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, s0, v4
+; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s0, v4
 ; GFX900-GISEL-NEXT:    v_ldexp_f32 v0, v0, v1
 ; GFX900-GISEL-NEXT:    v_mul_f32_e32 v1, s1, v2
 ; GFX900-GISEL-NEXT:    v_fma_f32 v6, s1, v2, -v1
@@ -1865,7 +1865,7 @@ define amdgpu_kernel void @s_exp_v4f32(ptr addrspace(1) %out, <4 x float> %in) {
 ; GFX900-GISEL-NEXT:    v_add_f32_e32 v1, v1, v6
 ; GFX900-GISEL-NEXT:    v_cvt_i32_f32_e32 v6, v7
 ; GFX900-GISEL-NEXT:    v_exp_f32_e32 v1, v1
-; GFX900-GISEL-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
+; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v7, 0x7f800000
 ; GFX900-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, s0, v5
 ; GFX900-GISEL-NEXT:    v_ldexp_f32 v1, v1, v6
@@ -1878,8 +1878,8 @@ define amdgpu_kernel void @s_exp_v4f32(ptr addrspace(1) %out, <4 x float> %in) {
 ; GFX900-GISEL-NEXT:    v_cvt_i32_f32_e32 v8, v9
 ; GFX900-GISEL-NEXT:    v_exp_f32_e32 v6, v6
 ; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc
-; GFX900-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, s1, v4
-; GFX900-GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
+; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s1, v4
+; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; GFX900-GISEL-NEXT:    v_ldexp_f32 v6, v6, v8
 ; GFX900-GISEL-NEXT:    v_mul_f32_e32 v8, s3, v2
 ; GFX900-GISEL-NEXT:    v_fma_f32 v2, s3, v2, -v8
@@ -1891,13 +1891,13 @@ define amdgpu_kernel void @s_exp_v4f32(ptr addrspace(1) %out, <4 x float> %in) {
 ; GFX900-GISEL-NEXT:    v_exp_f32_e32 v8, v2
 ; GFX900-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, s1, v5
 ; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
-; GFX900-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, s2, v4
-; GFX900-GISEL-NEXT:    v_cndmask_b32_e64 v2, v6, 0, vcc
+; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s2, v4
+; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v2, 0, v6, vcc
 ; GFX900-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, s2, v5
 ; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v2, v2, v7, vcc
 ; GFX900-GISEL-NEXT:    v_ldexp_f32 v3, v8, v3
-; GFX900-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, s3, v4
-; GFX900-GISEL-NEXT:    v_cndmask_b32_e64 v3, v3, 0, vcc
+; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s3, v4
+; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v3, 0, v3, vcc
 ; GFX900-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, s3, v5
 ; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v3, v3, v7, vcc
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v4, 0
@@ -1989,7 +1989,7 @@ define amdgpu_kernel void @s_exp_v4f32(ptr addrspace(1) %out, <4 x float> %in) {
 ; SI-GISEL-NEXT:    v_cvt_i32_f32_e32 v1, v4
 ; SI-GISEL-NEXT:    v_exp_f32_e32 v0, v0
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v4, 0xc2ce8ed0
-; SI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, s0, v4
+; SI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s0, v4
 ; SI-GISEL-NEXT:    s_mov_b32 s6, -1
 ; SI-GISEL-NEXT:    v_ldexp_f32_e32 v0, v0, v1
 ; SI-GISEL-NEXT:    v_mul_f32_e32 v1, s1, v2
@@ -2000,7 +2000,7 @@ define amdgpu_kernel void @s_exp_v4f32(ptr addrspace(1) %out, <4 x float> %in) {
 ; SI-GISEL-NEXT:    v_add_f32_e32 v1, v1, v6
 ; SI-GISEL-NEXT:    v_cvt_i32_f32_e32 v6, v7
 ; SI-GISEL-NEXT:    v_exp_f32_e32 v1, v1
-; SI-GISEL-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
+; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v7, 0x7f800000
 ; SI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, s0, v5
 ; SI-GISEL-NEXT:    v_ldexp_f32_e32 v1, v1, v6
@@ -2013,8 +2013,8 @@ define amdgpu_kernel void @s_exp_v4f32(ptr addrspace(1) %out, <4 x float> %in) {
 ; SI-GISEL-NEXT:    v_cvt_i32_f32_e32 v8, v9
 ; SI-GISEL-NEXT:    v_exp_f32_e32 v6, v6
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc
-; SI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, s1, v4
-; SI-GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
+; SI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s1, v4
+; SI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; SI-GISEL-NEXT:    v_ldexp_f32_e32 v6, v6, v8
 ; SI-GISEL-NEXT:    v_mul_f32_e32 v8, s3, v2
 ; SI-GISEL-NEXT:    v_fma_f32 v2, s3, v2, -v8
@@ -2026,13 +2026,13 @@ define amdgpu_kernel void @s_exp_v4f32(ptr addrspace(1) %out, <4 x float> %in) {
 ; SI-GISEL-NEXT:    v_exp_f32_e32 v8, v2
 ; SI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, s1, v5
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
-; SI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, s2, v4
-; SI-GISEL-NEXT:    v_cndmask_b32_e64 v2, v6, 0, vcc
+; SI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s2, v4
+; SI-GISEL-NEXT:    v_cndmask_b32_e32 v2, 0, v6, vcc
 ; SI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, s2, v5
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v2, v2, v7, vcc
 ; SI-GISEL-NEXT:    v_ldexp_f32_e32 v3, v8, v3
-; SI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, s3, v4
-; SI-GISEL-NEXT:    v_cndmask_b32_e64 v3, v3, 0, vcc
+; SI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s3, v4
+; SI-GISEL-NEXT:    v_cndmask_b32_e32 v3, 0, v3, vcc
 ; SI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, s3, v5
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v3, v3, v7, vcc
 ; SI-GISEL-NEXT:    s_mov_b32 s7, 0xf000
@@ -2565,9 +2565,9 @@ define float @v_exp_f32(float %in) {
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; VI-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
-; VI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x42b17218
-; VI-GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
+; VI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; VI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v2
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v3, vcc
 ; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -2611,9 +2611,9 @@ define float @v_exp_f32(float %in) {
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; GFX900-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
-; GFX900-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0x42b17218
-; GFX900-GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
+; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; GFX900-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v2
 ; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v3, vcc
 ; GFX900-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -2657,9 +2657,9 @@ define float @v_exp_f32(float %in) {
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; SI-GISEL-NEXT:    v_ldexp_f32_e32 v1, v1, v2
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
-; SI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x42b17218
-; SI-GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
+; SI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; SI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v2
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v3, vcc
 ; SI-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -2725,9 +2725,9 @@ define float @v_exp_fabs_f32(float %in) {
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; VI-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
-; VI-GISEL-NEXT:    v_cmp_lt_f32_e64 s[4:5], |v0|, v2
+; VI-GISEL-NEXT:    v_cmp_ge_f32_e64 vcc, |v0|, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x42b17218
-; VI-GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, 0, s[4:5]
+; VI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; VI-GISEL-NEXT:    v_cmp_gt_f32_e64 vcc, |v0|, v2
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v3, vcc
 ; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -2771,9 +2771,9 @@ define float @v_exp_fabs_f32(float %in) {
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; GFX900-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
-; GFX900-GISEL-NEXT:    v_cmp_lt_f32_e64 s[4:5], |v0|, v2
+; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e64 vcc, |v0|, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0x42b17218
-; GFX900-GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, 0, s[4:5]
+; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; GFX900-GISEL-NEXT:    v_cmp_gt_f32_e64 vcc, |v0|, v2
 ; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v3, vcc
 ; GFX900-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -2817,9 +2817,9 @@ define float @v_exp_fabs_f32(float %in) {
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; SI-GISEL-NEXT:    v_ldexp_f32_e32 v1, v1, v2
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
-; SI-GISEL-NEXT:    v_cmp_lt_f32_e64 s[4:5], |v0|, v2
+; SI-GISEL-NEXT:    v_cmp_ge_f32_e64 vcc, |v0|, v2
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x42b17218
-; SI-GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, 0, s[4:5]
+; SI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; SI-GISEL-NEXT:    v_cmp_gt_f32_e64 vcc, |v0|, v2
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v3, vcc
 ; SI-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -2886,9 +2886,9 @@ define float @v_exp_fneg_fabs_f32(float %in) {
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; VI-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
-; VI-GISEL-NEXT:    v_cmp_lt_f32_e64 s[4:5], -|v0|, v2
+; VI-GISEL-NEXT:    v_cmp_ge_f32_e64 vcc, -|v0|, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x42b17218
-; VI-GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, 0, s[4:5]
+; VI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; VI-GISEL-NEXT:    v_cmp_gt_f32_e64 vcc, -|v0|, v2
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v3, vcc
 ; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -2932,9 +2932,9 @@ define float @v_exp_fneg_fabs_f32(float %in) {
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; GFX900-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
-; GFX900-GISEL-NEXT:    v_cmp_lt_f32_e64 s[4:5], -|v0|, v2
+; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e64 vcc, -|v0|, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0x42b17218
-; GFX900-GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, 0, s[4:5]
+; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; GFX900-GISEL-NEXT:    v_cmp_gt_f32_e64 vcc, -|v0|, v2
 ; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v3, vcc
 ; GFX900-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -2978,9 +2978,9 @@ define float @v_exp_fneg_fabs_f32(float %in) {
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; SI-GISEL-NEXT:    v_ldexp_f32_e32 v1, v1, v2
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
-; SI-GISEL-NEXT:    v_cmp_lt_f32_e64 s[4:5], -|v0|, v2
+; SI-GISEL-NEXT:    v_cmp_ge_f32_e64 vcc, -|v0|, v2
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x42b17218
-; SI-GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, 0, s[4:5]
+; SI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; SI-GISEL-NEXT:    v_cmp_gt_f32_e64 vcc, -|v0|, v2
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v3, vcc
 ; SI-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -3048,9 +3048,9 @@ define float @v_exp_fneg_f32(float %in) {
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; VI-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
-; VI-GISEL-NEXT:    v_cmp_lt_f32_e64 s[4:5], -v0, v2
+; VI-GISEL-NEXT:    v_cmp_ge_f32_e64 vcc, -v0, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x42b17218
-; VI-GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, 0, s[4:5]
+; VI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; VI-GISEL-NEXT:    v_cmp_gt_f32_e64 vcc, -v0, v2
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v3, vcc
 ; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -3094,9 +3094,9 @@ define float @v_exp_fneg_f32(float %in) {
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; GFX900-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
-; GFX900-GISEL-NEXT:    v_cmp_lt_f32_e64 s[4:5], -v0, v2
+; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e64 vcc, -v0, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0x42b17218
-; GFX900-GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, 0, s[4:5]
+; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; GFX900-GISEL-NEXT:    v_cmp_gt_f32_e64 vcc, -v0, v2
 ; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v3, vcc
 ; GFX900-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -3140,9 +3140,9 @@ define float @v_exp_fneg_f32(float %in) {
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; SI-GISEL-NEXT:    v_ldexp_f32_e32 v1, v1, v2
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
-; SI-GISEL-NEXT:    v_cmp_lt_f32_e64 s[4:5], -v0, v2
+; SI-GISEL-NEXT:    v_cmp_ge_f32_e64 vcc, -v0, v2
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x42b17218
-; SI-GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, 0, s[4:5]
+; SI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; SI-GISEL-NEXT:    v_cmp_gt_f32_e64 vcc, -v0, v2
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v3, vcc
 ; SI-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -3400,8 +3400,8 @@ define float @v_exp_f32_ninf(float %in) {
 ; VI-GISEL-NEXT:    v_exp_f32_e32 v1, v1
 ; VI-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
-; VI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v2
-; VI-GISEL-NEXT:    v_cndmask_b32_e64 v0, v1, 0, vcc
+; VI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
 ; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX900-SDAG-LABEL: v_exp_f32_ninf:
@@ -3438,8 +3438,8 @@ define float @v_exp_f32_ninf(float %in) {
 ; GFX900-GISEL-NEXT:    v_exp_f32_e32 v1, v1
 ; GFX900-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
-; GFX900-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v2
-; GFX900-GISEL-NEXT:    v_cndmask_b32_e64 v0, v1, 0, vcc
+; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
 ; GFX900-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; SI-SDAG-LABEL: v_exp_f32_ninf:
@@ -3476,8 +3476,8 @@ define float @v_exp_f32_ninf(float %in) {
 ; SI-GISEL-NEXT:    v_exp_f32_e32 v1, v1
 ; SI-GISEL-NEXT:    v_ldexp_f32_e32 v1, v1, v2
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
-; SI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v2
-; SI-GISEL-NEXT:    v_cndmask_b32_e64 v0, v1, 0, vcc
+; SI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
 ; SI-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; R600-LABEL: v_exp_f32_ninf:
@@ -3770,9 +3770,9 @@ define float @v_exp_f32_daz(float %in) #0 {
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; VI-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
-; VI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x42b17218
-; VI-GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
+; VI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; VI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v2
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v3, vcc
 ; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -3816,9 +3816,9 @@ define float @v_exp_f32_daz(float %in) #0 {
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; GFX900-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
-; GFX900-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0x42b17218
-; GFX900-GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
+; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; GFX900-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v2
 ; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v3, vcc
 ; GFX900-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -3862,9 +3862,9 @@ define float @v_exp_f32_daz(float %in) #0 {
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; SI-GISEL-NEXT:    v_ldexp_f32_e32 v1, v1, v2
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
-; SI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x42b17218
-; SI-GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
+; SI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; SI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v2
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v3, vcc
 ; SI-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -3928,9 +3928,9 @@ define float @v_exp_f32_nnan(float %in) {
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; VI-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
-; VI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x42b17218
-; VI-GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
+; VI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; VI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v2
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v3, vcc
 ; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -3974,9 +3974,9 @@ define float @v_exp_f32_nnan(float %in) {
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; GFX900-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
-; GFX900-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0x42b17218
-; GFX900-GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
+; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; GFX900-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v2
 ; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v3, vcc
 ; GFX900-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -4020,9 +4020,9 @@ define float @v_exp_f32_nnan(float %in) {
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; SI-GISEL-NEXT:    v_ldexp_f32_e32 v1, v1, v2
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
-; SI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x42b17218
-; SI-GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
+; SI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; SI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v2
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v3, vcc
 ; SI-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -4086,9 +4086,9 @@ define float @v_exp_f32_nnan_daz(float %in) #0 {
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; VI-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
-; VI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x42b17218
-; VI-GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
+; VI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; VI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v2
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v3, vcc
 ; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -4132,9 +4132,9 @@ define float @v_exp_f32_nnan_daz(float %in) #0 {
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; GFX900-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
-; GFX900-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0x42b17218
-; GFX900-GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
+; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; GFX900-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v2
 ; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v3, vcc
 ; GFX900-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -4178,9 +4178,9 @@ define float @v_exp_f32_nnan_daz(float %in) #0 {
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; SI-GISEL-NEXT:    v_ldexp_f32_e32 v1, v1, v2
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
-; SI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x42b17218
-; SI-GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
+; SI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; SI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v2
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v3, vcc
 ; SI-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -4244,9 +4244,9 @@ define float @v_exp_f32_nnan_dynamic(float %in) #1 {
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; VI-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
-; VI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x42b17218
-; VI-GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
+; VI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; VI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v2
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v3, vcc
 ; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -4290,9 +4290,9 @@ define float @v_exp_f32_nnan_dynamic(float %in) #1 {
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; GFX900-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
-; GFX900-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0x42b17218
-; GFX900-GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
+; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; GFX900-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v2
 ; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v3, vcc
 ; GFX900-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -4336,9 +4336,9 @@ define float @v_exp_f32_nnan_dynamic(float %in) #1 {
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; SI-GISEL-NEXT:    v_ldexp_f32_e32 v1, v1, v2
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
-; SI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x42b17218
-; SI-GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
+; SI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; SI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v2
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v3, vcc
 ; SI-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -4397,8 +4397,8 @@ define float @v_exp_f32_ninf_daz(float %in) #0 {
 ; VI-GISEL-NEXT:    v_exp_f32_e32 v1, v1
 ; VI-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
-; VI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v2
-; VI-GISEL-NEXT:    v_cndmask_b32_e64 v0, v1, 0, vcc
+; VI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
 ; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX900-SDAG-LABEL: v_exp_f32_ninf_daz:
@@ -4435,8 +4435,8 @@ define float @v_exp_f32_ninf_daz(float %in) #0 {
 ; GFX900-GISEL-NEXT:    v_exp_f32_e32 v1, v1
 ; GFX900-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
-; GFX900-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v2
-; GFX900-GISEL-NEXT:    v_cndmask_b32_e64 v0, v1, 0, vcc
+; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
 ; GFX900-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; SI-SDAG-LABEL: v_exp_f32_ninf_daz:
@@ -4473,8 +4473,8 @@ define float @v_exp_f32_ninf_daz(float %in) #0 {
 ; SI-GISEL-NEXT:    v_exp_f32_e32 v1, v1
 ; SI-GISEL-NEXT:    v_ldexp_f32_e32 v1, v1, v2
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
-; SI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v2
-; SI-GISEL-NEXT:    v_cndmask_b32_e64 v0, v1, 0, vcc
+; SI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
 ; SI-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; R600-LABEL: v_exp_f32_ninf_daz:
@@ -4531,8 +4531,8 @@ define float @v_exp_f32_ninf_dynamic(float %in) #1 {
 ; VI-GISEL-NEXT:    v_exp_f32_e32 v1, v1
 ; VI-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
-; VI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v2
-; VI-GISEL-NEXT:    v_cndmask_b32_e64 v0, v1, 0, vcc
+; VI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
 ; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX900-SDAG-LABEL: v_exp_f32_ninf_dynamic:
@@ -4569,8 +4569,8 @@ define float @v_exp_f32_ninf_dynamic(float %in) #1 {
 ; GFX900-GISEL-NEXT:    v_exp_f32_e32 v1, v1
 ; GFX900-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
-; GFX900-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v2
-; GFX900-GISEL-NEXT:    v_cndmask_b32_e64 v0, v1, 0, vcc
+; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
 ; GFX900-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; SI-SDAG-LABEL: v_exp_f32_ninf_dynamic:
@@ -4607,8 +4607,8 @@ define float @v_exp_f32_ninf_dynamic(float %in) #1 {
 ; SI-GISEL-NEXT:    v_exp_f32_e32 v1, v1
 ; SI-GISEL-NEXT:    v_ldexp_f32_e32 v1, v1, v2
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
-; SI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v2
-; SI-GISEL-NEXT:    v_cndmask_b32_e64 v0, v1, 0, vcc
+; SI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
 ; SI-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; R600-LABEL: v_exp_f32_ninf_dynamic:
@@ -4665,8 +4665,8 @@ define float @v_exp_f32_nnan_ninf(float %in) {
 ; VI-GISEL-NEXT:    v_exp_f32_e32 v1, v1
 ; VI-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
-; VI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v2
-; VI-GISEL-NEXT:    v_cndmask_b32_e64 v0, v1, 0, vcc
+; VI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
 ; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX900-SDAG-LABEL: v_exp_f32_nnan_ninf:
@@ -4703,8 +4703,8 @@ define float @v_exp_f32_nnan_ninf(float %in) {
 ; GFX900-GISEL-NEXT:    v_exp_f32_e32 v1, v1
 ; GFX900-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
-; GFX900-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v2
-; GFX900-GISEL-NEXT:    v_cndmask_b32_e64 v0, v1, 0, vcc
+; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
 ; GFX900-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; SI-SDAG-LABEL: v_exp_f32_nnan_ninf:
@@ -4741,8 +4741,8 @@ define float @v_exp_f32_nnan_ninf(float %in) {
 ; SI-GISEL-NEXT:    v_exp_f32_e32 v1, v1
 ; SI-GISEL-NEXT:    v_ldexp_f32_e32 v1, v1, v2
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
-; SI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v2
-; SI-GISEL-NEXT:    v_cndmask_b32_e64 v0, v1, 0, vcc
+; SI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
 ; SI-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; R600-LABEL: v_exp_f32_nnan_ninf:
@@ -4799,8 +4799,8 @@ define float @v_exp_f32_nnan_ninf_daz(float %in) #0 {
 ; VI-GISEL-NEXT:    v_exp_f32_e32 v1, v1
 ; VI-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
-; VI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v2
-; VI-GISEL-NEXT:    v_cndmask_b32_e64 v0, v1, 0, vcc
+; VI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
 ; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX900-SDAG-LABEL: v_exp_f32_nnan_ninf_daz:
@@ -4837,8 +4837,8 @@ define float @v_exp_f32_nnan_ninf_daz(float %in) #0 {
 ; GFX900-GISEL-NEXT:    v_exp_f32_e32 v1, v1
 ; GFX900-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
-; GFX900-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v2
-; GFX900-GISEL-NEXT:    v_cndmask_b32_e64 v0, v1, 0, vcc
+; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
 ; GFX900-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; SI-SDAG-LABEL: v_exp_f32_nnan_ninf_daz:
@@ -4875,8 +4875,8 @@ define float @v_exp_f32_nnan_ninf_daz(float %in) #0 {
 ; SI-GISEL-NEXT:    v_exp_f32_e32 v1, v1
 ; SI-GISEL-NEXT:    v_ldexp_f32_e32 v1, v1, v2
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
-; SI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v2
-; SI-GISEL-NEXT:    v_cndmask_b32_e64 v0, v1, 0, vcc
+; SI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
 ; SI-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; R600-LABEL: v_exp_f32_nnan_ninf_daz:
@@ -4933,8 +4933,8 @@ define float @v_exp_f32_nnan_ninf_dynamic(float %in) #1 {
 ; VI-GISEL-NEXT:    v_exp_f32_e32 v1, v1
 ; VI-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
-; VI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v2
-; VI-GISEL-NEXT:    v_cndmask_b32_e64 v0, v1, 0, vcc
+; VI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
 ; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX900-SDAG-LABEL: v_exp_f32_nnan_ninf_dynamic:
@@ -4971,8 +4971,8 @@ define float @v_exp_f32_nnan_ninf_dynamic(float %in) #1 {
 ; GFX900-GISEL-NEXT:    v_exp_f32_e32 v1, v1
 ; GFX900-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
-; GFX900-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v2
-; GFX900-GISEL-NEXT:    v_cndmask_b32_e64 v0, v1, 0, vcc
+; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
 ; GFX900-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; SI-SDAG-LABEL: v_exp_f32_nnan_ninf_dynamic:
@@ -5009,8 +5009,8 @@ define float @v_exp_f32_nnan_ninf_dynamic(float %in) #1 {
 ; SI-GISEL-NEXT:    v_exp_f32_e32 v1, v1
 ; SI-GISEL-NEXT:    v_ldexp_f32_e32 v1, v1, v2
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
-; SI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v2
-; SI-GISEL-NEXT:    v_cndmask_b32_e64 v0, v1, 0, vcc
+; SI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
 ; SI-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; R600-LABEL: v_exp_f32_nnan_ninf_dynamic:
@@ -5100,9 +5100,9 @@ define float @v_exp_f32_dynamic_mode(float %in) #1 {
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; VI-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
-; VI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x42b17218
-; VI-GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
+; VI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; VI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v2
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v3, vcc
 ; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -5146,9 +5146,9 @@ define float @v_exp_f32_dynamic_mode(float %in) #1 {
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; GFX900-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
-; GFX900-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0x42b17218
-; GFX900-GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
+; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; GFX900-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v2
 ; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v3, vcc
 ; GFX900-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -5192,9 +5192,9 @@ define float @v_exp_f32_dynamic_mode(float %in) #1 {
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; SI-GISEL-NEXT:    v_ldexp_f32_e32 v1, v1, v2
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
-; SI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x42b17218
-; SI-GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
+; SI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; SI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v2
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v3, vcc
 ; SI-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -5244,9 +5244,9 @@ define float @v_exp_f32_undef() {
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x7f800000
 ; VI-GISEL-NEXT:    v_ldexp_f32 v0, v0, v1
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v1, 0xc2ce8ed0
-; VI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, s4, v1
+; VI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s4, v1
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v1, 0x42b17218
-; VI-GISEL-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
+; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
 ; VI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v1
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -5283,9 +5283,9 @@ define float @v_exp_f32_undef() {
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0x7f800000
 ; GFX900-GISEL-NEXT:    v_ldexp_f32 v0, v0, v1
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v1, 0xc2ce8ed0
-; GFX900-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, s4, v1
+; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s4, v1
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v1, 0x42b17218
-; GFX900-GISEL-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
+; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
 ; GFX900-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v1
 ; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; GFX900-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -5322,9 +5322,9 @@ define float @v_exp_f32_undef() {
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x7f800000
 ; SI-GISEL-NEXT:    v_ldexp_f32_e32 v0, v0, v1
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v1, 0xc2ce8ed0
-; SI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, s4, v1
+; SI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s4, v1
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v1, 0x42b17218
-; SI-GISEL-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
+; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
 ; SI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v1
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; SI-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -5416,9 +5416,9 @@ define float @v_exp_f32_from_fpext_f16(i16 %src.i) {
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; VI-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
-; VI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x42b17218
-; VI-GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
+; VI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; VI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v2
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v3, vcc
 ; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -5462,11 +5462,11 @@ define float @v_exp_f32_from_fpext_f16(i16 %src.i) {
 ; GFX900-GISEL-NEXT:    v_cvt_i32_f32_e32 v2, v4
 ; GFX900-GISEL-NEXT:    v_exp_f32_e32 v1, v1
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v3, 0xc2ce8ed0
-; GFX900-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v3
+; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v3
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; GFX900-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0x42b17218
-; GFX900-GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
+; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; GFX900-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v2
 ; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v3, vcc
 ; GFX900-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -5510,11 +5510,11 @@ define float @v_exp_f32_from_fpext_f16(i16 %src.i) {
 ; SI-GISEL-NEXT:    v_cvt_i32_f32_e32 v2, v4
 ; SI-GISEL-NEXT:    v_exp_f32_e32 v1, v1
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v3, 0xc2ce8ed0
-; SI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v3
+; SI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v3
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; SI-GISEL-NEXT:    v_ldexp_f32_e32 v1, v1, v2
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x42b17218
-; SI-GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
+; SI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; SI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v2
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v3, vcc
 ; SI-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -5584,9 +5584,9 @@ define float @v_exp_f32_from_fpext_math_f16(i16 %src0.i, i16 %src1.i) {
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; VI-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
-; VI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x42b17218
-; VI-GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
+; VI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; VI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v2
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v3, vcc
 ; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -5634,9 +5634,9 @@ define float @v_exp_f32_from_fpext_math_f16(i16 %src0.i, i16 %src1.i) {
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; GFX900-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
-; GFX900-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0x42b17218
-; GFX900-GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
+; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; GFX900-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v2
 ; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v3, vcc
 ; GFX900-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -5688,9 +5688,9 @@ define float @v_exp_f32_from_fpext_math_f16(i16 %src0.i, i16 %src1.i) {
 ; SI-GISEL-NEXT:    v_exp_f32_e32 v1, v1
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v4, 0x42b17218
-; SI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
 ; SI-GISEL-NEXT:    v_ldexp_f32_e32 v1, v1, v3
-; SI-GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
+; SI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; SI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v4
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v5, vcc
 ; SI-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -5905,9 +5905,9 @@ define float @v_exp_f32_from_fpext_math_f16_daz(i16 %src0.i, i16 %src1.i) #0 {
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; VI-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
-; VI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x42b17218
-; VI-GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
+; VI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; VI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v2
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v3, vcc
 ; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -5955,9 +5955,9 @@ define float @v_exp_f32_from_fpext_math_f16_daz(i16 %src0.i, i16 %src1.i) #0 {
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; GFX900-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
-; GFX900-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0x42b17218
-; GFX900-GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
+; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; GFX900-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v2
 ; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v3, vcc
 ; GFX900-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -6009,9 +6009,9 @@ define float @v_exp_f32_from_fpext_math_f16_daz(i16 %src0.i, i16 %src1.i) #0 {
 ; SI-GISEL-NEXT:    v_exp_f32_e32 v1, v1
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v4, 0x42b17218
-; SI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
 ; SI-GISEL-NEXT:    v_ldexp_f32_e32 v1, v1, v3
-; SI-GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
+; SI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; SI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v4
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v5, vcc
 ; SI-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -7032,9 +7032,9 @@ define float @v_exp_f32_contract(float %in) {
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; VI-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
-; VI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x42b17218
-; VI-GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
+; VI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; VI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v2
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v3, vcc
 ; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -7078,9 +7078,9 @@ define float @v_exp_f32_contract(float %in) {
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; GFX900-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
-; GFX900-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0x42b17218
-; GFX900-GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
+; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; GFX900-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v2
 ; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v3, vcc
 ; GFX900-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -7124,9 +7124,9 @@ define float @v_exp_f32_contract(float %in) {
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; SI-GISEL-NEXT:    v_ldexp_f32_e32 v1, v1, v2
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
-; SI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x42b17218
-; SI-GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
+; SI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; SI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v2
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v3, vcc
 ; SI-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -7190,9 +7190,9 @@ define float @v_exp_f32_contract_daz(float %in) #0 {
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; VI-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
-; VI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x42b17218
-; VI-GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
+; VI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; VI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v2
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v3, vcc
 ; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -7236,9 +7236,9 @@ define float @v_exp_f32_contract_daz(float %in) #0 {
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; GFX900-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
-; GFX900-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0x42b17218
-; GFX900-GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
+; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; GFX900-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v2
 ; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v3, vcc
 ; GFX900-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -7282,9 +7282,9 @@ define float @v_exp_f32_contract_daz(float %in) #0 {
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; SI-GISEL-NEXT:    v_ldexp_f32_e32 v1, v1, v2
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
-; SI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x42b17218
-; SI-GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
+; SI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; SI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v2
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v3, vcc
 ; SI-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -7343,8 +7343,8 @@ define float @v_exp_f32_contract_nnan_ninf(float %in) {
 ; VI-GISEL-NEXT:    v_exp_f32_e32 v1, v1
 ; VI-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
-; VI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v2
-; VI-GISEL-NEXT:    v_cndmask_b32_e64 v0, v1, 0, vcc
+; VI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
 ; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX900-SDAG-LABEL: v_exp_f32_contract_nnan_ninf:
@@ -7381,8 +7381,8 @@ define float @v_exp_f32_contract_nnan_ninf(float %in) {
 ; GFX900-GISEL-NEXT:    v_exp_f32_e32 v1, v1
 ; GFX900-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
-; GFX900-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v2
-; GFX900-GISEL-NEXT:    v_cndmask_b32_e64 v0, v1, 0, vcc
+; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
 ; GFX900-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; SI-SDAG-LABEL: v_exp_f32_contract_nnan_ninf:
@@ -7419,8 +7419,8 @@ define float @v_exp_f32_contract_nnan_ninf(float %in) {
 ; SI-GISEL-NEXT:    v_exp_f32_e32 v1, v1
 ; SI-GISEL-NEXT:    v_ldexp_f32_e32 v1, v1, v2
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
-; SI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v2
-; SI-GISEL-NEXT:    v_cndmask_b32_e64 v0, v1, 0, vcc
+; SI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
 ; SI-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; R600-LABEL: v_exp_f32_contract_nnan_ninf:
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.exp10.ll b/llvm/test/CodeGen/AMDGPU/llvm.exp10.ll
index 0c2e6f82c9115..25e08219d371a 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.exp10.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.exp10.ll
@@ -71,9 +71,9 @@ define amdgpu_kernel void @s_exp10_f32(ptr addrspace(1) %out, float %in) {
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x7f800000
 ; VI-GISEL-NEXT:    v_ldexp_f32 v0, v0, v1
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v1, 0xc23369f4
-; VI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, s2, v1
+; VI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s2, v1
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v1, 0x421a209b
-; VI-GISEL-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
+; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
 ; VI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, s2, v1
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v2, v0, v2, vcc
 ; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
@@ -125,11 +125,11 @@ define amdgpu_kernel void @s_exp10_f32(ptr addrspace(1) %out, float %in) {
 ; GFX900-GISEL-NEXT:    v_cvt_i32_f32_e32 v1, v3
 ; GFX900-GISEL-NEXT:    v_exp_f32_e32 v0, v0
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc23369f4
-; GFX900-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, s2, v2
+; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s2, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0x7f800000
 ; GFX900-GISEL-NEXT:    v_ldexp_f32 v0, v0, v1
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v1, 0x421a209b
-; GFX900-GISEL-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
+; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
 ; GFX900-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, s2, v1
 ; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v1, 0
@@ -181,11 +181,11 @@ define amdgpu_kernel void @s_exp10_f32(ptr addrspace(1) %out, float %in) {
 ; SI-GISEL-NEXT:    v_cvt_i32_f32_e32 v1, v3
 ; SI-GISEL-NEXT:    v_exp_f32_e32 v0, v0
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc23369f4
-; SI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, s2, v2
+; SI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s2, v2
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x7f800000
 ; SI-GISEL-NEXT:    v_ldexp_f32_e32 v0, v0, v1
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v1, 0x421a209b
-; SI-GISEL-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
+; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
 ; SI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, s2, v1
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; SI-GISEL-NEXT:    s_mov_b32 s2, -1
@@ -426,15 +426,15 @@ define amdgpu_kernel void @s_exp10_v2f32(ptr addrspace(1) %out, <2 x float> %in)
 ; VI-GISEL-NEXT:    v_exp_f32_e32 v5, v0
 ; VI-GISEL-NEXT:    v_ldexp_f32 v2, v2, v3
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v3, 0xc23369f4
-; VI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, s2, v3
+; VI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s2, v3
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v4, 0x421a209b
-; VI-GISEL-NEXT:    v_cndmask_b32_e64 v2, v2, 0, vcc
+; VI-GISEL-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v6, 0x7f800000
 ; VI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, s2, v4
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v2, v6, vcc
 ; VI-GISEL-NEXT:    v_ldexp_f32 v1, v5, v1
-; VI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, s3, v3
-; VI-GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
+; VI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s3, v3
+; VI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; VI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, s3, v4
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v3, s1
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v6, vcc
@@ -506,14 +506,14 @@ define amdgpu_kernel void @s_exp10_v2f32(ptr addrspace(1) %out, <2 x float> %in)
 ; GFX900-GISEL-NEXT:    v_exp_f32_e32 v5, v0
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v4, 0xc23369f4
 ; GFX900-GISEL-NEXT:    v_ldexp_f32 v2, v2, v3
-; GFX900-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, s2, v4
+; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s2, v4
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v3, 0x421a209b
-; GFX900-GISEL-NEXT:    v_cndmask_b32_e64 v2, v2, 0, vcc
+; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
 ; GFX900-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, s2, v3
 ; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v0, v2, v6, vcc
 ; GFX900-GISEL-NEXT:    v_ldexp_f32 v1, v5, v1
-; GFX900-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, s3, v4
-; GFX900-GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
+; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s3, v4
+; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; GFX900-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, s3, v3
 ; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v6, vcc
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0
@@ -587,14 +587,14 @@ define amdgpu_kernel void @s_exp10_v2f32(ptr addrspace(1) %out, <2 x float> %in)
 ; SI-GISEL-NEXT:    v_exp_f32_e32 v5, v0
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v4, 0xc23369f4
 ; SI-GISEL-NEXT:    v_ldexp_f32_e32 v2, v2, v3
-; SI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, s2, v4
+; SI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s2, v4
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x421a209b
-; SI-GISEL-NEXT:    v_cndmask_b32_e64 v2, v2, 0, vcc
+; SI-GISEL-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
 ; SI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, s2, v3
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v2, v6, vcc
 ; SI-GISEL-NEXT:    v_ldexp_f32_e32 v1, v5, v1
-; SI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, s3, v4
-; SI-GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
+; SI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s3, v4
+; SI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; SI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, s3, v3
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v6, vcc
 ; SI-GISEL-NEXT:    s_mov_b32 s2, -1
@@ -962,9 +962,9 @@ define amdgpu_kernel void @s_exp10_v3f32(ptr addrspace(1) %out, <3 x float> %in)
 ; VI-GISEL-NEXT:    v_exp_f32_e32 v5, v5
 ; VI-GISEL-NEXT:    v_ldexp_f32 v0, v0, v3
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v3, 0xc23369f4
-; VI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, s0, v3
+; VI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s0, v3
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v4, 0x421a209b
-; VI-GISEL-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
+; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
 ; VI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, s0, v4
 ; VI-GISEL-NEXT:    s_and_b32 s0, s2, 0xfffff000
 ; VI-GISEL-NEXT:    v_ldexp_f32 v5, v5, v6
@@ -983,13 +983,13 @@ define amdgpu_kernel void @s_exp10_v3f32(ptr addrspace(1) %out, <3 x float> %in)
 ; VI-GISEL-NEXT:    v_exp_f32_e32 v6, v1
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v7, 0x7f800000
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc
-; VI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, s1, v3
-; VI-GISEL-NEXT:    v_cndmask_b32_e64 v1, v5, 0, vcc
+; VI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s1, v3
+; VI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v5, vcc
 ; VI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, s1, v4
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
 ; VI-GISEL-NEXT:    v_ldexp_f32 v2, v6, v2
-; VI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, s2, v3
-; VI-GISEL-NEXT:    v_cndmask_b32_e64 v2, v2, 0, vcc
+; VI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s2, v3
+; VI-GISEL-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
 ; VI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, s2, v4
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v3, s4
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v2, v2, v7, vcc
@@ -1082,21 +1082,21 @@ define amdgpu_kernel void @s_exp10_v3f32(ptr addrspace(1) %out, <3 x float> %in)
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v4, 0xc23369f4
 ; GFX900-GISEL-NEXT:    v_add_f32_e32 v1, v6, v1
 ; GFX900-GISEL-NEXT:    v_ldexp_f32 v0, v0, v3
-; GFX900-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, s0, v4
+; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s0, v4
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v3, 0x421a209b
 ; GFX900-GISEL-NEXT:    v_cvt_i32_f32_e32 v2, v2
 ; GFX900-GISEL-NEXT:    v_exp_f32_e32 v6, v1
-; GFX900-GISEL-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
+; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v7, 0x7f800000
 ; GFX900-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, s0, v3
 ; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc
-; GFX900-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, s1, v4
-; GFX900-GISEL-NEXT:    v_cndmask_b32_e64 v1, v5, 0, vcc
+; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s1, v4
+; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v5, vcc
 ; GFX900-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, s1, v3
 ; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
 ; GFX900-GISEL-NEXT:    v_ldexp_f32 v2, v6, v2
-; GFX900-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, s2, v4
-; GFX900-GISEL-NEXT:    v_cndmask_b32_e64 v2, v2, 0, vcc
+; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s2, v4
+; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
 ; GFX900-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, s2, v3
 ; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v2, v2, v7, vcc
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v3, 0
@@ -1191,21 +1191,21 @@ define amdgpu_kernel void @s_exp10_v3f32(ptr addrspace(1) %out, <3 x float> %in)
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v4, 0xc23369f4
 ; SI-GISEL-NEXT:    v_add_f32_e32 v1, v6, v1
 ; SI-GISEL-NEXT:    v_ldexp_f32_e32 v0, v0, v3
-; SI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, s0, v4
+; SI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s0, v4
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x421a209b
 ; SI-GISEL-NEXT:    v_cvt_i32_f32_e32 v2, v2
 ; SI-GISEL-NEXT:    v_exp_f32_e32 v6, v1
-; SI-GISEL-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
+; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v7, 0x7f800000
 ; SI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, s0, v3
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc
-; SI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, s1, v4
-; SI-GISEL-NEXT:    v_cndmask_b32_e64 v1, v5, 0, vcc
+; SI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s1, v4
+; SI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v5, vcc
 ; SI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, s1, v3
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
 ; SI-GISEL-NEXT:    v_ldexp_f32_e32 v2, v6, v2
-; SI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, s2, v4
-; SI-GISEL-NEXT:    v_cndmask_b32_e64 v2, v2, 0, vcc
+; SI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s2, v4
+; SI-GISEL-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
 ; SI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, s2, v3
 ; SI-GISEL-NEXT:    s_mov_b32 s7, 0xf000
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v2, v2, v7, vcc
@@ -1721,8 +1721,8 @@ define amdgpu_kernel void @s_exp10_v4f32(ptr addrspace(1) %out, <4 x float> %in)
 ; VI-GISEL-NEXT:    v_add_f32_e32 v1, v6, v1
 ; VI-GISEL-NEXT:    v_cvt_i32_f32_e32 v6, v7
 ; VI-GISEL-NEXT:    v_exp_f32_e32 v1, v1
-; VI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, s0, v4
-; VI-GISEL-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
+; VI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s0, v4
+; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
 ; VI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, s0, v5
 ; VI-GISEL-NEXT:    s_and_b32 s0, s2, 0xfffff000
 ; VI-GISEL-NEXT:    v_ldexp_f32 v1, v1, v6
@@ -1754,19 +1754,19 @@ define amdgpu_kernel void @s_exp10_v4f32(ptr addrspace(1) %out, <4 x float> %in)
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v7, 0x7f800000
 ; VI-GISEL-NEXT:    v_add_f32_e32 v2, v2, v3
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc
-; VI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, s1, v4
+; VI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s1, v4
 ; VI-GISEL-NEXT:    v_cvt_i32_f32_e32 v3, v8
 ; VI-GISEL-NEXT:    v_exp_f32_e32 v8, v2
-; VI-GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
+; VI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; VI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, s1, v5
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
-; VI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, s2, v4
-; VI-GISEL-NEXT:    v_cndmask_b32_e64 v2, v6, 0, vcc
+; VI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s2, v4
+; VI-GISEL-NEXT:    v_cndmask_b32_e32 v2, 0, v6, vcc
 ; VI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, s2, v5
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v2, v2, v7, vcc
 ; VI-GISEL-NEXT:    v_ldexp_f32 v3, v8, v3
-; VI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, s3, v4
-; VI-GISEL-NEXT:    v_cndmask_b32_e64 v3, v3, 0, vcc
+; VI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s3, v4
+; VI-GISEL-NEXT:    v_cndmask_b32_e32 v3, 0, v3, vcc
 ; VI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, s3, v5
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v4, s4
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v3, v3, v7, vcc
@@ -1857,7 +1857,7 @@ define amdgpu_kernel void @s_exp10_v4f32(ptr addrspace(1) %out, <4 x float> %in)
 ; GFX900-GISEL-NEXT:    v_cvt_i32_f32_e32 v1, v4
 ; GFX900-GISEL-NEXT:    v_exp_f32_e32 v0, v0
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v4, 0xc23369f4
-; GFX900-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, s0, v4
+; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s0, v4
 ; GFX900-GISEL-NEXT:    v_ldexp_f32 v0, v0, v1
 ; GFX900-GISEL-NEXT:    v_mul_f32_e32 v1, s1, v2
 ; GFX900-GISEL-NEXT:    v_fma_f32 v6, s1, v2, -v1
@@ -1867,7 +1867,7 @@ define amdgpu_kernel void @s_exp10_v4f32(ptr addrspace(1) %out, <4 x float> %in)
 ; GFX900-GISEL-NEXT:    v_add_f32_e32 v1, v1, v6
 ; GFX900-GISEL-NEXT:    v_cvt_i32_f32_e32 v6, v7
 ; GFX900-GISEL-NEXT:    v_exp_f32_e32 v1, v1
-; GFX900-GISEL-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
+; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v7, 0x7f800000
 ; GFX900-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, s0, v5
 ; GFX900-GISEL-NEXT:    v_ldexp_f32 v1, v1, v6
@@ -1880,8 +1880,8 @@ define amdgpu_kernel void @s_exp10_v4f32(ptr addrspace(1) %out, <4 x float> %in)
 ; GFX900-GISEL-NEXT:    v_cvt_i32_f32_e32 v8, v9
 ; GFX900-GISEL-NEXT:    v_exp_f32_e32 v6, v6
 ; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc
-; GFX900-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, s1, v4
-; GFX900-GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
+; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s1, v4
+; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; GFX900-GISEL-NEXT:    v_ldexp_f32 v6, v6, v8
 ; GFX900-GISEL-NEXT:    v_mul_f32_e32 v8, s3, v2
 ; GFX900-GISEL-NEXT:    v_fma_f32 v2, s3, v2, -v8
@@ -1893,13 +1893,13 @@ define amdgpu_kernel void @s_exp10_v4f32(ptr addrspace(1) %out, <4 x float> %in)
 ; GFX900-GISEL-NEXT:    v_exp_f32_e32 v8, v2
 ; GFX900-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, s1, v5
 ; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
-; GFX900-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, s2, v4
-; GFX900-GISEL-NEXT:    v_cndmask_b32_e64 v2, v6, 0, vcc
+; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s2, v4
+; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v2, 0, v6, vcc
 ; GFX900-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, s2, v5
 ; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v2, v2, v7, vcc
 ; GFX900-GISEL-NEXT:    v_ldexp_f32 v3, v8, v3
-; GFX900-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, s3, v4
-; GFX900-GISEL-NEXT:    v_cndmask_b32_e64 v3, v3, 0, vcc
+; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s3, v4
+; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v3, 0, v3, vcc
 ; GFX900-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, s3, v5
 ; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v3, v3, v7, vcc
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v4, 0
@@ -1991,7 +1991,7 @@ define amdgpu_kernel void @s_exp10_v4f32(ptr addrspace(1) %out, <4 x float> %in)
 ; SI-GISEL-NEXT:    v_cvt_i32_f32_e32 v1, v4
 ; SI-GISEL-NEXT:    v_exp_f32_e32 v0, v0
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v4, 0xc23369f4
-; SI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, s0, v4
+; SI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s0, v4
 ; SI-GISEL-NEXT:    s_mov_b32 s6, -1
 ; SI-GISEL-NEXT:    v_ldexp_f32_e32 v0, v0, v1
 ; SI-GISEL-NEXT:    v_mul_f32_e32 v1, s1, v2
@@ -2002,7 +2002,7 @@ define amdgpu_kernel void @s_exp10_v4f32(ptr addrspace(1) %out, <4 x float> %in)
 ; SI-GISEL-NEXT:    v_add_f32_e32 v1, v1, v6
 ; SI-GISEL-NEXT:    v_cvt_i32_f32_e32 v6, v7
 ; SI-GISEL-NEXT:    v_exp_f32_e32 v1, v1
-; SI-GISEL-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
+; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v7, 0x7f800000
 ; SI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, s0, v5
 ; SI-GISEL-NEXT:    v_ldexp_f32_e32 v1, v1, v6
@@ -2015,8 +2015,8 @@ define amdgpu_kernel void @s_exp10_v4f32(ptr addrspace(1) %out, <4 x float> %in)
 ; SI-GISEL-NEXT:    v_cvt_i32_f32_e32 v8, v9
 ; SI-GISEL-NEXT:    v_exp_f32_e32 v6, v6
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc
-; SI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, s1, v4
-; SI-GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
+; SI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s1, v4
+; SI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; SI-GISEL-NEXT:    v_ldexp_f32_e32 v6, v6, v8
 ; SI-GISEL-NEXT:    v_mul_f32_e32 v8, s3, v2
 ; SI-GISEL-NEXT:    v_fma_f32 v2, s3, v2, -v8
@@ -2028,13 +2028,13 @@ define amdgpu_kernel void @s_exp10_v4f32(ptr addrspace(1) %out, <4 x float> %in)
 ; SI-GISEL-NEXT:    v_exp_f32_e32 v8, v2
 ; SI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, s1, v5
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
-; SI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, s2, v4
-; SI-GISEL-NEXT:    v_cndmask_b32_e64 v2, v6, 0, vcc
+; SI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s2, v4
+; SI-GISEL-NEXT:    v_cndmask_b32_e32 v2, 0, v6, vcc
 ; SI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, s2, v5
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v2, v2, v7, vcc
 ; SI-GISEL-NEXT:    v_ldexp_f32_e32 v3, v8, v3
-; SI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, s3, v4
-; SI-GISEL-NEXT:    v_cndmask_b32_e64 v3, v3, 0, vcc
+; SI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s3, v4
+; SI-GISEL-NEXT:    v_cndmask_b32_e32 v3, 0, v3, vcc
 ; SI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, s3, v5
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v3, v3, v7, vcc
 ; SI-GISEL-NEXT:    s_mov_b32 s7, 0xf000
@@ -2567,9 +2567,9 @@ define float @v_exp10_f32(float %in) {
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; VI-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc23369f4
-; VI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x421a209b
-; VI-GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
+; VI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; VI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v2
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v3, vcc
 ; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -2613,9 +2613,9 @@ define float @v_exp10_f32(float %in) {
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; GFX900-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc23369f4
-; GFX900-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0x421a209b
-; GFX900-GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
+; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; GFX900-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v2
 ; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v3, vcc
 ; GFX900-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -2659,9 +2659,9 @@ define float @v_exp10_f32(float %in) {
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; SI-GISEL-NEXT:    v_ldexp_f32_e32 v1, v1, v2
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc23369f4
-; SI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x421a209b
-; SI-GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
+; SI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; SI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v2
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v3, vcc
 ; SI-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -2727,9 +2727,9 @@ define float @v_exp10_fabs_f32(float %in) {
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; VI-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc23369f4
-; VI-GISEL-NEXT:    v_cmp_lt_f32_e64 s[4:5], |v0|, v2
+; VI-GISEL-NEXT:    v_cmp_ge_f32_e64 vcc, |v0|, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x421a209b
-; VI-GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, 0, s[4:5]
+; VI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; VI-GISEL-NEXT:    v_cmp_gt_f32_e64 vcc, |v0|, v2
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v3, vcc
 ; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -2773,9 +2773,9 @@ define float @v_exp10_fabs_f32(float %in) {
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; GFX900-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc23369f4
-; GFX900-GISEL-NEXT:    v_cmp_lt_f32_e64 s[4:5], |v0|, v2
+; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e64 vcc, |v0|, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0x421a209b
-; GFX900-GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, 0, s[4:5]
+; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; GFX900-GISEL-NEXT:    v_cmp_gt_f32_e64 vcc, |v0|, v2
 ; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v3, vcc
 ; GFX900-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -2819,9 +2819,9 @@ define float @v_exp10_fabs_f32(float %in) {
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; SI-GISEL-NEXT:    v_ldexp_f32_e32 v1, v1, v2
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc23369f4
-; SI-GISEL-NEXT:    v_cmp_lt_f32_e64 s[4:5], |v0|, v2
+; SI-GISEL-NEXT:    v_cmp_ge_f32_e64 vcc, |v0|, v2
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x421a209b
-; SI-GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, 0, s[4:5]
+; SI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; SI-GISEL-NEXT:    v_cmp_gt_f32_e64 vcc, |v0|, v2
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v3, vcc
 ; SI-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -2888,9 +2888,9 @@ define float @v_exp10_fneg_fabs_f32(float %in) {
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; VI-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc23369f4
-; VI-GISEL-NEXT:    v_cmp_lt_f32_e64 s[4:5], -|v0|, v2
+; VI-GISEL-NEXT:    v_cmp_ge_f32_e64 vcc, -|v0|, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x421a209b
-; VI-GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, 0, s[4:5]
+; VI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; VI-GISEL-NEXT:    v_cmp_gt_f32_e64 vcc, -|v0|, v2
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v3, vcc
 ; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -2934,9 +2934,9 @@ define float @v_exp10_fneg_fabs_f32(float %in) {
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; GFX900-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc23369f4
-; GFX900-GISEL-NEXT:    v_cmp_lt_f32_e64 s[4:5], -|v0|, v2
+; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e64 vcc, -|v0|, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0x421a209b
-; GFX900-GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, 0, s[4:5]
+; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; GFX900-GISEL-NEXT:    v_cmp_gt_f32_e64 vcc, -|v0|, v2
 ; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v3, vcc
 ; GFX900-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -2980,9 +2980,9 @@ define float @v_exp10_fneg_fabs_f32(float %in) {
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; SI-GISEL-NEXT:    v_ldexp_f32_e32 v1, v1, v2
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc23369f4
-; SI-GISEL-NEXT:    v_cmp_lt_f32_e64 s[4:5], -|v0|, v2
+; SI-GISEL-NEXT:    v_cmp_ge_f32_e64 vcc, -|v0|, v2
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x421a209b
-; SI-GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, 0, s[4:5]
+; SI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; SI-GISEL-NEXT:    v_cmp_gt_f32_e64 vcc, -|v0|, v2
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v3, vcc
 ; SI-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -3050,9 +3050,9 @@ define float @v_exp10_fneg_f32(float %in) {
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; VI-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc23369f4
-; VI-GISEL-NEXT:    v_cmp_lt_f32_e64 s[4:5], -v0, v2
+; VI-GISEL-NEXT:    v_cmp_ge_f32_e64 vcc, -v0, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x421a209b
-; VI-GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, 0, s[4:5]
+; VI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; VI-GISEL-NEXT:    v_cmp_gt_f32_e64 vcc, -v0, v2
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v3, vcc
 ; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -3096,9 +3096,9 @@ define float @v_exp10_fneg_f32(float %in) {
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; GFX900-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc23369f4
-; GFX900-GISEL-NEXT:    v_cmp_lt_f32_e64 s[4:5], -v0, v2
+; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e64 vcc, -v0, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0x421a209b
-; GFX900-GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, 0, s[4:5]
+; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; GFX900-GISEL-NEXT:    v_cmp_gt_f32_e64 vcc, -v0, v2
 ; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v3, vcc
 ; GFX900-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -3142,9 +3142,9 @@ define float @v_exp10_fneg_f32(float %in) {
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; SI-GISEL-NEXT:    v_ldexp_f32_e32 v1, v1, v2
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc23369f4
-; SI-GISEL-NEXT:    v_cmp_lt_f32_e64 s[4:5], -v0, v2
+; SI-GISEL-NEXT:    v_cmp_ge_f32_e64 vcc, -v0, v2
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x421a209b
-; SI-GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, 0, s[4:5]
+; SI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; SI-GISEL-NEXT:    v_cmp_gt_f32_e64 vcc, -v0, v2
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v3, vcc
 ; SI-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -3420,8 +3420,8 @@ define float @v_exp10_f32_ninf(float %in) {
 ; VI-GISEL-NEXT:    v_exp_f32_e32 v1, v1
 ; VI-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc23369f4
-; VI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v2
-; VI-GISEL-NEXT:    v_cndmask_b32_e64 v0, v1, 0, vcc
+; VI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
 ; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX900-SDAG-LABEL: v_exp10_f32_ninf:
@@ -3458,8 +3458,8 @@ define float @v_exp10_f32_ninf(float %in) {
 ; GFX900-GISEL-NEXT:    v_exp_f32_e32 v1, v1
 ; GFX900-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc23369f4
-; GFX900-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v2
-; GFX900-GISEL-NEXT:    v_cndmask_b32_e64 v0, v1, 0, vcc
+; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
 ; GFX900-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; SI-SDAG-LABEL: v_exp10_f32_ninf:
@@ -3496,8 +3496,8 @@ define float @v_exp10_f32_ninf(float %in) {
 ; SI-GISEL-NEXT:    v_exp_f32_e32 v1, v1
 ; SI-GISEL-NEXT:    v_ldexp_f32_e32 v1, v1, v2
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc23369f4
-; SI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v2
-; SI-GISEL-NEXT:    v_cndmask_b32_e64 v0, v1, 0, vcc
+; SI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
 ; SI-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; R600-LABEL: v_exp10_f32_ninf:
@@ -3828,9 +3828,9 @@ define float @v_exp10_f32_daz(float %in) #0 {
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; VI-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc23369f4
-; VI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x421a209b
-; VI-GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
+; VI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; VI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v2
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v3, vcc
 ; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -3874,9 +3874,9 @@ define float @v_exp10_f32_daz(float %in) #0 {
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; GFX900-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc23369f4
-; GFX900-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0x421a209b
-; GFX900-GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
+; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; GFX900-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v2
 ; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v3, vcc
 ; GFX900-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -3920,9 +3920,9 @@ define float @v_exp10_f32_daz(float %in) #0 {
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; SI-GISEL-NEXT:    v_ldexp_f32_e32 v1, v1, v2
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc23369f4
-; SI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x421a209b
-; SI-GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
+; SI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; SI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v2
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v3, vcc
 ; SI-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -3986,9 +3986,9 @@ define float @v_exp10_f32_nnan(float %in) {
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; VI-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc23369f4
-; VI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x421a209b
-; VI-GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
+; VI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; VI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v2
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v3, vcc
 ; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -4032,9 +4032,9 @@ define float @v_exp10_f32_nnan(float %in) {
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; GFX900-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc23369f4
-; GFX900-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0x421a209b
-; GFX900-GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
+; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; GFX900-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v2
 ; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v3, vcc
 ; GFX900-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -4078,9 +4078,9 @@ define float @v_exp10_f32_nnan(float %in) {
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; SI-GISEL-NEXT:    v_ldexp_f32_e32 v1, v1, v2
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc23369f4
-; SI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x421a209b
-; SI-GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
+; SI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; SI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v2
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v3, vcc
 ; SI-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -4144,9 +4144,9 @@ define float @v_exp10_f32_nnan_daz(float %in) #0 {
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; VI-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc23369f4
-; VI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x421a209b
-; VI-GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
+; VI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; VI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v2
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v3, vcc
 ; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -4190,9 +4190,9 @@ define float @v_exp10_f32_nnan_daz(float %in) #0 {
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; GFX900-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc23369f4
-; GFX900-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0x421a209b
-; GFX900-GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
+; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; GFX900-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v2
 ; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v3, vcc
 ; GFX900-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -4236,9 +4236,9 @@ define float @v_exp10_f32_nnan_daz(float %in) #0 {
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; SI-GISEL-NEXT:    v_ldexp_f32_e32 v1, v1, v2
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc23369f4
-; SI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x421a209b
-; SI-GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
+; SI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; SI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v2
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v3, vcc
 ; SI-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -4302,9 +4302,9 @@ define float @v_exp10_f32_nnan_dynamic(float %in) #1 {
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; VI-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc23369f4
-; VI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x421a209b
-; VI-GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
+; VI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; VI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v2
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v3, vcc
 ; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -4348,9 +4348,9 @@ define float @v_exp10_f32_nnan_dynamic(float %in) #1 {
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; GFX900-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc23369f4
-; GFX900-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0x421a209b
-; GFX900-GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
+; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; GFX900-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v2
 ; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v3, vcc
 ; GFX900-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -4394,9 +4394,9 @@ define float @v_exp10_f32_nnan_dynamic(float %in) #1 {
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; SI-GISEL-NEXT:    v_ldexp_f32_e32 v1, v1, v2
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc23369f4
-; SI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x421a209b
-; SI-GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
+; SI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; SI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v2
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v3, vcc
 ; SI-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -4455,8 +4455,8 @@ define float @v_exp10_f32_ninf_daz(float %in) #0 {
 ; VI-GISEL-NEXT:    v_exp_f32_e32 v1, v1
 ; VI-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc23369f4
-; VI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v2
-; VI-GISEL-NEXT:    v_cndmask_b32_e64 v0, v1, 0, vcc
+; VI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
 ; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX900-SDAG-LABEL: v_exp10_f32_ninf_daz:
@@ -4493,8 +4493,8 @@ define float @v_exp10_f32_ninf_daz(float %in) #0 {
 ; GFX900-GISEL-NEXT:    v_exp_f32_e32 v1, v1
 ; GFX900-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc23369f4
-; GFX900-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v2
-; GFX900-GISEL-NEXT:    v_cndmask_b32_e64 v0, v1, 0, vcc
+; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
 ; GFX900-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; SI-SDAG-LABEL: v_exp10_f32_ninf_daz:
@@ -4531,8 +4531,8 @@ define float @v_exp10_f32_ninf_daz(float %in) #0 {
 ; SI-GISEL-NEXT:    v_exp_f32_e32 v1, v1
 ; SI-GISEL-NEXT:    v_ldexp_f32_e32 v1, v1, v2
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc23369f4
-; SI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v2
-; SI-GISEL-NEXT:    v_cndmask_b32_e64 v0, v1, 0, vcc
+; SI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
 ; SI-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; R600-LABEL: v_exp10_f32_ninf_daz:
@@ -4589,8 +4589,8 @@ define float @v_exp10_f32_ninf_dynamic(float %in) #1 {
 ; VI-GISEL-NEXT:    v_exp_f32_e32 v1, v1
 ; VI-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc23369f4
-; VI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v2
-; VI-GISEL-NEXT:    v_cndmask_b32_e64 v0, v1, 0, vcc
+; VI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
 ; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX900-SDAG-LABEL: v_exp10_f32_ninf_dynamic:
@@ -4627,8 +4627,8 @@ define float @v_exp10_f32_ninf_dynamic(float %in) #1 {
 ; GFX900-GISEL-NEXT:    v_exp_f32_e32 v1, v1
 ; GFX900-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc23369f4
-; GFX900-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v2
-; GFX900-GISEL-NEXT:    v_cndmask_b32_e64 v0, v1, 0, vcc
+; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
 ; GFX900-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; SI-SDAG-LABEL: v_exp10_f32_ninf_dynamic:
@@ -4665,8 +4665,8 @@ define float @v_exp10_f32_ninf_dynamic(float %in) #1 {
 ; SI-GISEL-NEXT:    v_exp_f32_e32 v1, v1
 ; SI-GISEL-NEXT:    v_ldexp_f32_e32 v1, v1, v2
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc23369f4
-; SI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v2
-; SI-GISEL-NEXT:    v_cndmask_b32_e64 v0, v1, 0, vcc
+; SI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
 ; SI-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; R600-LABEL: v_exp10_f32_ninf_dynamic:
@@ -4723,8 +4723,8 @@ define float @v_exp10_f32_nnan_ninf(float %in) {
 ; VI-GISEL-NEXT:    v_exp_f32_e32 v1, v1
 ; VI-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc23369f4
-; VI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v2
-; VI-GISEL-NEXT:    v_cndmask_b32_e64 v0, v1, 0, vcc
+; VI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
 ; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX900-SDAG-LABEL: v_exp10_f32_nnan_ninf:
@@ -4761,8 +4761,8 @@ define float @v_exp10_f32_nnan_ninf(float %in) {
 ; GFX900-GISEL-NEXT:    v_exp_f32_e32 v1, v1
 ; GFX900-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc23369f4
-; GFX900-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v2
-; GFX900-GISEL-NEXT:    v_cndmask_b32_e64 v0, v1, 0, vcc
+; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
 ; GFX900-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; SI-SDAG-LABEL: v_exp10_f32_nnan_ninf:
@@ -4799,8 +4799,8 @@ define float @v_exp10_f32_nnan_ninf(float %in) {
 ; SI-GISEL-NEXT:    v_exp_f32_e32 v1, v1
 ; SI-GISEL-NEXT:    v_ldexp_f32_e32 v1, v1, v2
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc23369f4
-; SI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v2
-; SI-GISEL-NEXT:    v_cndmask_b32_e64 v0, v1, 0, vcc
+; SI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
 ; SI-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; R600-LABEL: v_exp10_f32_nnan_ninf:
@@ -4857,8 +4857,8 @@ define float @v_exp10_f32_nnan_ninf_daz(float %in) #0 {
 ; VI-GISEL-NEXT:    v_exp_f32_e32 v1, v1
 ; VI-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc23369f4
-; VI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v2
-; VI-GISEL-NEXT:    v_cndmask_b32_e64 v0, v1, 0, vcc
+; VI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
 ; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX900-SDAG-LABEL: v_exp10_f32_nnan_ninf_daz:
@@ -4895,8 +4895,8 @@ define float @v_exp10_f32_nnan_ninf_daz(float %in) #0 {
 ; GFX900-GISEL-NEXT:    v_exp_f32_e32 v1, v1
 ; GFX900-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc23369f4
-; GFX900-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v2
-; GFX900-GISEL-NEXT:    v_cndmask_b32_e64 v0, v1, 0, vcc
+; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
 ; GFX900-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; SI-SDAG-LABEL: v_exp10_f32_nnan_ninf_daz:
@@ -4933,8 +4933,8 @@ define float @v_exp10_f32_nnan_ninf_daz(float %in) #0 {
 ; SI-GISEL-NEXT:    v_exp_f32_e32 v1, v1
 ; SI-GISEL-NEXT:    v_ldexp_f32_e32 v1, v1, v2
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc23369f4
-; SI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v2
-; SI-GISEL-NEXT:    v_cndmask_b32_e64 v0, v1, 0, vcc
+; SI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
 ; SI-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; R600-LABEL: v_exp10_f32_nnan_ninf_daz:
@@ -4991,8 +4991,8 @@ define float @v_exp10_f32_nnan_ninf_dynamic(float %in) #1 {
 ; VI-GISEL-NEXT:    v_exp_f32_e32 v1, v1
 ; VI-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc23369f4
-; VI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v2
-; VI-GISEL-NEXT:    v_cndmask_b32_e64 v0, v1, 0, vcc
+; VI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
 ; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX900-SDAG-LABEL: v_exp10_f32_nnan_ninf_dynamic:
@@ -5029,8 +5029,8 @@ define float @v_exp10_f32_nnan_ninf_dynamic(float %in) #1 {
 ; GFX900-GISEL-NEXT:    v_exp_f32_e32 v1, v1
 ; GFX900-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc23369f4
-; GFX900-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v2
-; GFX900-GISEL-NEXT:    v_cndmask_b32_e64 v0, v1, 0, vcc
+; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
 ; GFX900-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; SI-SDAG-LABEL: v_exp10_f32_nnan_ninf_dynamic:
@@ -5067,8 +5067,8 @@ define float @v_exp10_f32_nnan_ninf_dynamic(float %in) #1 {
 ; SI-GISEL-NEXT:    v_exp_f32_e32 v1, v1
 ; SI-GISEL-NEXT:    v_ldexp_f32_e32 v1, v1, v2
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc23369f4
-; SI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v2
-; SI-GISEL-NEXT:    v_cndmask_b32_e64 v0, v1, 0, vcc
+; SI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
 ; SI-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; R600-LABEL: v_exp10_f32_nnan_ninf_dynamic:
@@ -5178,9 +5178,9 @@ define float @v_exp10_f32_dynamic_mode(float %in) #1 {
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; VI-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc23369f4
-; VI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x421a209b
-; VI-GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
+; VI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; VI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v2
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v3, vcc
 ; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -5224,9 +5224,9 @@ define float @v_exp10_f32_dynamic_mode(float %in) #1 {
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; GFX900-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc23369f4
-; GFX900-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0x421a209b
-; GFX900-GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
+; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; GFX900-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v2
 ; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v3, vcc
 ; GFX900-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -5270,9 +5270,9 @@ define float @v_exp10_f32_dynamic_mode(float %in) #1 {
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; SI-GISEL-NEXT:    v_ldexp_f32_e32 v1, v1, v2
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc23369f4
-; SI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x421a209b
-; SI-GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
+; SI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; SI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v2
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v3, vcc
 ; SI-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -5322,9 +5322,9 @@ define float @v_exp10_f32_undef() {
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x7f800000
 ; VI-GISEL-NEXT:    v_ldexp_f32 v0, v0, v1
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v1, 0xc23369f4
-; VI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, s4, v1
+; VI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s4, v1
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v1, 0x421a209b
-; VI-GISEL-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
+; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
 ; VI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v1
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -5361,9 +5361,9 @@ define float @v_exp10_f32_undef() {
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0x7f800000
 ; GFX900-GISEL-NEXT:    v_ldexp_f32 v0, v0, v1
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v1, 0xc23369f4
-; GFX900-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, s4, v1
+; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s4, v1
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v1, 0x421a209b
-; GFX900-GISEL-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
+; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
 ; GFX900-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v1
 ; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; GFX900-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -5400,9 +5400,9 @@ define float @v_exp10_f32_undef() {
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x7f800000
 ; SI-GISEL-NEXT:    v_ldexp_f32_e32 v0, v0, v1
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v1, 0xc23369f4
-; SI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, s4, v1
+; SI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s4, v1
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v1, 0x421a209b
-; SI-GISEL-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
+; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
 ; SI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v1
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; SI-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -5494,9 +5494,9 @@ define float @v_exp10_f32_from_fpext_f16(i16 %src.i) {
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; VI-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc23369f4
-; VI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x421a209b
-; VI-GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
+; VI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; VI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v2
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v3, vcc
 ; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -5540,11 +5540,11 @@ define float @v_exp10_f32_from_fpext_f16(i16 %src.i) {
 ; GFX900-GISEL-NEXT:    v_cvt_i32_f32_e32 v2, v4
 ; GFX900-GISEL-NEXT:    v_exp_f32_e32 v1, v1
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v3, 0xc23369f4
-; GFX900-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v3
+; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v3
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; GFX900-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0x421a209b
-; GFX900-GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
+; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; GFX900-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v2
 ; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v3, vcc
 ; GFX900-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -5588,11 +5588,11 @@ define float @v_exp10_f32_from_fpext_f16(i16 %src.i) {
 ; SI-GISEL-NEXT:    v_cvt_i32_f32_e32 v2, v4
 ; SI-GISEL-NEXT:    v_exp_f32_e32 v1, v1
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v3, 0xc23369f4
-; SI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v3
+; SI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v3
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; SI-GISEL-NEXT:    v_ldexp_f32_e32 v1, v1, v2
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x421a209b
-; SI-GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
+; SI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; SI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v2
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v3, vcc
 ; SI-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -5662,9 +5662,9 @@ define float @v_exp10_f32_from_fpext_math_f16(i16 %src0.i, i16 %src1.i) {
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; VI-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc23369f4
-; VI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x421a209b
-; VI-GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
+; VI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; VI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v2
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v3, vcc
 ; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -5712,9 +5712,9 @@ define float @v_exp10_f32_from_fpext_math_f16(i16 %src0.i, i16 %src1.i) {
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; GFX900-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc23369f4
-; GFX900-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0x421a209b
-; GFX900-GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
+; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; GFX900-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v2
 ; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v3, vcc
 ; GFX900-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -5766,9 +5766,9 @@ define float @v_exp10_f32_from_fpext_math_f16(i16 %src0.i, i16 %src1.i) {
 ; SI-GISEL-NEXT:    v_exp_f32_e32 v1, v1
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc23369f4
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v4, 0x421a209b
-; SI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
 ; SI-GISEL-NEXT:    v_ldexp_f32_e32 v1, v1, v3
-; SI-GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
+; SI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; SI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v4
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v5, vcc
 ; SI-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -5998,9 +5998,9 @@ define float @v_exp10_f32_from_fpext_math_f16_daz(i16 %src0.i, i16 %src1.i) #0 {
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; VI-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc23369f4
-; VI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x421a209b
-; VI-GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
+; VI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; VI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v2
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v3, vcc
 ; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -6048,9 +6048,9 @@ define float @v_exp10_f32_from_fpext_math_f16_daz(i16 %src0.i, i16 %src1.i) #0 {
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; GFX900-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc23369f4
-; GFX900-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0x421a209b
-; GFX900-GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
+; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; GFX900-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v2
 ; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v3, vcc
 ; GFX900-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -6102,9 +6102,9 @@ define float @v_exp10_f32_from_fpext_math_f16_daz(i16 %src0.i, i16 %src1.i) #0 {
 ; SI-GISEL-NEXT:    v_exp_f32_e32 v1, v1
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc23369f4
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v4, 0x421a209b
-; SI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
 ; SI-GISEL-NEXT:    v_ldexp_f32_e32 v1, v1, v3
-; SI-GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
+; SI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; SI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v4
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v5, vcc
 ; SI-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -7126,9 +7126,9 @@ define float @v_exp10_f32_contract(float %in) {
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; VI-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc23369f4
-; VI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x421a209b
-; VI-GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
+; VI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; VI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v2
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v3, vcc
 ; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -7172,9 +7172,9 @@ define float @v_exp10_f32_contract(float %in) {
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; GFX900-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc23369f4
-; GFX900-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0x421a209b
-; GFX900-GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
+; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; GFX900-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v2
 ; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v3, vcc
 ; GFX900-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -7218,9 +7218,9 @@ define float @v_exp10_f32_contract(float %in) {
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; SI-GISEL-NEXT:    v_ldexp_f32_e32 v1, v1, v2
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc23369f4
-; SI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x421a209b
-; SI-GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
+; SI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; SI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v2
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v3, vcc
 ; SI-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -7284,9 +7284,9 @@ define float @v_exp10_f32_contract_daz(float %in) #0 {
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; VI-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc23369f4
-; VI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x421a209b
-; VI-GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
+; VI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; VI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v2
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v3, vcc
 ; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -7330,9 +7330,9 @@ define float @v_exp10_f32_contract_daz(float %in) #0 {
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; GFX900-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc23369f4
-; GFX900-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0x421a209b
-; GFX900-GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
+; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; GFX900-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v2
 ; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v3, vcc
 ; GFX900-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -7376,9 +7376,9 @@ define float @v_exp10_f32_contract_daz(float %in) #0 {
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; SI-GISEL-NEXT:    v_ldexp_f32_e32 v1, v1, v2
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc23369f4
-; SI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x421a209b
-; SI-GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
+; SI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; SI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v2
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v3, vcc
 ; SI-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -7437,8 +7437,8 @@ define float @v_exp10_f32_contract_nnan_ninf(float %in) {
 ; VI-GISEL-NEXT:    v_exp_f32_e32 v1, v1
 ; VI-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc23369f4
-; VI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v2
-; VI-GISEL-NEXT:    v_cndmask_b32_e64 v0, v1, 0, vcc
+; VI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
 ; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX900-SDAG-LABEL: v_exp10_f32_contract_nnan_ninf:
@@ -7475,8 +7475,8 @@ define float @v_exp10_f32_contract_nnan_ninf(float %in) {
 ; GFX900-GISEL-NEXT:    v_exp_f32_e32 v1, v1
 ; GFX900-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc23369f4
-; GFX900-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v2
-; GFX900-GISEL-NEXT:    v_cndmask_b32_e64 v0, v1, 0, vcc
+; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
 ; GFX900-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; SI-SDAG-LABEL: v_exp10_f32_contract_nnan_ninf:
@@ -7513,8 +7513,8 @@ define float @v_exp10_f32_contract_nnan_ninf(float %in) {
 ; SI-GISEL-NEXT:    v_exp_f32_e32 v1, v1
 ; SI-GISEL-NEXT:    v_ldexp_f32_e32 v1, v1, v2
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc23369f4
-; SI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v2
-; SI-GISEL-NEXT:    v_cndmask_b32_e64 v0, v1, 0, vcc
+; SI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
 ; SI-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; R600-LABEL: v_exp10_f32_contract_nnan_ninf:
diff --git a/llvm/test/CodeGen/AMDGPU/private-memory-atomics.ll b/llvm/test/CodeGen/AMDGPU/private-memory-atomics.ll
index 24a4d8fbde200..b0b875c2b11a1 100644
--- a/llvm/test/CodeGen/AMDGPU/private-memory-atomics.ll
+++ b/llvm/test/CodeGen/AMDGPU/private-memory-atomics.ll
@@ -123,10 +123,10 @@ define i32 @cmpxchg_private_i32(ptr addrspace(5) %ptr) {
 ; GCN-NEXT:    s_mov_b32 s7, 0xf000
 ; GCN-NEXT:    s_mov_b32 s6, -1
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
-; GCN-NEXT:    v_cndmask_b32_e64 v2, v1, 1, vcc
+; GCN-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v1
+; GCN-NEXT:    v_cndmask_b32_e32 v2, 1, v1, vcc
 ; GCN-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen
-; GCN-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
+; GCN-NEXT:    v_cndmask_b32_e64 v0, 1, 0, vcc
 ; GCN-NEXT:    buffer_store_byte v0, off, s[4:7], 0
 ; GCN-NEXT:    s_waitcnt expcnt(0)
 ; GCN-NEXT:    v_mov_b32_e32 v0, v1
diff --git a/llvm/test/CodeGen/AMDGPU/short-select-cndmask.ll b/llvm/test/CodeGen/AMDGPU/short-select-cndmask.ll
new file mode 100644
index 0000000000000..5618f77fb3d86
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/short-select-cndmask.ll
@@ -0,0 +1,47 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck %s -check-prefix=GCN
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck %s -check-prefix=GCN
+
+define amdgpu_cs void @test(i32 %a, i32 %x, i32 %y, i32 %p, i32 %q, i32 %r, i32 %s, ptr addrspace(1) %out) {
+; GCN-LABEL: test:
+; GCN:       ; %bb.0: ; %.entry
+; GCN-NEXT:    v_cmp_ne_u32_e32 vcc_lo, -1, v0
+; GCN-NEXT:    v_dual_cndmask_b32 v0, v1, v2 :: v_dual_cndmask_b32 v1, 0, v3
+; GCN-NEXT:    v_dual_cndmask_b32 v2, 0, v4 :: v_dual_cndmask_b32 v3, v5, v6
+; GCN-NEXT:    global_store_b128 v[7:8], v[0:3], off
+; GCN-NEXT:    s_endpgm
+.entry:
+  %vcc = icmp eq i32 %a, -1
+  %val1 = select i1 %vcc, i32 %x, i32 %y
+  %val2 = select i1 %vcc, i32 0, i32 %p
+  %val3 = select i1 %vcc, i32 0, i32 %q
+  %val4 = select i1 %vcc, i32 %r, i32 %s
+  %ret0 = insertelement <4 x i32> poison, i32 %val1, i32 0
+  %ret1 = insertelement <4 x i32> %ret0, i32 %val2, i32 1
+  %ret2 = insertelement <4 x i32> %ret1, i32 %val3, i32 2
+  %ret3 = insertelement <4 x i32> %ret2, i32 %val4, i32 3
+  store <4 x i32> %ret3, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_cs void @test_negative_case(i32 %a, i32 %x, i32 %y, i32 %p, i32 %q, i32 %r, i32 %s, ptr addrspace(1) %out) {
+; GCN-LABEL: test_negative_case:
+; GCN:       ; %bb.0: ; %.entry
+; GCN-NEXT:    v_cmp_ne_u32_e32 vcc_lo, -1, v0
+; GCN-NEXT:    v_dual_cndmask_b32 v0, v1, v2 :: v_dual_cndmask_b32 v1, 0, v3
+; GCN-NEXT:    v_dual_cndmask_b32 v2, 0, v4 :: v_dual_cndmask_b32 v3, v5, v6
+; GCN-NEXT:    global_store_b128 v[7:8], v[0:3], off
+; GCN-NEXT:    s_endpgm
+.entry:
+  %vcc = icmp eq i32 %a, -1
+  %val1 = select i1 %vcc, i32 %x, i32 %y
+  %val2 = select i1 %vcc, i32 0, i32 %p
+  %val3 = select i1 %vcc, i32 0, i32 %q
+  %val4 = select i1 %vcc, i32 %r, i32 %s
+  %ret0 = insertelement <4 x i32> poison, i32 %val1, i32 0
+  %ret1 = insertelement <4 x i32> %ret0, i32 %val2, i32 1
+  %ret2 = insertelement <4 x i32> %ret1, i32 %val3, i32 2
+  %ret3 = insertelement <4 x i32> %ret2, i32 %val4, i32 3
+  store <4 x i32> %ret3, ptr addrspace(1) %out
+  ret void
+}
\ No newline at end of file
diff --git a/llvm/test/CodeGen/AMDGPU/sint_to_fp.f64.ll b/llvm/test/CodeGen/AMDGPU/sint_to_fp.f64.ll
index 8129a7ac51df9..d2172d6eb7625 100644
--- a/llvm/test/CodeGen/AMDGPU/sint_to_fp.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/sint_to_fp.f64.ll
@@ -334,9 +334,9 @@ define void @v_swap_select_sint_to_fp_i1_vals_f64(ptr addrspace(1) %out, i32 %in
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GCN-NEXT:    v_mov_b32_e32 v4, 0xbff00000
-; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v2
+; GCN-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v2
 ; GCN-NEXT:    v_mov_b32_e32 v3, 0
-; GCN-NEXT:    v_cndmask_b32_e64 v4, v4, 0, vcc
+; GCN-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
 ; GCN-NEXT:    flat_store_dwordx2 v[0:1], v[3:4]
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
diff --git a/llvm/test/CodeGen/AMDGPU/uint_to_fp.f64.ll b/llvm/test/CodeGen/AMDGPU/uint_to_fp.f64.ll
index 55cbc14a46706..6de24373cf0a4 100644
--- a/llvm/test/CodeGen/AMDGPU/uint_to_fp.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/uint_to_fp.f64.ll
@@ -539,9 +539,9 @@ define void @v_swap_select_uint_to_fp_i1_vals_f64(ptr addrspace(1) %out, i32 %in
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GCN-NEXT:    v_mov_b32_e32 v4, 0x3ff00000
-; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v2
+; GCN-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v2
 ; GCN-NEXT:    v_mov_b32_e32 v3, 0
-; GCN-NEXT:    v_cndmask_b32_e64 v4, v4, 0, vcc
+; GCN-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
 ; GCN-NEXT:    flat_store_dwordx2 v[0:1], v[3:4]
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-NEXT:    s_setpc_b64 s[30:31]

>From 4443f3a6d2994cdc3e0e7bc5f93d73b38d736a3a Mon Sep 17 00:00:00 2001
From: Ana Mihajlovic <Ana.Mihajlovic at amd.com>
Date: Fri, 11 Apr 2025 18:24:02 +0200
Subject: [PATCH 2/3] covered 64bit case, updated tests, added operand
 swapping instead of creating new instruction

---
 .../Target/AMDGPU/SIShrinkInstructions.cpp    | 181 +++++++++------
 .../test/CodeGen/AMDGPU/GlobalISel/saddsat.ll | 200 ++++++++---------
 .../AMDGPU/GlobalISel/select-to-fmin-fmax.ll  |  32 +--
 llvm/test/CodeGen/AMDGPU/div_i128.ll          |  24 +-
 llvm/test/CodeGen/AMDGPU/div_v2i128.ll        |  96 ++++----
 llvm/test/CodeGen/AMDGPU/extract-subvector.ll |   8 +-
 llvm/test/CodeGen/AMDGPU/llvm.exp.ll          | 210 +++++++++---------
 llvm/test/CodeGen/AMDGPU/llvm.exp10.ll        | 210 +++++++++---------
 .../CodeGen/AMDGPU/private-memory-atomics.ll  |   8 +-
 llvm/test/CodeGen/AMDGPU/rem_i128.ll          |  24 +-
 .../CodeGen/AMDGPU/short-select-cndmask.ll    |  88 +++++++-
 llvm/test/CodeGen/AMDGPU/uaddsat.ll           |  29 ++-
 llvm/test/CodeGen/AMDGPU/usubsat.ll           |  29 ++-
 13 files changed, 626 insertions(+), 513 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
index 0b1638c25b9ae..f40655955fac8 100644
--- a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
+++ b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
@@ -51,10 +51,10 @@ class SIShrinkInstructions {
                     unsigned SubReg) const;
   bool instModifiesReg(const MachineInstr *MI, unsigned Reg,
                        unsigned SubReg) const;
-  bool trySwitchOperands(MachineInstr &MI, Register *OldVCC,
-                         Register *NewVCC) const;
-  bool shouldSwitchOperands(MachineRegisterInfo &MRI, MachineInstr &MI,
-                            const SIInstrInfo &TII) const;
+  Register trySwapCndOperands(MachineInstr &MI) const;
+  bool
+  shouldSwapCndOperands(MachineInstr &MI, const SIInstrInfo &TII,
+                        SmallVector<MachineOperand *, 4> &UsesToProcess) const;
   unsigned getInverseCompareOpcode(MachineInstr &MI) const;
   TargetInstrInfo::RegSubRegPair getSubRegForIndex(Register Reg, unsigned Sub,
                                                    unsigned I) const;
@@ -851,92 +851,137 @@ unsigned SIShrinkInstructions::getInverseCompareOpcode(MachineInstr &MI) const {
     return AMDGPU::V_CMP_LE_U32_e64;
   case AMDGPU::V_CMP_LT_U32_e64:
     return AMDGPU::V_CMP_GE_U32_e64;
-    // float 32
+    // unsigned 64
+  case AMDGPU::V_CMP_EQ_U64_e64:
+    return AMDGPU::V_CMP_NE_U64_e64;
+  case AMDGPU::V_CMP_NE_U64_e64:
+    return AMDGPU::V_CMP_EQ_U64_e64;
+  case AMDGPU::V_CMP_GE_U64_e64:
+    return AMDGPU::V_CMP_LT_U64_e64;
+  case AMDGPU::V_CMP_LE_U64_e64:
+    return AMDGPU::V_CMP_GT_U64_e64;
+  case AMDGPU::V_CMP_GT_U64_e64:
+    return AMDGPU::V_CMP_LE_U64_e64;
+  case AMDGPU::V_CMP_LT_U64_e64:
+    return AMDGPU::V_CMP_GE_U64_e64;
+  // float 32
   case AMDGPU::V_CMP_EQ_F32_e64:
     return AMDGPU::V_CMP_NEQ_F32_e64;
   case AMDGPU::V_CMP_NEQ_F32_e64:
     return AMDGPU::V_CMP_EQ_F32_e64;
   case AMDGPU::V_CMP_GE_F32_e64:
-    return AMDGPU::V_CMP_LT_F32_e64;
+    return AMDGPU::V_CMP_NGE_F32_e64;
   case AMDGPU::V_CMP_LE_F32_e64:
-    return AMDGPU::V_CMP_GT_F32_e64;
+    return AMDGPU::V_CMP_NLE_F32_e64;
   case AMDGPU::V_CMP_GT_F32_e64:
-    return AMDGPU::V_CMP_LE_F32_e64;
+    return AMDGPU::V_CMP_NGT_F32_e64;
   case AMDGPU::V_CMP_LT_F32_e64:
-    return AMDGPU::V_CMP_GE_F32_e64;
+    return AMDGPU::V_CMP_NLT_F32_e64;
+  // float 64
+  case AMDGPU::V_CMP_EQ_F64_e64:
+    return AMDGPU::V_CMP_NEQ_F64_e64;
+  case AMDGPU::V_CMP_NEQ_F64_e64:
+    return AMDGPU::V_CMP_EQ_F64_e64;
+  case AMDGPU::V_CMP_GE_F64_e64:
+    return AMDGPU::V_CMP_NGE_F64_e64;
+  case AMDGPU::V_CMP_LE_F64_e64:
+    return AMDGPU::V_CMP_NLE_F64_e64;
+  case AMDGPU::V_CMP_GT_F64_e64:
+    return AMDGPU::V_CMP_NGT_F64_e64;
+  case AMDGPU::V_CMP_LT_F64_e64:
+    return AMDGPU::V_CMP_NLT_F64_e64;
   default:
     return 0;
   }
 }
 
-bool SIShrinkInstructions::shouldSwitchOperands(MachineRegisterInfo &MRI,
-                                                MachineInstr &MI,
-                                                const SIInstrInfo &TII) const {
-  auto allUses = MRI.use_nodbg_operands(MI.getOperand(5).getReg());
-  unsigned Count = 0;
+bool SIShrinkInstructions::shouldSwapCndOperands(
+    MachineInstr &MI, const SIInstrInfo &TII,
+    SmallVector<MachineOperand *, 4> &UsesToProcess) const {
+  auto AllUses = MRI->use_nodbg_operands(MI.getOperand(0).getReg());
+  bool ShouldSwap = false;
 
-  for (auto &Use : allUses) {
-    if (Use.getParent()->getOpcode() != AMDGPU::V_CNDMASK_B32_e64)
+  for (auto &Use : AllUses) {
+    MachineInstr *UseInst = Use.getParent();
+    if (UseInst->getOpcode() != AMDGPU::V_CNDMASK_B32_e64)
       return false;
-    MachineOperand *Src0 =
-        TII.getNamedOperand(*Use.getParent(), AMDGPU::OpName::src0);
-    MachineOperand *Src1 =
-        TII.getNamedOperand(*Use.getParent(), AMDGPU::OpName::src1);
+    MachineOperand *Src0 = TII.getNamedOperand(*UseInst, AMDGPU::OpName::src0);
+    MachineOperand *Src1 = TII.getNamedOperand(*UseInst, AMDGPU::OpName::src1);
 
     auto Src0Imm = Src0->isImm();
     auto Src1Imm = Src1->isImm();
 
     if (!Src1Imm && Src0Imm)
       return false;
-    if (Src1Imm && !Src0Imm)
-      Count++;
+
+    UsesToProcess.push_back(&Use);
+
+    if (Src1Imm && !Src0Imm && !UseInst->getOperand(1).getImm())
+      ShouldSwap = true;
   }
-  return (Count >= 1);
+  return ShouldSwap;
 }
 
-// OldVCC and NewVCC are used to remember VCC after inverting comparison
-bool SIShrinkInstructions::trySwitchOperands(MachineInstr &MI, Register *OldVCC,
-                                             Register *NewVCC) const {
-  const DebugLoc &DL = MI.getDebugLoc();
-  auto Reg = MI.getOperand(5).getReg();
-  if (!Reg.isVirtual())
-    return false;
+void swapCndOperands(MachineInstr &MI) {
+  MachineOperand Op2 = MI.getOperand(2);
+  MachineOperand Op4 = MI.getOperand(4);
+
+  if (Op2.isReg()) {
+    MI.getOperand(4).ChangeToRegister(
+        Op2.getReg(), Op2.isDef(), Op2.isImplicit(), Op2.isKill(), Op2.isDead(),
+        Op2.isUndef(), Op2.isDebug());
+    if (Op2.getSubReg() != AMDGPU::NoSubRegister)
+      MI.getOperand(4).setSubReg(Op2.getSubReg());
+  } else if (Op2.isImm()) {
+    MI.getOperand(4).ChangeToImmediate(Op2.getImm());
+  }
 
-  if (*OldVCC != Reg) {
-    MachineInstr *DefMI = MRI->getVRegDef(Reg);
-    if (DefMI) {
-      unsigned Opcode = getInverseCompareOpcode(*DefMI);
-      if (Opcode &&
-          SIShrinkInstructions::shouldSwitchOperands(*MRI, MI, *TII)) {
-        auto cmpDL = DefMI->getDebugLoc();
-        *NewVCC = MRI->createVirtualRegister(MRI->getRegClass(Reg));
-        *OldVCC = Reg;
-        MachineInstrBuilder InverseCompare = BuildMI(
-            *DefMI->getParent(), DefMI, cmpDL, TII->get(Opcode), *NewVCC);
-        InverseCompare->setFlags(DefMI->getFlags());
-
-        unsigned OpNum = DefMI->getNumExplicitOperands();
-        for (unsigned i = 1; i < OpNum; i++) {
-          MachineOperand Op = DefMI->getOperand(i);
-          InverseCompare.add(Op);
-          if (Op.isReg() && Op.isKill())
-            InverseCompare->getOperand(i).setIsKill(false);
-        }
-      }
-    }
+  if (Op4.isReg()) {
+    MI.getOperand(2).setReg(Op4.getReg());
+    if (Op4.getSubReg() != AMDGPU::NoSubRegister)
+      MI.getOperand(2).setSubReg(Op4.getSubReg());
+  } else if (Op4.isImm()) {
+    MI.getOperand(2).ChangeToImmediate(Op4.getImm());
   }
-  if (*OldVCC == Reg) {
-    BuildMI(*MI.getParent(), MI, DL, TII->get(AMDGPU::V_CNDMASK_B32_e64),
-            MI.getOperand(0).getReg())
-        .add(MI.getOperand(3))
-        .add(MI.getOperand(4))
-        .add(MI.getOperand(1))
-        .add(MI.getOperand(2))
-        .addReg(*NewVCC);
-    MI.eraseFromParent();
-    return true;
+
+  MachineOperand Op1 = MI.getOperand(1);
+  MachineOperand Op3 = MI.getOperand(3);
+  MI.getOperand(1).setImm(Op3.getImm());
+  MI.getOperand(3).setImm(Op1.getImm());
+}
+
+Register SIShrinkInstructions::trySwapCndOperands(MachineInstr &MI) const {
+  Register Reg = MI.getOperand(0).getReg();
+
+  unsigned Opcode = getInverseCompareOpcode(MI);
+  SmallVector<MachineOperand *, 4> UsesToProcess;
+  if (!Opcode ||
+      !SIShrinkInstructions::shouldSwapCndOperands(MI, *TII, UsesToProcess))
+    return AMDGPU::NoRegister;
+
+  auto DL = MI.getDebugLoc();
+  Register NewVCC = MRI->createVirtualRegister(MRI->getRegClass(Reg));
+
+  MachineInstrBuilder InverseCompare =
+      BuildMI(*MI.getParent(), MI, DL, TII->get(Opcode), NewVCC);
+  InverseCompare->setFlags(MI.getFlags());
+
+  unsigned OpNum = MI.getNumExplicitOperands();
+  for (unsigned i = 1; i < OpNum; i++) {
+    MachineOperand Op = MI.getOperand(i);
+    InverseCompare.add(Op);
+    if (Op.isReg() && Op.isKill())
+      InverseCompare->getOperand(i).setIsKill(false);
   }
-  return false;
+
+  for (auto &Use : UsesToProcess) {
+    MachineInstr *Inst = Use->getParent();
+    swapCndOperands(*Inst);
+  }
+
+  MRI->replaceRegWith(Reg, NewVCC);
+  MI.eraseFromParent();
+  return NewVCC;
 }
 
 bool SIShrinkInstructions::run(MachineFunction &MF) {
@@ -950,9 +995,6 @@ bool SIShrinkInstructions::run(MachineFunction &MF) {
   unsigned VCCReg = ST->isWave32() ? AMDGPU::VCC_LO : AMDGPU::VCC;
 
   std::vector<unsigned> I1Defs;
-  Register OldVCC = AMDGPU::NoRegister;
-  Register NewVCC = AMDGPU::NoRegister;
-
   for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
                                                   BI != BE; ++BI) {
 
@@ -1083,10 +1125,6 @@ bool SIShrinkInstructions::run(MachineFunction &MF) {
         continue;
       }
 
-      if (MI.getOpcode() == AMDGPU::V_CNDMASK_B32_e64 &&
-          trySwitchOperands(MI, &OldVCC, &NewVCC))
-        MRI->setRegAllocationHint(NewVCC, 0, VCCReg);
-
       // If there is no chance we will shrink it and use VCC as sdst to get
       // a 32 bit form try to replace dead sdst with NULL.
       if (TII->isVOP3(MI.getOpcode())) {
@@ -1124,6 +1162,9 @@ bool SIShrinkInstructions::run(MachineFunction &MF) {
             // provide a hint to the register allocator to use VCC and then we
             // will run this pass again after RA and shrink it if it outputs to
             // VCC.
+            Register NewVCC = trySwapCndOperands(MI);
+            DstReg = NewVCC == AMDGPU::NoRegister ? DstReg : NewVCC;
+
             MRI->setRegAllocationHint(DstReg, 0, VCCReg);
             continue;
           }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll
index 03b713f6866a0..c17de70ba2291 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll
@@ -5155,8 +5155,8 @@ define amdgpu_ps i128 @s_saddsat_i128(i128 inreg %lhs, i128 inreg %rhs) {
 ; GFX8-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
 ; GFX8-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s[0:1]
 ; GFX8-NEXT:    s_and_b32 s0, 1, s2
-; GFX8-NEXT:    v_cmp_eq_u32_e64 vcc, 0, s0
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX8-NEXT:    v_cmp_ne_u32_e64 s[0:1], 0, s0
+; GFX8-NEXT:    v_cndmask_b32_e64 v1, v1, 0, s[0:1]
 ; GFX8-NEXT:    v_xor_b32_e32 v0, v1, v0
 ; GFX8-NEXT:    s_ashr_i32 s0, s9, 31
 ; GFX8-NEXT:    v_and_b32_e32 v0, 1, v0
@@ -5202,8 +5202,8 @@ define amdgpu_ps i128 @s_saddsat_i128(i128 inreg %lhs, i128 inreg %rhs) {
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s[0:1]
 ; GFX9-NEXT:    s_and_b32 s0, 1, s2
-; GFX9-NEXT:    v_cmp_eq_u32_e64 vcc, 0, s0
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX9-NEXT:    v_cmp_ne_u32_e64 s[0:1], 0, s0
+; GFX9-NEXT:    v_cndmask_b32_e64 v1, v1, 0, s[0:1]
 ; GFX9-NEXT:    v_xor_b32_e32 v0, v1, v0
 ; GFX9-NEXT:    s_ashr_i32 s0, s9, 31
 ; GFX9-NEXT:    v_and_b32_e32 v0, 1, v0
@@ -5241,16 +5241,16 @@ define amdgpu_ps i128 @s_saddsat_i128(i128 inreg %lhs, i128 inreg %rhs) {
 ; GFX10-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s0
 ; GFX10-NEXT:    s_and_b32 s0, 1, s10
 ; GFX10-NEXT:    s_cmp_eq_u64 s[6:7], 0
-; GFX10-NEXT:    v_cmp_ne_u32_e64 vcc_lo, 0, s0
-; GFX10-NEXT:    s_cselect_b32 s1, 1, 0
 ; GFX10-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s2
+; GFX10-NEXT:    s_cselect_b32 s1, 1, 0
+; GFX10-NEXT:    v_cmp_ne_u32_e64 vcc_lo, 0, s0
 ; GFX10-NEXT:    s_and_b32 s1, 1, s1
-; GFX10-NEXT:    s_ashr_i32 s0, s9, 31
+; GFX10-NEXT:    v_cmp_ne_u32_e64 s0, 0, s1
 ; GFX10-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc_lo
-; GFX10-NEXT:    v_cmp_eq_u32_e64 vcc_lo, 0, s1
-; GFX10-NEXT:    s_add_i32 s1, s0, 0x80000000
-; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0, v2, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e64 v1, v2, 0, s0
 ; GFX10-NEXT:    v_mov_b32_e32 v2, s5
+; GFX10-NEXT:    s_ashr_i32 s0, s9, 31
+; GFX10-NEXT:    s_add_i32 s1, s0, 0x80000000
 ; GFX10-NEXT:    v_xor_b32_e32 v0, v1, v0
 ; GFX10-NEXT:    v_mov_b32_e32 v1, s4
 ; GFX10-NEXT:    v_and_b32_e32 v0, 1, v0
@@ -5282,15 +5282,16 @@ define amdgpu_ps i128 @s_saddsat_i128(i128 inreg %lhs, i128 inreg %rhs) {
 ; GFX11-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s0
 ; GFX11-NEXT:    s_and_b32 s0, 1, s10
 ; GFX11-NEXT:    s_cmp_eq_u64 s[6:7], 0
-; GFX11-NEXT:    v_cmp_ne_u32_e64 vcc_lo, 0, s0
-; GFX11-NEXT:    s_cselect_b32 s1, 1, 0
 ; GFX11-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s2
+; GFX11-NEXT:    s_cselect_b32 s1, 1, 0
+; GFX11-NEXT:    v_cmp_ne_u32_e64 vcc_lo, 0, s0
 ; GFX11-NEXT:    s_and_b32 s1, 1, s1
-; GFX11-NEXT:    s_ashr_i32 s0, s9, 31
+; GFX11-NEXT:    v_cmp_ne_u32_e64 s0, 0, s1
 ; GFX11-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc_lo
-; GFX11-NEXT:    v_cmp_eq_u32_e64 vcc_lo, 0, s1
+; GFX11-NEXT:    v_cndmask_b32_e64 v1, v2, 0, s0
+; GFX11-NEXT:    v_mov_b32_e32 v2, s5
+; GFX11-NEXT:    s_ashr_i32 s0, s9, 31
 ; GFX11-NEXT:    s_add_i32 s1, s0, 0x80000000
-; GFX11-NEXT:    v_dual_cndmask_b32 v1, 0, v2 :: v_dual_mov_b32 v2, s5
 ; GFX11-NEXT:    v_xor_b32_e32 v0, v1, v0
 ; GFX11-NEXT:    v_dual_mov_b32 v1, s4 :: v_dual_and_b32 v0, 1, v0
 ; GFX11-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
@@ -5326,9 +5327,9 @@ define amdgpu_ps <4 x float> @saddsat_i128_sv(i128 inreg %lhs, i128 %rhs) {
 ; GFX6-NEXT:    v_cndmask_b32_e32 v6, v7, v6, vcc
 ; GFX6-NEXT:    v_cmp_gt_i64_e32 vcc, 0, v[2:3]
 ; GFX6-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; GFX6-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[2:3]
+; GFX6-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[2:3]
 ; GFX6-NEXT:    v_ashrrev_i32_e32 v3, 31, v5
-; GFX6-NEXT:    v_cndmask_b32_e64 v2, v7, 0, vcc
+; GFX6-NEXT:    v_cndmask_b32_e32 v2, 0, v7, vcc
 ; GFX6-NEXT:    v_xor_b32_e32 v2, v2, v6
 ; GFX6-NEXT:    v_bfrev_b32_e32 v6, 1
 ; GFX6-NEXT:    v_add_i32_e32 v6, vcc, v3, v6
@@ -5357,9 +5358,9 @@ define amdgpu_ps <4 x float> @saddsat_i128_sv(i128 inreg %lhs, i128 %rhs) {
 ; GFX8-NEXT:    v_cndmask_b32_e32 v6, v7, v6, vcc
 ; GFX8-NEXT:    v_cmp_gt_i64_e32 vcc, 0, v[2:3]
 ; GFX8-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; GFX8-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[2:3]
+; GFX8-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[2:3]
 ; GFX8-NEXT:    v_ashrrev_i32_e32 v3, 31, v5
-; GFX8-NEXT:    v_cndmask_b32_e64 v2, v7, 0, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v2, 0, v7, vcc
 ; GFX8-NEXT:    v_xor_b32_e32 v2, v2, v6
 ; GFX8-NEXT:    v_bfrev_b32_e32 v6, 1
 ; GFX8-NEXT:    v_add_u32_e32 v6, vcc, v3, v6
@@ -5388,9 +5389,9 @@ define amdgpu_ps <4 x float> @saddsat_i128_sv(i128 inreg %lhs, i128 %rhs) {
 ; GFX9-NEXT:    v_cndmask_b32_e32 v6, v7, v6, vcc
 ; GFX9-NEXT:    v_cmp_gt_i64_e32 vcc, 0, v[2:3]
 ; GFX9-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; GFX9-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[2:3]
+; GFX9-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[2:3]
 ; GFX9-NEXT:    v_ashrrev_i32_e32 v3, 31, v5
-; GFX9-NEXT:    v_cndmask_b32_e64 v2, v7, 0, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, 0, v7, vcc
 ; GFX9-NEXT:    v_xor_b32_e32 v2, v2, v6
 ; GFX9-NEXT:    v_and_b32_e32 v2, 1, v2
 ; GFX9-NEXT:    v_add_u32_e32 v6, 0x80000000, v3
@@ -5415,9 +5416,9 @@ define amdgpu_ps <4 x float> @saddsat_i128_sv(i128 inreg %lhs, i128 %rhs) {
 ; GFX10-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc_lo
 ; GFX10-NEXT:    v_cmp_eq_u64_e32 vcc_lo, s[2:3], v[4:5]
 ; GFX10-NEXT:    v_cndmask_b32_e32 v6, v7, v6, vcc_lo
-; GFX10-NEXT:    v_cmp_eq_u64_e32 vcc_lo, 0, v[2:3]
+; GFX10-NEXT:    v_cmp_ne_u64_e32 vcc_lo, 0, v[2:3]
 ; GFX10-NEXT:    v_ashrrev_i32_e32 v3, 31, v5
-; GFX10-NEXT:    v_cndmask_b32_e64 v2, v8, 0, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v2, 0, v8, vcc_lo
 ; GFX10-NEXT:    v_xor_b32_e32 v2, v2, v6
 ; GFX10-NEXT:    v_add_nc_u32_e32 v6, 0x80000000, v3
 ; GFX10-NEXT:    v_and_b32_e32 v2, 1, v2
@@ -5442,15 +5443,15 @@ define amdgpu_ps <4 x float> @saddsat_i128_sv(i128 inreg %lhs, i128 %rhs) {
 ; GFX11-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc_lo
 ; GFX11-NEXT:    v_cmp_eq_u64_e32 vcc_lo, s[2:3], v[4:5]
 ; GFX11-NEXT:    v_cndmask_b32_e32 v6, v7, v6, vcc_lo
-; GFX11-NEXT:    v_cmp_eq_u64_e32 vcc_lo, 0, v[2:3]
+; GFX11-NEXT:    v_cmp_ne_u64_e32 vcc_lo, 0, v[2:3]
 ; GFX11-NEXT:    v_ashrrev_i32_e32 v3, 31, v5
-; GFX11-NEXT:    v_cndmask_b32_e64 v2, v8, 0, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e32 v2, 0, v8, vcc_lo
 ; GFX11-NEXT:    v_xor_b32_e32 v2, v2, v6
 ; GFX11-NEXT:    v_add_nc_u32_e32 v6, 0x80000000, v3
 ; GFX11-NEXT:    v_and_b32_e32 v2, 1, v2
 ; GFX11-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v2
-; GFX11-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc_lo
 ; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc_lo
 ; GFX11-NEXT:    v_dual_cndmask_b32 v2, v4, v3 :: v_dual_cndmask_b32 v3, v5, v6
 ; GFX11-NEXT:    ; return to shader part epilog
   %result = call i128 @llvm.sadd.sat.i128(i128 %lhs, i128 %rhs)
@@ -5510,8 +5511,8 @@ define amdgpu_ps <4 x float> @saddsat_i128_vs(i128 %lhs, i128 inreg %rhs) {
 ; GFX8-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
 ; GFX8-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s[0:1]
 ; GFX8-NEXT:    s_and_b32 s0, 1, s4
-; GFX8-NEXT:    v_cmp_eq_u32_e64 vcc, 0, s0
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX8-NEXT:    v_cmp_ne_u32_e64 s[0:1], 0, s0
+; GFX8-NEXT:    v_cndmask_b32_e64 v1, v1, 0, s[0:1]
 ; GFX8-NEXT:    v_xor_b32_e32 v0, v1, v0
 ; GFX8-NEXT:    v_ashrrev_i32_e32 v2, 31, v7
 ; GFX8-NEXT:    v_bfrev_b32_e32 v1, 1
@@ -5544,8 +5545,8 @@ define amdgpu_ps <4 x float> @saddsat_i128_vs(i128 %lhs, i128 inreg %rhs) {
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s[0:1]
 ; GFX9-NEXT:    s_and_b32 s0, 1, s4
-; GFX9-NEXT:    v_cmp_eq_u32_e64 vcc, 0, s0
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX9-NEXT:    v_cmp_ne_u32_e64 s[0:1], 0, s0
+; GFX9-NEXT:    v_cndmask_b32_e64 v1, v1, 0, s[0:1]
 ; GFX9-NEXT:    v_xor_b32_e32 v0, v1, v0
 ; GFX9-NEXT:    v_ashrrev_i32_e32 v2, 31, v7
 ; GFX9-NEXT:    v_and_b32_e32 v0, 1, v0
@@ -5571,13 +5572,13 @@ define amdgpu_ps <4 x float> @saddsat_i128_vs(i128 %lhs, i128 inreg %rhs) {
 ; GFX10-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
 ; GFX10-NEXT:    v_cmp_lt_i64_e32 vcc_lo, v[6:7], v[2:3]
 ; GFX10-NEXT:    v_cndmask_b32_e64 v8, 0, 1, s1
+; GFX10-NEXT:    v_cmp_ne_u32_e64 s0, 0, s0
 ; GFX10-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc_lo
 ; GFX10-NEXT:    v_cmp_eq_u64_e32 vcc_lo, v[6:7], v[2:3]
 ; GFX10-NEXT:    v_ashrrev_i32_e32 v2, 31, v7
 ; GFX10-NEXT:    v_add_nc_u32_e32 v3, 0x80000000, v2
 ; GFX10-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc_lo
-; GFX10-NEXT:    v_cmp_eq_u32_e64 vcc_lo, 0, s0
-; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0, v8, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e64 v1, v8, 0, s0
 ; GFX10-NEXT:    v_xor_b32_e32 v0, v1, v0
 ; GFX10-NEXT:    v_and_b32_e32 v0, 1, v0
 ; GFX10-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
@@ -5601,18 +5602,18 @@ define amdgpu_ps <4 x float> @saddsat_i128_vs(i128 %lhs, i128 inreg %rhs) {
 ; GFX11-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
 ; GFX11-NEXT:    v_cmp_lt_i64_e32 vcc_lo, v[6:7], v[2:3]
 ; GFX11-NEXT:    v_cndmask_b32_e64 v8, 0, 1, s1
+; GFX11-NEXT:    v_cmp_ne_u32_e64 s0, 0, s0
 ; GFX11-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc_lo
 ; GFX11-NEXT:    v_cmp_eq_u64_e32 vcc_lo, v[6:7], v[2:3]
 ; GFX11-NEXT:    v_ashrrev_i32_e32 v2, 31, v7
 ; GFX11-NEXT:    v_dual_cndmask_b32 v0, v1, v0 :: v_dual_add_nc_u32 v3, 0x80000000, v2
-; GFX11-NEXT:    v_cmp_eq_u32_e64 vcc_lo, 0, s0
-; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0, v8, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e64 v1, v8, 0, s0
 ; GFX11-NEXT:    v_xor_b32_e32 v0, v1, v0
 ; GFX11-NEXT:    v_and_b32_e32 v0, 1, v0
 ; GFX11-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
 ; GFX11-NEXT:    v_cndmask_b32_e32 v1, v5, v2, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, v4, v2, vcc_lo
-; GFX11-NEXT:    v_dual_cndmask_b32 v2, v6, v2 :: v_dual_cndmask_b32 v3, v7, v3
+; GFX11-NEXT:    v_dual_cndmask_b32 v0, v4, v2 :: v_dual_cndmask_b32 v3, v7, v3
+; GFX11-NEXT:    v_cndmask_b32_e32 v2, v6, v2, vcc_lo
 ; GFX11-NEXT:    ; return to shader part epilog
   %result = call i128 @llvm.sadd.sat.i128(i128 %lhs, i128 %rhs)
   %cast = bitcast i128 %result to <4 x float>
@@ -5636,8 +5637,8 @@ define <2 x i128> @v_saddsat_v2i128(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GFX6-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
 ; GFX6-NEXT:    v_cmp_gt_i64_e32 vcc, 0, v[10:11]
 ; GFX6-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; GFX6-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[10:11]
-; GFX6-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
+; GFX6-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[10:11]
+; GFX6-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; GFX6-NEXT:    v_xor_b32_e32 v0, v1, v0
 ; GFX6-NEXT:    v_bfrev_b32_e32 v1, 1
 ; GFX6-NEXT:    v_add_i32_e32 v3, vcc, v2, v1
@@ -5660,8 +5661,8 @@ define <2 x i128> @v_saddsat_v2i128(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GFX6-NEXT:    v_cndmask_b32_e32 v4, v5, v4, vcc
 ; GFX6-NEXT:    v_cmp_gt_i64_e32 vcc, 0, v[14:15]
 ; GFX6-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; GFX6-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[14:15]
-; GFX6-NEXT:    v_cndmask_b32_e64 v5, v5, 0, vcc
+; GFX6-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[14:15]
+; GFX6-NEXT:    v_cndmask_b32_e32 v5, 0, v5, vcc
 ; GFX6-NEXT:    v_xor_b32_e32 v4, v5, v4
 ; GFX6-NEXT:    v_add_i32_e32 v7, vcc, 0x80000000, v6
 ; GFX6-NEXT:    v_and_b32_e32 v4, 1, v4
@@ -5688,8 +5689,8 @@ define <2 x i128> @v_saddsat_v2i128(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GFX8-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
 ; GFX8-NEXT:    v_cmp_gt_i64_e32 vcc, 0, v[10:11]
 ; GFX8-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; GFX8-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[10:11]
-; GFX8-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
+; GFX8-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[10:11]
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; GFX8-NEXT:    v_xor_b32_e32 v0, v1, v0
 ; GFX8-NEXT:    v_bfrev_b32_e32 v1, 1
 ; GFX8-NEXT:    v_add_u32_e32 v3, vcc, v2, v1
@@ -5712,8 +5713,8 @@ define <2 x i128> @v_saddsat_v2i128(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GFX8-NEXT:    v_cndmask_b32_e32 v4, v5, v4, vcc
 ; GFX8-NEXT:    v_cmp_gt_i64_e32 vcc, 0, v[14:15]
 ; GFX8-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; GFX8-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[14:15]
-; GFX8-NEXT:    v_cndmask_b32_e64 v5, v5, 0, vcc
+; GFX8-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[14:15]
+; GFX8-NEXT:    v_cndmask_b32_e32 v5, 0, v5, vcc
 ; GFX8-NEXT:    v_xor_b32_e32 v4, v5, v4
 ; GFX8-NEXT:    v_add_u32_e32 v7, vcc, 0x80000000, v6
 ; GFX8-NEXT:    v_and_b32_e32 v4, 1, v4
@@ -5741,8 +5742,8 @@ define <2 x i128> @v_saddsat_v2i128(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GFX9-NEXT:    v_cmp_gt_i64_e32 vcc, 0, v[10:11]
 ; GFX9-NEXT:    v_add_u32_e32 v3, 0x80000000, v2
 ; GFX9-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; GFX9-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[10:11]
-; GFX9-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
+; GFX9-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[10:11]
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; GFX9-NEXT:    v_xor_b32_e32 v0, v1, v0
 ; GFX9-NEXT:    v_and_b32_e32 v0, 1, v0
 ; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
@@ -5764,8 +5765,8 @@ define <2 x i128> @v_saddsat_v2i128(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GFX9-NEXT:    v_cmp_gt_i64_e32 vcc, 0, v[14:15]
 ; GFX9-NEXT:    v_add_u32_e32 v7, 0x80000000, v6
 ; GFX9-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; GFX9-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[14:15]
-; GFX9-NEXT:    v_cndmask_b32_e64 v5, v5, 0, vcc
+; GFX9-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[14:15]
+; GFX9-NEXT:    v_cndmask_b32_e32 v5, 0, v5, vcc
 ; GFX9-NEXT:    v_xor_b32_e32 v4, v5, v4
 ; GFX9-NEXT:    v_and_b32_e32 v4, 1, v4
 ; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
@@ -5794,8 +5795,8 @@ define <2 x i128> @v_saddsat_v2i128(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GFX10-NEXT:    v_add_co_ci_u32_e32 v13, vcc_lo, v5, v13, vcc_lo
 ; GFX10-NEXT:    v_add_co_ci_u32_e32 v18, vcc_lo, v6, v14, vcc_lo
 ; GFX10-NEXT:    v_add_co_ci_u32_e32 v19, vcc_lo, v7, v15, vcc_lo
-; GFX10-NEXT:    v_cmp_eq_u64_e32 vcc_lo, 0, v[10:11]
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc_lo
+; GFX10-NEXT:    v_cmp_ne_u64_e32 vcc_lo, 0, v[10:11]
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc_lo
 ; GFX10-NEXT:    v_cmp_lt_u64_e32 vcc_lo, v[12:13], v[4:5]
 ; GFX10-NEXT:    v_xor_b32_e32 v0, v1, v0
 ; GFX10-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc_lo
@@ -5807,9 +5808,9 @@ define <2 x i128> @v_saddsat_v2i128(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GFX10-NEXT:    v_cmp_eq_u64_e32 vcc_lo, v[18:19], v[6:7]
 ; GFX10-NEXT:    v_ashrrev_i32_e32 v6, 31, v19
 ; GFX10-NEXT:    v_cndmask_b32_e32 v1, v3, v2, vcc_lo
-; GFX10-NEXT:    v_cmp_eq_u64_e32 vcc_lo, 0, v[14:15]
+; GFX10-NEXT:    v_cmp_ne_u64_e32 vcc_lo, 0, v[14:15]
 ; GFX10-NEXT:    v_add_nc_u32_e32 v7, 0x80000000, v6
-; GFX10-NEXT:    v_cndmask_b32_e64 v2, v4, 0, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v2, 0, v4, vcc_lo
 ; GFX10-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
 ; GFX10-NEXT:    v_xor_b32_e32 v1, v2, v1
 ; GFX10-NEXT:    v_ashrrev_i32_e32 v2, 31, v17
@@ -5845,8 +5846,8 @@ define <2 x i128> @v_saddsat_v2i128(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GFX11-NEXT:    v_add_co_ci_u32_e32 v13, vcc_lo, v5, v13, vcc_lo
 ; GFX11-NEXT:    v_add_co_ci_u32_e32 v18, vcc_lo, v6, v14, vcc_lo
 ; GFX11-NEXT:    v_add_co_ci_u32_e64 v19, null, v7, v15, vcc_lo
-; GFX11-NEXT:    v_cmp_eq_u64_e32 vcc_lo, 0, v[10:11]
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc_lo
+; GFX11-NEXT:    v_cmp_ne_u64_e32 vcc_lo, 0, v[10:11]
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc_lo
 ; GFX11-NEXT:    v_cmp_lt_u64_e32 vcc_lo, v[12:13], v[4:5]
 ; GFX11-NEXT:    v_xor_b32_e32 v0, v1, v0
 ; GFX11-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc_lo
@@ -5857,9 +5858,8 @@ define <2 x i128> @v_saddsat_v2i128(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GFX11-NEXT:    v_cmp_eq_u64_e32 vcc_lo, v[18:19], v[6:7]
 ; GFX11-NEXT:    v_ashrrev_i32_e32 v6, 31, v19
 ; GFX11-NEXT:    v_cndmask_b32_e32 v1, v3, v2, vcc_lo
-; GFX11-NEXT:    v_cmp_eq_u64_e32 vcc_lo, 0, v[14:15]
-; GFX11-NEXT:    v_add_nc_u32_e32 v7, 0x80000000, v6
-; GFX11-NEXT:    v_cndmask_b32_e64 v2, v4, 0, vcc_lo
+; GFX11-NEXT:    v_cmp_ne_u64_e32 vcc_lo, 0, v[14:15]
+; GFX11-NEXT:    v_dual_cndmask_b32 v2, 0, v4 :: v_dual_add_nc_u32 v7, 0x80000000, v6
 ; GFX11-NEXT:    v_xor_b32_e32 v1, v2, v1
 ; GFX11-NEXT:    v_ashrrev_i32_e32 v2, 31, v17
 ; GFX11-NEXT:    v_and_b32_e32 v0, 1, v0
@@ -5981,8 +5981,8 @@ define amdgpu_ps <2 x i128> @s_saddsat_v2i128(<2 x i128> inreg %lhs, <2 x i128>
 ; GFX8-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
 ; GFX8-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s[0:1]
 ; GFX8-NEXT:    s_and_b32 s0, 1, s2
-; GFX8-NEXT:    v_cmp_eq_u32_e64 vcc, 0, s0
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX8-NEXT:    v_cmp_ne_u32_e64 s[0:1], 0, s0
+; GFX8-NEXT:    v_cndmask_b32_e64 v1, v1, 0, s[0:1]
 ; GFX8-NEXT:    v_xor_b32_e32 v0, v1, v0
 ; GFX8-NEXT:    s_ashr_i32 s0, s17, 31
 ; GFX8-NEXT:    v_and_b32_e32 v0, 1, v0
@@ -6020,8 +6020,8 @@ define amdgpu_ps <2 x i128> @s_saddsat_v2i128(<2 x i128> inreg %lhs, <2 x i128>
 ; GFX8-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
 ; GFX8-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s[4:5]
 ; GFX8-NEXT:    s_and_b32 s4, 1, s6
-; GFX8-NEXT:    v_cmp_eq_u32_e64 vcc, 0, s4
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX8-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, s4
+; GFX8-NEXT:    v_cndmask_b32_e64 v1, v1, 0, s[4:5]
 ; GFX8-NEXT:    v_xor_b32_e32 v0, v1, v0
 ; GFX8-NEXT:    s_ashr_i32 s4, s3, 31
 ; GFX8-NEXT:    v_and_b32_e32 v0, 1, v0
@@ -6071,8 +6071,8 @@ define amdgpu_ps <2 x i128> @s_saddsat_v2i128(<2 x i128> inreg %lhs, <2 x i128>
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s[0:1]
 ; GFX9-NEXT:    s_and_b32 s0, 1, s2
-; GFX9-NEXT:    v_cmp_eq_u32_e64 vcc, 0, s0
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX9-NEXT:    v_cmp_ne_u32_e64 s[0:1], 0, s0
+; GFX9-NEXT:    v_cndmask_b32_e64 v1, v1, 0, s[0:1]
 ; GFX9-NEXT:    v_xor_b32_e32 v0, v1, v0
 ; GFX9-NEXT:    s_ashr_i32 s0, s17, 31
 ; GFX9-NEXT:    v_and_b32_e32 v0, 1, v0
@@ -6110,8 +6110,8 @@ define amdgpu_ps <2 x i128> @s_saddsat_v2i128(<2 x i128> inreg %lhs, <2 x i128>
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s[4:5]
 ; GFX9-NEXT:    s_and_b32 s4, 1, s6
-; GFX9-NEXT:    v_cmp_eq_u32_e64 vcc, 0, s4
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX9-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, s4
+; GFX9-NEXT:    v_cndmask_b32_e64 v1, v1, 0, s[4:5]
 ; GFX9-NEXT:    v_xor_b32_e32 v0, v1, v0
 ; GFX9-NEXT:    s_ashr_i32 s4, s3, 31
 ; GFX9-NEXT:    v_and_b32_e32 v0, 1, v0
@@ -6153,53 +6153,53 @@ define amdgpu_ps <2 x i128> @s_saddsat_v2i128(<2 x i128> inreg %lhs, <2 x i128>
 ; GFX10-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s0
 ; GFX10-NEXT:    s_and_b32 s0, 1, s18
 ; GFX10-NEXT:    s_cmp_eq_u64 s[10:11], 0
-; GFX10-NEXT:    v_cmp_ne_u32_e64 vcc_lo, 0, s0
+; GFX10-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s2
 ; GFX10-NEXT:    s_cselect_b32 s1, 1, 0
-; GFX10-NEXT:    s_ashr_i32 s10, s17, 31
+; GFX10-NEXT:    v_cmp_ne_u32_e64 vcc_lo, 0, s0
 ; GFX10-NEXT:    s_and_b32 s1, 1, s1
-; GFX10-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s2
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc_lo
-; GFX10-NEXT:    v_cmp_eq_u32_e64 vcc_lo, 0, s1
+; GFX10-NEXT:    s_ashr_i32 s10, s17, 31
+; GFX10-NEXT:    v_cmp_ne_u32_e64 s0, 0, s1
 ; GFX10-NEXT:    s_add_i32 s11, s10, 0x80000000
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e64 v1, v2, 0, s0
 ; GFX10-NEXT:    s_add_u32 s0, s4, s12
 ; GFX10-NEXT:    s_addc_u32 s1, s5, s13
 ; GFX10-NEXT:    s_addc_u32 s2, s6, s14
 ; GFX10-NEXT:    v_cmp_lt_u64_e64 s4, s[0:1], s[4:5]
-; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0, v2, vcc_lo
 ; GFX10-NEXT:    s_addc_u32 s3, s7, s15
-; GFX10-NEXT:    v_mov_b32_e32 v5, s0
-; GFX10-NEXT:    s_cmp_eq_u64 s[2:3], s[6:7]
-; GFX10-NEXT:    v_mov_b32_e32 v6, s1
 ; GFX10-NEXT:    v_xor_b32_e32 v0, v1, v0
+; GFX10-NEXT:    s_cmp_eq_u64 s[2:3], s[6:7]
+; GFX10-NEXT:    v_mov_b32_e32 v5, s0
+; GFX10-NEXT:    s_cselect_b32 s12, 1, 0
 ; GFX10-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s4
 ; GFX10-NEXT:    v_cmp_lt_i64_e64 s4, s[2:3], s[6:7]
-; GFX10-NEXT:    s_cselect_b32 s12, 1, 0
 ; GFX10-NEXT:    v_cmp_lt_i64_e64 s6, s[14:15], 0
 ; GFX10-NEXT:    v_and_b32_e32 v0, 1, v0
+; GFX10-NEXT:    v_mov_b32_e32 v6, s1
 ; GFX10-NEXT:    v_mov_b32_e32 v7, s3
 ; GFX10-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s4
 ; GFX10-NEXT:    s_and_b32 s4, 1, s12
 ; GFX10-NEXT:    s_cmp_eq_u64 s[14:15], 0
-; GFX10-NEXT:    v_cmp_ne_u32_e64 vcc_lo, 0, s4
-; GFX10-NEXT:    s_cselect_b32 s5, 1, 0
 ; GFX10-NEXT:    v_cndmask_b32_e64 v3, 0, 1, s6
+; GFX10-NEXT:    s_cselect_b32 s5, 1, 0
+; GFX10-NEXT:    v_cmp_ne_u32_e64 vcc_lo, 0, s4
 ; GFX10-NEXT:    s_and_b32 s5, 1, s5
-; GFX10-NEXT:    s_ashr_i32 s4, s3, 31
+; GFX10-NEXT:    v_cmp_ne_u32_e64 s4, 0, s5
 ; GFX10-NEXT:    v_cndmask_b32_e32 v1, v2, v1, vcc_lo
-; GFX10-NEXT:    v_cmp_eq_u32_e64 vcc_lo, 0, s5
-; GFX10-NEXT:    s_add_i32 s0, s4, 0x80000000
-; GFX10-NEXT:    v_cndmask_b32_e32 v2, 0, v3, vcc_lo
-; GFX10-NEXT:    v_mov_b32_e32 v3, s8
 ; GFX10-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
 ; GFX10-NEXT:    v_mov_b32_e32 v0, s16
+; GFX10-NEXT:    v_cndmask_b32_e64 v2, v3, 0, s4
+; GFX10-NEXT:    v_mov_b32_e32 v3, s8
+; GFX10-NEXT:    s_ashr_i32 s4, s3, 31
+; GFX10-NEXT:    v_cndmask_b32_e64 v4, v4, s10, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e64 v0, v0, s10, vcc_lo
 ; GFX10-NEXT:    v_xor_b32_e32 v1, v2, v1
 ; GFX10-NEXT:    v_mov_b32_e32 v2, s17
 ; GFX10-NEXT:    v_cndmask_b32_e64 v3, v3, s10, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v4, v4, s10, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v0, s10, vcc_lo
+; GFX10-NEXT:    s_add_i32 s0, s4, 0x80000000
+; GFX10-NEXT:    v_readfirstlane_b32 s1, v4
 ; GFX10-NEXT:    v_and_b32_e32 v1, 1, v1
 ; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, s11, vcc_lo
-; GFX10-NEXT:    v_readfirstlane_b32 s1, v4
 ; GFX10-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v1
 ; GFX10-NEXT:    v_mov_b32_e32 v1, s2
 ; GFX10-NEXT:    v_readfirstlane_b32 s2, v0
@@ -6230,43 +6230,42 @@ define amdgpu_ps <2 x i128> @s_saddsat_v2i128(<2 x i128> inreg %lhs, <2 x i128>
 ; GFX11-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s0
 ; GFX11-NEXT:    s_and_b32 s0, 1, s18
 ; GFX11-NEXT:    s_cmp_eq_u64 s[10:11], 0
-; GFX11-NEXT:    v_cmp_ne_u32_e64 vcc_lo, 0, s0
+; GFX11-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s2
 ; GFX11-NEXT:    s_cselect_b32 s1, 1, 0
-; GFX11-NEXT:    s_ashr_i32 s10, s17, 31
+; GFX11-NEXT:    v_cmp_ne_u32_e64 vcc_lo, 0, s0
 ; GFX11-NEXT:    s_and_b32 s1, 1, s1
-; GFX11-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s2
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc_lo
-; GFX11-NEXT:    v_cmp_eq_u32_e64 vcc_lo, 0, s1
+; GFX11-NEXT:    s_ashr_i32 s10, s17, 31
+; GFX11-NEXT:    v_cmp_ne_u32_e64 s0, 0, s1
 ; GFX11-NEXT:    s_add_i32 s11, s10, 0x80000000
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e64 v1, v2, 0, s0
 ; GFX11-NEXT:    s_add_u32 s0, s4, s12
 ; GFX11-NEXT:    s_addc_u32 s1, s5, s13
 ; GFX11-NEXT:    s_addc_u32 s2, s6, s14
-; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0, v2, vcc_lo
 ; GFX11-NEXT:    v_cmp_lt_u64_e64 s4, s[0:1], s[4:5]
 ; GFX11-NEXT:    s_addc_u32 s3, s7, s15
-; GFX11-NEXT:    v_dual_mov_b32 v6, s1 :: v_dual_mov_b32 v7, s3
 ; GFX11-NEXT:    v_xor_b32_e32 v0, v1, v0
 ; GFX11-NEXT:    s_cmp_eq_u64 s[2:3], s[6:7]
+; GFX11-NEXT:    v_dual_mov_b32 v6, s1 :: v_dual_mov_b32 v7, s3
 ; GFX11-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s4
 ; GFX11-NEXT:    v_cmp_lt_i64_e64 s4, s[2:3], s[6:7]
 ; GFX11-NEXT:    s_cselect_b32 s12, 1, 0
 ; GFX11-NEXT:    v_cmp_lt_i64_e64 s6, s[14:15], 0
-; GFX11-NEXT:    v_mov_b32_e32 v5, s0
+; GFX11-NEXT:    v_dual_mov_b32 v5, s0 :: v_dual_and_b32 v0, 1, v0
 ; GFX11-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s4
 ; GFX11-NEXT:    s_and_b32 s4, 1, s12
 ; GFX11-NEXT:    s_cmp_eq_u64 s[14:15], 0
-; GFX11-NEXT:    v_cmp_ne_u32_e64 vcc_lo, 0, s4
+; GFX11-NEXT:    v_cndmask_b32_e64 v3, 0, 1, s6
 ; GFX11-NEXT:    s_cselect_b32 s5, 1, 0
-; GFX11-NEXT:    v_and_b32_e32 v0, 1, v0
+; GFX11-NEXT:    v_cmp_ne_u32_e64 vcc_lo, 0, s4
 ; GFX11-NEXT:    s_and_b32 s5, 1, s5
-; GFX11-NEXT:    v_cndmask_b32_e64 v3, 0, 1, s6
+; GFX11-NEXT:    v_cmp_ne_u32_e64 s4, 0, s5
 ; GFX11-NEXT:    v_cndmask_b32_e32 v1, v2, v1, vcc_lo
-; GFX11-NEXT:    v_cmp_eq_u32_e64 vcc_lo, 0, s5
-; GFX11-NEXT:    s_ashr_i32 s4, s3, 31
-; GFX11-NEXT:    s_add_i32 s0, s4, 0x80000000
-; GFX11-NEXT:    v_dual_cndmask_b32 v2, 0, v3 :: v_dual_mov_b32 v3, s8
+; GFX11-NEXT:    v_cndmask_b32_e64 v2, v3, 0, s4
+; GFX11-NEXT:    v_mov_b32_e32 v3, s8
 ; GFX11-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
 ; GFX11-NEXT:    v_mov_b32_e32 v0, s16
+; GFX11-NEXT:    s_ashr_i32 s4, s3, 31
 ; GFX11-NEXT:    v_xor_b32_e32 v1, v2, v1
 ; GFX11-NEXT:    v_mov_b32_e32 v4, s9
 ; GFX11-NEXT:    v_mov_b32_e32 v2, s17
@@ -6275,6 +6274,7 @@ define amdgpu_ps <2 x i128> @s_saddsat_v2i128(<2 x i128> inreg %lhs, <2 x i128>
 ; GFX11-NEXT:    v_and_b32_e32 v1, 1, v1
 ; GFX11-NEXT:    v_cndmask_b32_e64 v4, v4, s10, vcc_lo
 ; GFX11-NEXT:    v_cndmask_b32_e64 v2, v2, s11, vcc_lo
+; GFX11-NEXT:    s_add_i32 s0, s4, 0x80000000
 ; GFX11-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v1
 ; GFX11-NEXT:    v_mov_b32_e32 v1, s2
 ; GFX11-NEXT:    v_readfirstlane_b32 s1, v4
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/select-to-fmin-fmax.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/select-to-fmin-fmax.ll
index 1991afa83870a..22f2c47c0fddf 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/select-to-fmin-fmax.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/select-to-fmin-fmax.ll
@@ -18,7 +18,7 @@ define float @test_s32(float %a) #0 {
 ; GCN-LABEL: test_s32:
 ; GCN:       ; %bb.0: ; %entry
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    v_cmp_le_f32_e32 vcc, 0, v0
+; GCN-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v0
 ; GCN-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
 entry:
@@ -31,9 +31,9 @@ define double @test_s64(double %a) #0 {
 ; GCN-LABEL: test_s64:
 ; GCN:       ; %bb.0: ; %entry
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    v_cmp_gt_f64_e32 vcc, 0, v[0:1]
-; GCN-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
-; GCN-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
+; GCN-NEXT:    v_cmp_ngt_f64_e32 vcc, 0, v[0:1]
+; GCN-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
+; GCN-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
 entry:
   %fcmp = fcmp olt double %a, 0.0
@@ -111,9 +111,9 @@ define <2 x float> @test_v2s32(<2 x float> %a) #0 {
 ; GCN-LABEL: test_v2s32:
 ; GCN:       ; %bb.0: ; %entry
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    v_cmp_le_f32_e32 vcc, 0, v0
+; GCN-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v0
 ; GCN-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
-; GCN-NEXT:    v_cmp_le_f32_e32 vcc, 0, v1
+; GCN-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v1
 ; GCN-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
 entry:
@@ -126,13 +126,13 @@ define <4 x float> @test_v4s32(<4 x float> %a) #0 {
 ; GCN-LABEL: test_v4s32:
 ; GCN:       ; %bb.0: ; %entry
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    v_cmp_le_f32_e32 vcc, 0, v0
+; GCN-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v0
 ; GCN-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
-; GCN-NEXT:    v_cmp_le_f32_e32 vcc, 0, v1
+; GCN-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v1
 ; GCN-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
-; GCN-NEXT:    v_cmp_le_f32_e32 vcc, 0, v2
+; GCN-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v2
 ; GCN-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
-; GCN-NEXT:    v_cmp_le_f32_e32 vcc, 0, v3
+; GCN-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v3
 ; GCN-NEXT:    v_cndmask_b32_e32 v3, 0, v3, vcc
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
 entry:
@@ -145,12 +145,12 @@ define <2 x double> @test_v2s64(<2 x double> %a) #0 {
 ; GCN-LABEL: test_v2s64:
 ; GCN:       ; %bb.0: ; %entry
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    v_cmp_gt_f64_e32 vcc, 0, v[0:1]
-; GCN-NEXT:    v_cmp_gt_f64_e64 s[4:5], 0, v[2:3]
-; GCN-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
-; GCN-NEXT:    v_cndmask_b32_e64 v2, v2, 0, s[4:5]
-; GCN-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
-; GCN-NEXT:    v_cndmask_b32_e64 v3, v3, 0, s[4:5]
+; GCN-NEXT:    v_cmp_ngt_f64_e32 vcc, 0, v[0:1]
+; GCN-NEXT:    v_cmp_ngt_f64_e64 s[4:5], 0, v[2:3]
+; GCN-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
+; GCN-NEXT:    v_cndmask_b32_e64 v2, 0, v2, s[4:5]
+; GCN-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
+; GCN-NEXT:    v_cndmask_b32_e64 v3, 0, v3, s[4:5]
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
 entry:
   %fcmp = fcmp olt <2 x double> %a, zeroinitializer
diff --git a/llvm/test/CodeGen/AMDGPU/div_i128.ll b/llvm/test/CodeGen/AMDGPU/div_i128.ll
index efd633d21dba1..e964cc67f8135 100644
--- a/llvm/test/CodeGen/AMDGPU/div_i128.ll
+++ b/llvm/test/CodeGen/AMDGPU/div_i128.ll
@@ -46,9 +46,9 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
 ; GFX9-NEXT:    v_add_co_u32_e32 v3, vcc, 64, v3
 ; GFX9-NEXT:    v_addc_co_u32_e64 v4, s[6:7], 0, 0, vcc
-; GFX9-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[0:1]
+; GFX9-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[0:1]
 ; GFX9-NEXT:    v_ffbh_u32_e32 v6, v11
-; GFX9-NEXT:    v_cndmask_b32_e32 v2, v3, v2, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
 ; GFX9-NEXT:    v_ffbh_u32_e32 v3, v10
 ; GFX9-NEXT:    v_add_u32_e32 v3, 32, v3
 ; GFX9-NEXT:    v_min_u32_e32 v3, v3, v6
@@ -56,13 +56,13 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-NEXT:    v_add_u32_e32 v6, 32, v6
 ; GFX9-NEXT:    v_ffbh_u32_e32 v7, v9
 ; GFX9-NEXT:    v_min_u32_e32 v6, v6, v7
-; GFX9-NEXT:    v_cndmask_b32_e64 v4, v4, 0, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
 ; GFX9-NEXT:    v_add_co_u32_e32 v6, vcc, 64, v6
 ; GFX9-NEXT:    v_addc_co_u32_e64 v7, s[6:7], 0, 0, vcc
-; GFX9-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[10:11]
+; GFX9-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[10:11]
 ; GFX9-NEXT:    v_mov_b32_e32 v5, 0
-; GFX9-NEXT:    v_cndmask_b32_e32 v3, v6, v3, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v7, v7, 0, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v3, v6, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v7, 0, v7, vcc
 ; GFX9-NEXT:    v_sub_co_u32_e32 v2, vcc, v2, v3
 ; GFX9-NEXT:    v_subb_co_u32_e32 v3, vcc, v4, v7, vcc
 ; GFX9-NEXT:    v_subb_co_u32_e32 v4, vcc, 0, v5, vcc
@@ -2324,9 +2324,9 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
 ; GFX9-NEXT:    v_add_co_u32_e32 v9, vcc, 64, v9
 ; GFX9-NEXT:    v_addc_co_u32_e64 v10, s[6:7], 0, 0, vcc
-; GFX9-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[6:7]
+; GFX9-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[6:7]
 ; GFX9-NEXT:    v_ffbh_u32_e32 v11, v3
-; GFX9-NEXT:    v_cndmask_b32_e32 v8, v9, v8, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v8, v8, v9, vcc
 ; GFX9-NEXT:    v_ffbh_u32_e32 v9, v2
 ; GFX9-NEXT:    v_add_u32_e32 v9, 32, v9
 ; GFX9-NEXT:    v_min_u32_e32 v9, v9, v11
@@ -2334,13 +2334,13 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-NEXT:    v_add_u32_e32 v11, 32, v11
 ; GFX9-NEXT:    v_ffbh_u32_e32 v12, v1
 ; GFX9-NEXT:    v_min_u32_e32 v11, v11, v12
-; GFX9-NEXT:    v_cndmask_b32_e64 v10, v10, 0, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v10, 0, v10, vcc
 ; GFX9-NEXT:    v_add_co_u32_e32 v11, vcc, 64, v11
 ; GFX9-NEXT:    v_addc_co_u32_e64 v12, s[6:7], 0, 0, vcc
-; GFX9-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[2:3]
+; GFX9-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[2:3]
 ; GFX9-NEXT:    s_mov_b64 s[6:7], 0x7f
-; GFX9-NEXT:    v_cndmask_b32_e32 v9, v11, v9, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v13, v12, 0, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v9, v9, v11, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v13, 0, v12, vcc
 ; GFX9-NEXT:    v_sub_co_u32_e32 v12, vcc, v8, v9
 ; GFX9-NEXT:    v_subb_co_u32_e32 v13, vcc, v10, v13, vcc
 ; GFX9-NEXT:    v_mov_b32_e32 v8, 0
diff --git a/llvm/test/CodeGen/AMDGPU/div_v2i128.ll b/llvm/test/CodeGen/AMDGPU/div_v2i128.ll
index 07d7276e3b944..f08a03a890e3f 100644
--- a/llvm/test/CodeGen/AMDGPU/div_v2i128.ll
+++ b/llvm/test/CodeGen/AMDGPU/div_v2i128.ll
@@ -42,9 +42,9 @@ define <2 x i128> @v_sdiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; SDAG-NEXT:    v_addc_u32_e64 v3, s[8:9], 0, 0, s[8:9]
 ; SDAG-NEXT:    v_subb_u32_e32 v8, vcc, 0, v11, vcc
 ; SDAG-NEXT:    v_cndmask_b32_e64 v0, v10, v0, s[6:7]
-; SDAG-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[16:17]
-; SDAG-NEXT:    v_cndmask_b32_e64 v9, v3, 0, vcc
-; SDAG-NEXT:    v_cndmask_b32_e32 v10, v2, v1, vcc
+; SDAG-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[16:17]
+; SDAG-NEXT:    v_cndmask_b32_e32 v9, 0, v3, vcc
+; SDAG-NEXT:    v_cndmask_b32_e32 v10, v1, v2, vcc
 ; SDAG-NEXT:    v_ffbh_u32_e32 v3, v29
 ; SDAG-NEXT:    v_ffbh_u32_e32 v19, v28
 ; SDAG-NEXT:    v_cndmask_b32_e64 v1, v11, v8, s[6:7]
@@ -59,9 +59,9 @@ define <2 x i128> @v_sdiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; SDAG-NEXT:    v_min_u32_e32 v2, v11, v19
 ; SDAG-NEXT:    v_add_i32_e64 v3, s[6:7], 64, v8
 ; SDAG-NEXT:    v_addc_u32_e64 v8, s[6:7], 0, 0, s[6:7]
-; SDAG-NEXT:    v_cmp_ne_u64_e64 s[6:7], 0, v[0:1]
-; SDAG-NEXT:    v_cndmask_b32_e64 v8, v8, 0, s[6:7]
-; SDAG-NEXT:    v_cndmask_b32_e64 v2, v3, v2, s[6:7]
+; SDAG-NEXT:    v_cmp_eq_u64_e64 s[6:7], 0, v[0:1]
+; SDAG-NEXT:    v_cndmask_b32_e64 v8, 0, v8, s[6:7]
+; SDAG-NEXT:    v_cndmask_b32_e64 v2, v2, v3, s[6:7]
 ; SDAG-NEXT:    s_or_b64 s[6:7], vcc, s[4:5]
 ; SDAG-NEXT:    v_sub_i32_e32 v2, vcc, v2, v10
 ; SDAG-NEXT:    v_subb_u32_e32 v3, vcc, v8, v9, vcc
@@ -241,9 +241,9 @@ define <2 x i128> @v_sdiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; SDAG-NEXT:    v_cndmask_b32_e64 v0, v14, v10, s[4:5]
 ; SDAG-NEXT:    v_ffbh_u32_e32 v10, v29
 ; SDAG-NEXT:    v_ffbh_u32_e32 v11, v28
-; SDAG-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[6:7]
-; SDAG-NEXT:    v_cndmask_b32_e64 v12, v5, 0, vcc
-; SDAG-NEXT:    v_cndmask_b32_e32 v13, v4, v1, vcc
+; SDAG-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[6:7]
+; SDAG-NEXT:    v_cndmask_b32_e32 v12, 0, v5, vcc
+; SDAG-NEXT:    v_cndmask_b32_e32 v13, v1, v4, vcc
 ; SDAG-NEXT:    v_cndmask_b32_e64 v1, v15, v9, s[4:5]
 ; SDAG-NEXT:    v_or_b32_e32 v4, v29, v0
 ; SDAG-NEXT:    v_ffbh_u32_e32 v9, v0
@@ -257,9 +257,9 @@ define <2 x i128> @v_sdiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; SDAG-NEXT:    v_add_i32_e64 v5, s[4:5], 64, v10
 ; SDAG-NEXT:    v_addc_u32_e64 v9, s[4:5], 0, 0, s[4:5]
 ; SDAG-NEXT:    s_or_b64 s[6:7], vcc, s[6:7]
-; SDAG-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[0:1]
-; SDAG-NEXT:    v_cndmask_b32_e64 v9, v9, 0, vcc
-; SDAG-NEXT:    v_cndmask_b32_e32 v4, v5, v4, vcc
+; SDAG-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[0:1]
+; SDAG-NEXT:    v_cndmask_b32_e32 v9, 0, v9, vcc
+; SDAG-NEXT:    v_cndmask_b32_e32 v4, v4, v5, vcc
 ; SDAG-NEXT:    v_sub_i32_e32 v4, vcc, v4, v13
 ; SDAG-NEXT:    v_subb_u32_e32 v5, vcc, v9, v12, vcc
 ; SDAG-NEXT:    v_xor_b32_e32 v9, 0x7f, v4
@@ -863,12 +863,12 @@ define <2 x i128> @v_udiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; SDAG-NEXT:    v_addc_u32_e64 v20, s[4:5], 0, 0, vcc
 ; SDAG-NEXT:    v_add_i32_e32 v19, vcc, 64, v19
 ; SDAG-NEXT:    v_addc_u32_e64 v21, s[4:5], 0, 0, vcc
-; SDAG-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[10:11]
-; SDAG-NEXT:    v_cndmask_b32_e64 v20, v20, 0, vcc
-; SDAG-NEXT:    v_cndmask_b32_e32 v16, v17, v16, vcc
-; SDAG-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[2:3]
-; SDAG-NEXT:    v_cndmask_b32_e64 v17, v21, 0, vcc
-; SDAG-NEXT:    v_cndmask_b32_e32 v18, v19, v18, vcc
+; SDAG-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[10:11]
+; SDAG-NEXT:    v_cndmask_b32_e32 v20, 0, v20, vcc
+; SDAG-NEXT:    v_cndmask_b32_e32 v16, v16, v17, vcc
+; SDAG-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[2:3]
+; SDAG-NEXT:    v_cndmask_b32_e32 v17, 0, v21, vcc
+; SDAG-NEXT:    v_cndmask_b32_e32 v18, v18, v19, vcc
 ; SDAG-NEXT:    v_sub_i32_e32 v22, vcc, v16, v18
 ; SDAG-NEXT:    v_subb_u32_e32 v23, vcc, v20, v17, vcc
 ; SDAG-NEXT:    v_xor_b32_e32 v16, 0x7f, v22
@@ -1038,12 +1038,12 @@ define <2 x i128> @v_udiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; SDAG-NEXT:    v_addc_u32_e64 v8, s[4:5], 0, 0, vcc
 ; SDAG-NEXT:    v_add_i32_e32 v3, vcc, 64, v3
 ; SDAG-NEXT:    v_addc_u32_e64 v9, s[4:5], 0, 0, vcc
-; SDAG-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[14:15]
-; SDAG-NEXT:    v_cndmask_b32_e64 v8, v8, 0, vcc
-; SDAG-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
-; SDAG-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[6:7]
-; SDAG-NEXT:    v_cndmask_b32_e64 v1, v9, 0, vcc
-; SDAG-NEXT:    v_cndmask_b32_e32 v2, v3, v2, vcc
+; SDAG-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[14:15]
+; SDAG-NEXT:    v_cndmask_b32_e32 v8, 0, v8, vcc
+; SDAG-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; SDAG-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[6:7]
+; SDAG-NEXT:    v_cndmask_b32_e32 v1, 0, v9, vcc
+; SDAG-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
 ; SDAG-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
 ; SDAG-NEXT:    v_subb_u32_e32 v1, vcc, v8, v1, vcc
 ; SDAG-NEXT:    v_xor_b32_e32 v2, 0x7f, v0
@@ -1595,9 +1595,9 @@ define <2 x i128> @v_srem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; SDAG-NEXT:    v_addc_u32_e64 v18, s[8:9], 0, 0, s[8:9]
 ; SDAG-NEXT:    v_subb_u32_e32 v20, vcc, 0, v11, vcc
 ; SDAG-NEXT:    v_cndmask_b32_e64 v2, v10, v9, s[4:5]
-; SDAG-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[0:1]
-; SDAG-NEXT:    v_cndmask_b32_e64 v18, v18, 0, vcc
-; SDAG-NEXT:    v_cndmask_b32_e32 v10, v8, v3, vcc
+; SDAG-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[0:1]
+; SDAG-NEXT:    v_cndmask_b32_e32 v18, 0, v18, vcc
+; SDAG-NEXT:    v_cndmask_b32_e32 v10, v3, v8, vcc
 ; SDAG-NEXT:    v_ffbh_u32_e32 v9, v31
 ; SDAG-NEXT:    v_ffbh_u32_e32 v21, v30
 ; SDAG-NEXT:    v_cndmask_b32_e64 v3, v11, v20, s[4:5]
@@ -1612,9 +1612,9 @@ define <2 x i128> @v_srem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; SDAG-NEXT:    v_min_u32_e32 v8, v20, v21
 ; SDAG-NEXT:    v_add_i32_e64 v9, s[4:5], 64, v11
 ; SDAG-NEXT:    v_addc_u32_e64 v11, s[4:5], 0, 0, s[4:5]
-; SDAG-NEXT:    v_cmp_ne_u64_e64 s[4:5], 0, v[2:3]
-; SDAG-NEXT:    v_cndmask_b32_e64 v11, v11, 0, s[4:5]
-; SDAG-NEXT:    v_cndmask_b32_e64 v8, v9, v8, s[4:5]
+; SDAG-NEXT:    v_cmp_eq_u64_e64 s[4:5], 0, v[2:3]
+; SDAG-NEXT:    v_cndmask_b32_e64 v11, 0, v11, s[4:5]
+; SDAG-NEXT:    v_cndmask_b32_e64 v8, v8, v9, s[4:5]
 ; SDAG-NEXT:    s_or_b64 s[6:7], vcc, s[6:7]
 ; SDAG-NEXT:    v_sub_i32_e32 v10, vcc, v8, v10
 ; SDAG-NEXT:    v_subb_u32_e32 v11, vcc, v11, v18, vcc
@@ -1792,9 +1792,9 @@ define <2 x i128> @v_srem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; SDAG-NEXT:    v_cndmask_b32_e64 v6, v14, v11, s[4:5]
 ; SDAG-NEXT:    v_ffbh_u32_e32 v11, v37
 ; SDAG-NEXT:    v_ffbh_u32_e32 v14, v36
-; SDAG-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[4:5]
-; SDAG-NEXT:    v_cndmask_b32_e64 v12, v12, 0, vcc
-; SDAG-NEXT:    v_cndmask_b32_e32 v19, v10, v7, vcc
+; SDAG-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[4:5]
+; SDAG-NEXT:    v_cndmask_b32_e32 v12, 0, v12, vcc
+; SDAG-NEXT:    v_cndmask_b32_e32 v19, v7, v10, vcc
 ; SDAG-NEXT:    v_cndmask_b32_e64 v7, v15, v13, s[4:5]
 ; SDAG-NEXT:    v_or_b32_e32 v10, v37, v6
 ; SDAG-NEXT:    v_ffbh_u32_e32 v13, v6
@@ -1808,9 +1808,9 @@ define <2 x i128> @v_srem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; SDAG-NEXT:    v_add_i32_e64 v11, s[4:5], 64, v14
 ; SDAG-NEXT:    v_addc_u32_e64 v13, s[4:5], 0, 0, s[4:5]
 ; SDAG-NEXT:    s_or_b64 s[6:7], vcc, s[6:7]
-; SDAG-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[6:7]
-; SDAG-NEXT:    v_cndmask_b32_e64 v13, v13, 0, vcc
-; SDAG-NEXT:    v_cndmask_b32_e32 v10, v11, v10, vcc
+; SDAG-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[6:7]
+; SDAG-NEXT:    v_cndmask_b32_e32 v13, 0, v13, vcc
+; SDAG-NEXT:    v_cndmask_b32_e32 v10, v10, v11, vcc
 ; SDAG-NEXT:    v_sub_i32_e32 v10, vcc, v10, v19
 ; SDAG-NEXT:    v_subb_u32_e32 v11, vcc, v13, v12, vcc
 ; SDAG-NEXT:    v_xor_b32_e32 v14, 0x7f, v10
@@ -2493,12 +2493,12 @@ define <2 x i128> @v_urem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; SDAG-NEXT:    v_addc_u32_e64 v20, s[4:5], 0, 0, vcc
 ; SDAG-NEXT:    v_add_i32_e32 v19, vcc, 64, v19
 ; SDAG-NEXT:    v_addc_u32_e64 v21, s[4:5], 0, 0, vcc
-; SDAG-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[10:11]
-; SDAG-NEXT:    v_cndmask_b32_e64 v20, v20, 0, vcc
-; SDAG-NEXT:    v_cndmask_b32_e32 v16, v17, v16, vcc
-; SDAG-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[2:3]
-; SDAG-NEXT:    v_cndmask_b32_e64 v17, v21, 0, vcc
-; SDAG-NEXT:    v_cndmask_b32_e32 v18, v19, v18, vcc
+; SDAG-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[10:11]
+; SDAG-NEXT:    v_cndmask_b32_e32 v20, 0, v20, vcc
+; SDAG-NEXT:    v_cndmask_b32_e32 v16, v16, v17, vcc
+; SDAG-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[2:3]
+; SDAG-NEXT:    v_cndmask_b32_e32 v17, 0, v21, vcc
+; SDAG-NEXT:    v_cndmask_b32_e32 v18, v18, v19, vcc
 ; SDAG-NEXT:    v_sub_i32_e32 v18, vcc, v16, v18
 ; SDAG-NEXT:    v_subb_u32_e32 v19, vcc, v20, v17, vcc
 ; SDAG-NEXT:    v_xor_b32_e32 v16, 0x7f, v18
@@ -2668,12 +2668,12 @@ define <2 x i128> @v_urem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; SDAG-NEXT:    v_addc_u32_e64 v20, s[4:5], 0, 0, vcc
 ; SDAG-NEXT:    v_add_i32_e32 v19, vcc, 64, v19
 ; SDAG-NEXT:    v_addc_u32_e64 v21, s[4:5], 0, 0, vcc
-; SDAG-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[14:15]
-; SDAG-NEXT:    v_cndmask_b32_e64 v20, v20, 0, vcc
-; SDAG-NEXT:    v_cndmask_b32_e32 v16, v17, v16, vcc
-; SDAG-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[6:7]
-; SDAG-NEXT:    v_cndmask_b32_e64 v17, v21, 0, vcc
-; SDAG-NEXT:    v_cndmask_b32_e32 v18, v19, v18, vcc
+; SDAG-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[14:15]
+; SDAG-NEXT:    v_cndmask_b32_e32 v20, 0, v20, vcc
+; SDAG-NEXT:    v_cndmask_b32_e32 v16, v16, v17, vcc
+; SDAG-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[6:7]
+; SDAG-NEXT:    v_cndmask_b32_e32 v17, 0, v21, vcc
+; SDAG-NEXT:    v_cndmask_b32_e32 v18, v18, v19, vcc
 ; SDAG-NEXT:    v_sub_i32_e32 v16, vcc, v16, v18
 ; SDAG-NEXT:    v_subb_u32_e32 v17, vcc, v20, v17, vcc
 ; SDAG-NEXT:    v_xor_b32_e32 v18, 0x7f, v16
diff --git a/llvm/test/CodeGen/AMDGPU/extract-subvector.ll b/llvm/test/CodeGen/AMDGPU/extract-subvector.ll
index 61c0b8b861d5b..672f83413b05d 100644
--- a/llvm/test/CodeGen/AMDGPU/extract-subvector.ll
+++ b/llvm/test/CodeGen/AMDGPU/extract-subvector.ll
@@ -394,10 +394,10 @@ define <2 x double> @extract_2xf64(ptr addrspace(1) %p0, ptr addrspace(1) %p1, i
 ; GCN-NEXT:    s_or_b64 exec, exec, s[4:5]
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-NEXT:    v_mov_b32_e32 v0, 0xbff00000
-; GCN-NEXT:    v_cmp_lt_f64_e32 vcc, -1.0, v[4:5]
-; GCN-NEXT:    v_cndmask_b32_e64 v1, v0, -2.0, vcc
-; GCN-NEXT:    v_cmp_lt_f64_e32 vcc, -1.0, v[6:7]
-; GCN-NEXT:    v_cndmask_b32_e64 v3, v0, -2.0, vcc
+; GCN-NEXT:    v_cmp_nlt_f64_e32 vcc, -1.0, v[4:5]
+; GCN-NEXT:    v_cndmask_b32_e32 v1, -2.0, v0, vcc
+; GCN-NEXT:    v_cmp_nlt_f64_e32 vcc, -1.0, v[6:7]
+; GCN-NEXT:    v_cndmask_b32_e32 v3, -2.0, v0, vcc
 ; GCN-NEXT:    v_mov_b32_e32 v0, 0
 ; GCN-NEXT:    v_mov_b32_e32 v2, 0
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.exp.ll b/llvm/test/CodeGen/AMDGPU/llvm.exp.ll
index 30806565da7df..7355bd66fc12a 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.exp.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.exp.ll
@@ -69,7 +69,7 @@ define amdgpu_kernel void @s_exp_f32(ptr addrspace(1) %out, float %in) {
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x7f800000
 ; VI-GISEL-NEXT:    v_ldexp_f32 v0, v0, v1
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v1, 0xc2ce8ed0
-; VI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s2, v1
+; VI-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, s2, v1
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v1, 0x42b17218
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
 ; VI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, s2, v1
@@ -123,7 +123,7 @@ define amdgpu_kernel void @s_exp_f32(ptr addrspace(1) %out, float %in) {
 ; GFX900-GISEL-NEXT:    v_cvt_i32_f32_e32 v1, v3
 ; GFX900-GISEL-NEXT:    v_exp_f32_e32 v0, v0
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
-; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s2, v2
+; GFX900-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, s2, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0x7f800000
 ; GFX900-GISEL-NEXT:    v_ldexp_f32 v0, v0, v1
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v1, 0x42b17218
@@ -179,7 +179,7 @@ define amdgpu_kernel void @s_exp_f32(ptr addrspace(1) %out, float %in) {
 ; SI-GISEL-NEXT:    v_cvt_i32_f32_e32 v1, v3
 ; SI-GISEL-NEXT:    v_exp_f32_e32 v0, v0
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
-; SI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s2, v2
+; SI-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, s2, v2
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x7f800000
 ; SI-GISEL-NEXT:    v_ldexp_f32_e32 v0, v0, v1
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v1, 0x42b17218
@@ -424,14 +424,14 @@ define amdgpu_kernel void @s_exp_v2f32(ptr addrspace(1) %out, <2 x float> %in) {
 ; VI-GISEL-NEXT:    v_exp_f32_e32 v5, v0
 ; VI-GISEL-NEXT:    v_ldexp_f32 v2, v2, v3
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v3, 0xc2ce8ed0
-; VI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s2, v3
+; VI-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, s2, v3
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v4, 0x42b17218
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v6, 0x7f800000
 ; VI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, s2, v4
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v2, v6, vcc
 ; VI-GISEL-NEXT:    v_ldexp_f32 v1, v5, v1
-; VI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s3, v3
+; VI-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, s3, v3
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; VI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, s3, v4
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v3, s1
@@ -504,13 +504,13 @@ define amdgpu_kernel void @s_exp_v2f32(ptr addrspace(1) %out, <2 x float> %in) {
 ; GFX900-GISEL-NEXT:    v_exp_f32_e32 v5, v0
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v4, 0xc2ce8ed0
 ; GFX900-GISEL-NEXT:    v_ldexp_f32 v2, v2, v3
-; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s2, v4
+; GFX900-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, s2, v4
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v3, 0x42b17218
 ; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
 ; GFX900-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, s2, v3
 ; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v0, v2, v6, vcc
 ; GFX900-GISEL-NEXT:    v_ldexp_f32 v1, v5, v1
-; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s3, v4
+; GFX900-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, s3, v4
 ; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; GFX900-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, s3, v3
 ; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v6, vcc
@@ -585,13 +585,13 @@ define amdgpu_kernel void @s_exp_v2f32(ptr addrspace(1) %out, <2 x float> %in) {
 ; SI-GISEL-NEXT:    v_exp_f32_e32 v5, v0
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v4, 0xc2ce8ed0
 ; SI-GISEL-NEXT:    v_ldexp_f32_e32 v2, v2, v3
-; SI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s2, v4
+; SI-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, s2, v4
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x42b17218
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
 ; SI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, s2, v3
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v2, v6, vcc
 ; SI-GISEL-NEXT:    v_ldexp_f32_e32 v1, v5, v1
-; SI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s3, v4
+; SI-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, s3, v4
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; SI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, s3, v3
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v6, vcc
@@ -960,7 +960,7 @@ define amdgpu_kernel void @s_exp_v3f32(ptr addrspace(1) %out, <3 x float> %in) {
 ; VI-GISEL-NEXT:    v_exp_f32_e32 v5, v5
 ; VI-GISEL-NEXT:    v_ldexp_f32 v0, v0, v3
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v3, 0xc2ce8ed0
-; VI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s0, v3
+; VI-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, s0, v3
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v4, 0x42b17218
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
 ; VI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, s0, v4
@@ -981,12 +981,12 @@ define amdgpu_kernel void @s_exp_v3f32(ptr addrspace(1) %out, <3 x float> %in) {
 ; VI-GISEL-NEXT:    v_exp_f32_e32 v6, v1
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v7, 0x7f800000
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc
-; VI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s1, v3
+; VI-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, s1, v3
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v5, vcc
 ; VI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, s1, v4
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
 ; VI-GISEL-NEXT:    v_ldexp_f32 v2, v6, v2
-; VI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s2, v3
+; VI-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, s2, v3
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
 ; VI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, s2, v4
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v3, s4
@@ -1080,7 +1080,7 @@ define amdgpu_kernel void @s_exp_v3f32(ptr addrspace(1) %out, <3 x float> %in) {
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v4, 0xc2ce8ed0
 ; GFX900-GISEL-NEXT:    v_add_f32_e32 v1, v6, v1
 ; GFX900-GISEL-NEXT:    v_ldexp_f32 v0, v0, v3
-; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s0, v4
+; GFX900-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, s0, v4
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v3, 0x42b17218
 ; GFX900-GISEL-NEXT:    v_cvt_i32_f32_e32 v2, v2
 ; GFX900-GISEL-NEXT:    v_exp_f32_e32 v6, v1
@@ -1088,12 +1088,12 @@ define amdgpu_kernel void @s_exp_v3f32(ptr addrspace(1) %out, <3 x float> %in) {
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v7, 0x7f800000
 ; GFX900-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, s0, v3
 ; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc
-; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s1, v4
+; GFX900-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, s1, v4
 ; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v5, vcc
 ; GFX900-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, s1, v3
 ; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
 ; GFX900-GISEL-NEXT:    v_ldexp_f32 v2, v6, v2
-; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s2, v4
+; GFX900-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, s2, v4
 ; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
 ; GFX900-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, s2, v3
 ; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v2, v2, v7, vcc
@@ -1189,7 +1189,7 @@ define amdgpu_kernel void @s_exp_v3f32(ptr addrspace(1) %out, <3 x float> %in) {
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v4, 0xc2ce8ed0
 ; SI-GISEL-NEXT:    v_add_f32_e32 v1, v6, v1
 ; SI-GISEL-NEXT:    v_ldexp_f32_e32 v0, v0, v3
-; SI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s0, v4
+; SI-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, s0, v4
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x42b17218
 ; SI-GISEL-NEXT:    v_cvt_i32_f32_e32 v2, v2
 ; SI-GISEL-NEXT:    v_exp_f32_e32 v6, v1
@@ -1197,12 +1197,12 @@ define amdgpu_kernel void @s_exp_v3f32(ptr addrspace(1) %out, <3 x float> %in) {
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v7, 0x7f800000
 ; SI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, s0, v3
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc
-; SI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s1, v4
+; SI-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, s1, v4
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v5, vcc
 ; SI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, s1, v3
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
 ; SI-GISEL-NEXT:    v_ldexp_f32_e32 v2, v6, v2
-; SI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s2, v4
+; SI-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, s2, v4
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
 ; SI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, s2, v3
 ; SI-GISEL-NEXT:    s_mov_b32 s7, 0xf000
@@ -1719,7 +1719,7 @@ define amdgpu_kernel void @s_exp_v4f32(ptr addrspace(1) %out, <4 x float> %in) {
 ; VI-GISEL-NEXT:    v_add_f32_e32 v1, v6, v1
 ; VI-GISEL-NEXT:    v_cvt_i32_f32_e32 v6, v7
 ; VI-GISEL-NEXT:    v_exp_f32_e32 v1, v1
-; VI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s0, v4
+; VI-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, s0, v4
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
 ; VI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, s0, v5
 ; VI-GISEL-NEXT:    s_and_b32 s0, s2, 0xfffff000
@@ -1752,18 +1752,18 @@ define amdgpu_kernel void @s_exp_v4f32(ptr addrspace(1) %out, <4 x float> %in) {
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v7, 0x7f800000
 ; VI-GISEL-NEXT:    v_add_f32_e32 v2, v2, v3
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc
-; VI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s1, v4
+; VI-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, s1, v4
 ; VI-GISEL-NEXT:    v_cvt_i32_f32_e32 v3, v8
 ; VI-GISEL-NEXT:    v_exp_f32_e32 v8, v2
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; VI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, s1, v5
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
-; VI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s2, v4
+; VI-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, s2, v4
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v2, 0, v6, vcc
 ; VI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, s2, v5
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v2, v2, v7, vcc
 ; VI-GISEL-NEXT:    v_ldexp_f32 v3, v8, v3
-; VI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s3, v4
+; VI-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, s3, v4
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v3, 0, v3, vcc
 ; VI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, s3, v5
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v4, s4
@@ -1855,7 +1855,7 @@ define amdgpu_kernel void @s_exp_v4f32(ptr addrspace(1) %out, <4 x float> %in) {
 ; GFX900-GISEL-NEXT:    v_cvt_i32_f32_e32 v1, v4
 ; GFX900-GISEL-NEXT:    v_exp_f32_e32 v0, v0
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v4, 0xc2ce8ed0
-; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s0, v4
+; GFX900-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, s0, v4
 ; GFX900-GISEL-NEXT:    v_ldexp_f32 v0, v0, v1
 ; GFX900-GISEL-NEXT:    v_mul_f32_e32 v1, s1, v2
 ; GFX900-GISEL-NEXT:    v_fma_f32 v6, s1, v2, -v1
@@ -1878,7 +1878,7 @@ define amdgpu_kernel void @s_exp_v4f32(ptr addrspace(1) %out, <4 x float> %in) {
 ; GFX900-GISEL-NEXT:    v_cvt_i32_f32_e32 v8, v9
 ; GFX900-GISEL-NEXT:    v_exp_f32_e32 v6, v6
 ; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc
-; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s1, v4
+; GFX900-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, s1, v4
 ; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; GFX900-GISEL-NEXT:    v_ldexp_f32 v6, v6, v8
 ; GFX900-GISEL-NEXT:    v_mul_f32_e32 v8, s3, v2
@@ -1891,12 +1891,12 @@ define amdgpu_kernel void @s_exp_v4f32(ptr addrspace(1) %out, <4 x float> %in) {
 ; GFX900-GISEL-NEXT:    v_exp_f32_e32 v8, v2
 ; GFX900-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, s1, v5
 ; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
-; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s2, v4
+; GFX900-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, s2, v4
 ; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v2, 0, v6, vcc
 ; GFX900-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, s2, v5
 ; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v2, v2, v7, vcc
 ; GFX900-GISEL-NEXT:    v_ldexp_f32 v3, v8, v3
-; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s3, v4
+; GFX900-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, s3, v4
 ; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v3, 0, v3, vcc
 ; GFX900-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, s3, v5
 ; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v3, v3, v7, vcc
@@ -1989,7 +1989,7 @@ define amdgpu_kernel void @s_exp_v4f32(ptr addrspace(1) %out, <4 x float> %in) {
 ; SI-GISEL-NEXT:    v_cvt_i32_f32_e32 v1, v4
 ; SI-GISEL-NEXT:    v_exp_f32_e32 v0, v0
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v4, 0xc2ce8ed0
-; SI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s0, v4
+; SI-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, s0, v4
 ; SI-GISEL-NEXT:    s_mov_b32 s6, -1
 ; SI-GISEL-NEXT:    v_ldexp_f32_e32 v0, v0, v1
 ; SI-GISEL-NEXT:    v_mul_f32_e32 v1, s1, v2
@@ -2013,7 +2013,7 @@ define amdgpu_kernel void @s_exp_v4f32(ptr addrspace(1) %out, <4 x float> %in) {
 ; SI-GISEL-NEXT:    v_cvt_i32_f32_e32 v8, v9
 ; SI-GISEL-NEXT:    v_exp_f32_e32 v6, v6
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc
-; SI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s1, v4
+; SI-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, s1, v4
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; SI-GISEL-NEXT:    v_ldexp_f32_e32 v6, v6, v8
 ; SI-GISEL-NEXT:    v_mul_f32_e32 v8, s3, v2
@@ -2026,12 +2026,12 @@ define amdgpu_kernel void @s_exp_v4f32(ptr addrspace(1) %out, <4 x float> %in) {
 ; SI-GISEL-NEXT:    v_exp_f32_e32 v8, v2
 ; SI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, s1, v5
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
-; SI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s2, v4
+; SI-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, s2, v4
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v2, 0, v6, vcc
 ; SI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, s2, v5
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v2, v2, v7, vcc
 ; SI-GISEL-NEXT:    v_ldexp_f32_e32 v3, v8, v3
-; SI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s3, v4
+; SI-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, s3, v4
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v3, 0, v3, vcc
 ; SI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, s3, v5
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v3, v3, v7, vcc
@@ -2565,7 +2565,7 @@ define float @v_exp_f32(float %in) {
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; VI-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
-; VI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, v0, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x42b17218
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; VI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v2
@@ -2611,7 +2611,7 @@ define float @v_exp_f32(float %in) {
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; GFX900-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
-; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, v0, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0x42b17218
 ; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; GFX900-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v2
@@ -2657,7 +2657,7 @@ define float @v_exp_f32(float %in) {
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; SI-GISEL-NEXT:    v_ldexp_f32_e32 v1, v1, v2
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
-; SI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, v0, v2
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x42b17218
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; SI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v2
@@ -2725,9 +2725,9 @@ define float @v_exp_fabs_f32(float %in) {
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; VI-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
-; VI-GISEL-NEXT:    v_cmp_ge_f32_e64 vcc, |v0|, v2
+; VI-GISEL-NEXT:    v_cmp_lt_f32_e64 s[4:5], |v0|, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x42b17218
-; VI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, 0, s[4:5]
 ; VI-GISEL-NEXT:    v_cmp_gt_f32_e64 vcc, |v0|, v2
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v3, vcc
 ; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -2771,9 +2771,9 @@ define float @v_exp_fabs_f32(float %in) {
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; GFX900-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
-; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e64 vcc, |v0|, v2
+; GFX900-GISEL-NEXT:    v_cmp_lt_f32_e64 s[4:5], |v0|, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0x42b17218
-; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, 0, s[4:5]
 ; GFX900-GISEL-NEXT:    v_cmp_gt_f32_e64 vcc, |v0|, v2
 ; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v3, vcc
 ; GFX900-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -2817,9 +2817,9 @@ define float @v_exp_fabs_f32(float %in) {
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; SI-GISEL-NEXT:    v_ldexp_f32_e32 v1, v1, v2
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
-; SI-GISEL-NEXT:    v_cmp_ge_f32_e64 vcc, |v0|, v2
+; SI-GISEL-NEXT:    v_cmp_lt_f32_e64 s[4:5], |v0|, v2
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x42b17218
-; SI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, 0, s[4:5]
 ; SI-GISEL-NEXT:    v_cmp_gt_f32_e64 vcc, |v0|, v2
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v3, vcc
 ; SI-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -2886,9 +2886,9 @@ define float @v_exp_fneg_fabs_f32(float %in) {
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; VI-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
-; VI-GISEL-NEXT:    v_cmp_ge_f32_e64 vcc, -|v0|, v2
+; VI-GISEL-NEXT:    v_cmp_lt_f32_e64 s[4:5], -|v0|, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x42b17218
-; VI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, 0, s[4:5]
 ; VI-GISEL-NEXT:    v_cmp_gt_f32_e64 vcc, -|v0|, v2
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v3, vcc
 ; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -2932,9 +2932,9 @@ define float @v_exp_fneg_fabs_f32(float %in) {
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; GFX900-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
-; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e64 vcc, -|v0|, v2
+; GFX900-GISEL-NEXT:    v_cmp_lt_f32_e64 s[4:5], -|v0|, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0x42b17218
-; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, 0, s[4:5]
 ; GFX900-GISEL-NEXT:    v_cmp_gt_f32_e64 vcc, -|v0|, v2
 ; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v3, vcc
 ; GFX900-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -2978,9 +2978,9 @@ define float @v_exp_fneg_fabs_f32(float %in) {
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; SI-GISEL-NEXT:    v_ldexp_f32_e32 v1, v1, v2
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
-; SI-GISEL-NEXT:    v_cmp_ge_f32_e64 vcc, -|v0|, v2
+; SI-GISEL-NEXT:    v_cmp_lt_f32_e64 s[4:5], -|v0|, v2
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x42b17218
-; SI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, 0, s[4:5]
 ; SI-GISEL-NEXT:    v_cmp_gt_f32_e64 vcc, -|v0|, v2
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v3, vcc
 ; SI-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -3048,9 +3048,9 @@ define float @v_exp_fneg_f32(float %in) {
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; VI-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
-; VI-GISEL-NEXT:    v_cmp_ge_f32_e64 vcc, -v0, v2
+; VI-GISEL-NEXT:    v_cmp_lt_f32_e64 s[4:5], -v0, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x42b17218
-; VI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, 0, s[4:5]
 ; VI-GISEL-NEXT:    v_cmp_gt_f32_e64 vcc, -v0, v2
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v3, vcc
 ; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -3094,9 +3094,9 @@ define float @v_exp_fneg_f32(float %in) {
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; GFX900-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
-; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e64 vcc, -v0, v2
+; GFX900-GISEL-NEXT:    v_cmp_lt_f32_e64 s[4:5], -v0, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0x42b17218
-; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, 0, s[4:5]
 ; GFX900-GISEL-NEXT:    v_cmp_gt_f32_e64 vcc, -v0, v2
 ; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v3, vcc
 ; GFX900-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -3140,9 +3140,9 @@ define float @v_exp_fneg_f32(float %in) {
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; SI-GISEL-NEXT:    v_ldexp_f32_e32 v1, v1, v2
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
-; SI-GISEL-NEXT:    v_cmp_ge_f32_e64 vcc, -v0, v2
+; SI-GISEL-NEXT:    v_cmp_lt_f32_e64 s[4:5], -v0, v2
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x42b17218
-; SI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, 0, s[4:5]
 ; SI-GISEL-NEXT:    v_cmp_gt_f32_e64 vcc, -v0, v2
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v3, vcc
 ; SI-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -3400,7 +3400,7 @@ define float @v_exp_f32_ninf(float %in) {
 ; VI-GISEL-NEXT:    v_exp_f32_e32 v1, v1
 ; VI-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
-; VI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, v0, v2
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
 ; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -3438,7 +3438,7 @@ define float @v_exp_f32_ninf(float %in) {
 ; GFX900-GISEL-NEXT:    v_exp_f32_e32 v1, v1
 ; GFX900-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
-; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, v0, v2
 ; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
 ; GFX900-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -3476,7 +3476,7 @@ define float @v_exp_f32_ninf(float %in) {
 ; SI-GISEL-NEXT:    v_exp_f32_e32 v1, v1
 ; SI-GISEL-NEXT:    v_ldexp_f32_e32 v1, v1, v2
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
-; SI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, v0, v2
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
 ; SI-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -3770,7 +3770,7 @@ define float @v_exp_f32_daz(float %in) #0 {
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; VI-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
-; VI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, v0, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x42b17218
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; VI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v2
@@ -3816,7 +3816,7 @@ define float @v_exp_f32_daz(float %in) #0 {
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; GFX900-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
-; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, v0, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0x42b17218
 ; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; GFX900-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v2
@@ -3862,7 +3862,7 @@ define float @v_exp_f32_daz(float %in) #0 {
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; SI-GISEL-NEXT:    v_ldexp_f32_e32 v1, v1, v2
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
-; SI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, v0, v2
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x42b17218
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; SI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v2
@@ -3928,7 +3928,7 @@ define float @v_exp_f32_nnan(float %in) {
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; VI-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
-; VI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, v0, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x42b17218
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; VI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v2
@@ -3974,7 +3974,7 @@ define float @v_exp_f32_nnan(float %in) {
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; GFX900-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
-; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, v0, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0x42b17218
 ; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; GFX900-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v2
@@ -4020,7 +4020,7 @@ define float @v_exp_f32_nnan(float %in) {
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; SI-GISEL-NEXT:    v_ldexp_f32_e32 v1, v1, v2
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
-; SI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, v0, v2
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x42b17218
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; SI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v2
@@ -4086,7 +4086,7 @@ define float @v_exp_f32_nnan_daz(float %in) #0 {
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; VI-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
-; VI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, v0, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x42b17218
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; VI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v2
@@ -4132,7 +4132,7 @@ define float @v_exp_f32_nnan_daz(float %in) #0 {
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; GFX900-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
-; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, v0, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0x42b17218
 ; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; GFX900-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v2
@@ -4178,7 +4178,7 @@ define float @v_exp_f32_nnan_daz(float %in) #0 {
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; SI-GISEL-NEXT:    v_ldexp_f32_e32 v1, v1, v2
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
-; SI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, v0, v2
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x42b17218
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; SI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v2
@@ -4244,7 +4244,7 @@ define float @v_exp_f32_nnan_dynamic(float %in) #1 {
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; VI-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
-; VI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, v0, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x42b17218
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; VI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v2
@@ -4290,7 +4290,7 @@ define float @v_exp_f32_nnan_dynamic(float %in) #1 {
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; GFX900-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
-; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, v0, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0x42b17218
 ; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; GFX900-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v2
@@ -4336,7 +4336,7 @@ define float @v_exp_f32_nnan_dynamic(float %in) #1 {
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; SI-GISEL-NEXT:    v_ldexp_f32_e32 v1, v1, v2
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
-; SI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, v0, v2
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x42b17218
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; SI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v2
@@ -4397,7 +4397,7 @@ define float @v_exp_f32_ninf_daz(float %in) #0 {
 ; VI-GISEL-NEXT:    v_exp_f32_e32 v1, v1
 ; VI-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
-; VI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, v0, v2
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
 ; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -4435,7 +4435,7 @@ define float @v_exp_f32_ninf_daz(float %in) #0 {
 ; GFX900-GISEL-NEXT:    v_exp_f32_e32 v1, v1
 ; GFX900-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
-; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, v0, v2
 ; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
 ; GFX900-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -4473,7 +4473,7 @@ define float @v_exp_f32_ninf_daz(float %in) #0 {
 ; SI-GISEL-NEXT:    v_exp_f32_e32 v1, v1
 ; SI-GISEL-NEXT:    v_ldexp_f32_e32 v1, v1, v2
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
-; SI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, v0, v2
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
 ; SI-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -4531,7 +4531,7 @@ define float @v_exp_f32_ninf_dynamic(float %in) #1 {
 ; VI-GISEL-NEXT:    v_exp_f32_e32 v1, v1
 ; VI-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
-; VI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, v0, v2
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
 ; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -4569,7 +4569,7 @@ define float @v_exp_f32_ninf_dynamic(float %in) #1 {
 ; GFX900-GISEL-NEXT:    v_exp_f32_e32 v1, v1
 ; GFX900-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
-; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, v0, v2
 ; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
 ; GFX900-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -4607,7 +4607,7 @@ define float @v_exp_f32_ninf_dynamic(float %in) #1 {
 ; SI-GISEL-NEXT:    v_exp_f32_e32 v1, v1
 ; SI-GISEL-NEXT:    v_ldexp_f32_e32 v1, v1, v2
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
-; SI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, v0, v2
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
 ; SI-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -4665,7 +4665,7 @@ define float @v_exp_f32_nnan_ninf(float %in) {
 ; VI-GISEL-NEXT:    v_exp_f32_e32 v1, v1
 ; VI-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
-; VI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, v0, v2
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
 ; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -4703,7 +4703,7 @@ define float @v_exp_f32_nnan_ninf(float %in) {
 ; GFX900-GISEL-NEXT:    v_exp_f32_e32 v1, v1
 ; GFX900-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
-; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, v0, v2
 ; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
 ; GFX900-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -4741,7 +4741,7 @@ define float @v_exp_f32_nnan_ninf(float %in) {
 ; SI-GISEL-NEXT:    v_exp_f32_e32 v1, v1
 ; SI-GISEL-NEXT:    v_ldexp_f32_e32 v1, v1, v2
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
-; SI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, v0, v2
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
 ; SI-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -4799,7 +4799,7 @@ define float @v_exp_f32_nnan_ninf_daz(float %in) #0 {
 ; VI-GISEL-NEXT:    v_exp_f32_e32 v1, v1
 ; VI-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
-; VI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, v0, v2
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
 ; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -4837,7 +4837,7 @@ define float @v_exp_f32_nnan_ninf_daz(float %in) #0 {
 ; GFX900-GISEL-NEXT:    v_exp_f32_e32 v1, v1
 ; GFX900-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
-; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, v0, v2
 ; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
 ; GFX900-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -4875,7 +4875,7 @@ define float @v_exp_f32_nnan_ninf_daz(float %in) #0 {
 ; SI-GISEL-NEXT:    v_exp_f32_e32 v1, v1
 ; SI-GISEL-NEXT:    v_ldexp_f32_e32 v1, v1, v2
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
-; SI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, v0, v2
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
 ; SI-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -4933,7 +4933,7 @@ define float @v_exp_f32_nnan_ninf_dynamic(float %in) #1 {
 ; VI-GISEL-NEXT:    v_exp_f32_e32 v1, v1
 ; VI-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
-; VI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, v0, v2
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
 ; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -4971,7 +4971,7 @@ define float @v_exp_f32_nnan_ninf_dynamic(float %in) #1 {
 ; GFX900-GISEL-NEXT:    v_exp_f32_e32 v1, v1
 ; GFX900-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
-; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, v0, v2
 ; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
 ; GFX900-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -5009,7 +5009,7 @@ define float @v_exp_f32_nnan_ninf_dynamic(float %in) #1 {
 ; SI-GISEL-NEXT:    v_exp_f32_e32 v1, v1
 ; SI-GISEL-NEXT:    v_ldexp_f32_e32 v1, v1, v2
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
-; SI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, v0, v2
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
 ; SI-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -5100,7 +5100,7 @@ define float @v_exp_f32_dynamic_mode(float %in) #1 {
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; VI-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
-; VI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, v0, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x42b17218
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; VI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v2
@@ -5146,7 +5146,7 @@ define float @v_exp_f32_dynamic_mode(float %in) #1 {
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; GFX900-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
-; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, v0, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0x42b17218
 ; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; GFX900-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v2
@@ -5192,7 +5192,7 @@ define float @v_exp_f32_dynamic_mode(float %in) #1 {
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; SI-GISEL-NEXT:    v_ldexp_f32_e32 v1, v1, v2
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
-; SI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, v0, v2
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x42b17218
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; SI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v2
@@ -5244,7 +5244,7 @@ define float @v_exp_f32_undef() {
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x7f800000
 ; VI-GISEL-NEXT:    v_ldexp_f32 v0, v0, v1
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v1, 0xc2ce8ed0
-; VI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s4, v1
+; VI-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, s4, v1
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v1, 0x42b17218
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
 ; VI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v1
@@ -5283,7 +5283,7 @@ define float @v_exp_f32_undef() {
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0x7f800000
 ; GFX900-GISEL-NEXT:    v_ldexp_f32 v0, v0, v1
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v1, 0xc2ce8ed0
-; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s4, v1
+; GFX900-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, s4, v1
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v1, 0x42b17218
 ; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
 ; GFX900-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v1
@@ -5322,7 +5322,7 @@ define float @v_exp_f32_undef() {
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x7f800000
 ; SI-GISEL-NEXT:    v_ldexp_f32_e32 v0, v0, v1
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v1, 0xc2ce8ed0
-; SI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s4, v1
+; SI-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, s4, v1
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v1, 0x42b17218
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
 ; SI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v1
@@ -5416,7 +5416,7 @@ define float @v_exp_f32_from_fpext_f16(i16 %src.i) {
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; VI-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
-; VI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, v0, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x42b17218
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; VI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v2
@@ -5462,7 +5462,7 @@ define float @v_exp_f32_from_fpext_f16(i16 %src.i) {
 ; GFX900-GISEL-NEXT:    v_cvt_i32_f32_e32 v2, v4
 ; GFX900-GISEL-NEXT:    v_exp_f32_e32 v1, v1
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v3, 0xc2ce8ed0
-; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v3
+; GFX900-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, v0, v3
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; GFX900-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0x42b17218
@@ -5510,7 +5510,7 @@ define float @v_exp_f32_from_fpext_f16(i16 %src.i) {
 ; SI-GISEL-NEXT:    v_cvt_i32_f32_e32 v2, v4
 ; SI-GISEL-NEXT:    v_exp_f32_e32 v1, v1
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v3, 0xc2ce8ed0
-; SI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v3
+; SI-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, v0, v3
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; SI-GISEL-NEXT:    v_ldexp_f32_e32 v1, v1, v2
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x42b17218
@@ -5584,7 +5584,7 @@ define float @v_exp_f32_from_fpext_math_f16(i16 %src0.i, i16 %src1.i) {
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; VI-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
-; VI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, v0, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x42b17218
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; VI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v2
@@ -5634,7 +5634,7 @@ define float @v_exp_f32_from_fpext_math_f16(i16 %src0.i, i16 %src1.i) {
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; GFX900-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
-; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, v0, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0x42b17218
 ; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; GFX900-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v2
@@ -5688,7 +5688,7 @@ define float @v_exp_f32_from_fpext_math_f16(i16 %src0.i, i16 %src1.i) {
 ; SI-GISEL-NEXT:    v_exp_f32_e32 v1, v1
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v4, 0x42b17218
-; SI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, v0, v2
 ; SI-GISEL-NEXT:    v_ldexp_f32_e32 v1, v1, v3
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; SI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v4
@@ -5905,7 +5905,7 @@ define float @v_exp_f32_from_fpext_math_f16_daz(i16 %src0.i, i16 %src1.i) #0 {
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; VI-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
-; VI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, v0, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x42b17218
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; VI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v2
@@ -5955,7 +5955,7 @@ define float @v_exp_f32_from_fpext_math_f16_daz(i16 %src0.i, i16 %src1.i) #0 {
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; GFX900-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
-; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, v0, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0x42b17218
 ; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; GFX900-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v2
@@ -6009,7 +6009,7 @@ define float @v_exp_f32_from_fpext_math_f16_daz(i16 %src0.i, i16 %src1.i) #0 {
 ; SI-GISEL-NEXT:    v_exp_f32_e32 v1, v1
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v4, 0x42b17218
-; SI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, v0, v2
 ; SI-GISEL-NEXT:    v_ldexp_f32_e32 v1, v1, v3
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; SI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v4
@@ -7032,7 +7032,7 @@ define float @v_exp_f32_contract(float %in) {
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; VI-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
-; VI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, v0, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x42b17218
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; VI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v2
@@ -7078,7 +7078,7 @@ define float @v_exp_f32_contract(float %in) {
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; GFX900-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
-; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, v0, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0x42b17218
 ; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; GFX900-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v2
@@ -7124,7 +7124,7 @@ define float @v_exp_f32_contract(float %in) {
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; SI-GISEL-NEXT:    v_ldexp_f32_e32 v1, v1, v2
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
-; SI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, v0, v2
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x42b17218
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; SI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v2
@@ -7190,7 +7190,7 @@ define float @v_exp_f32_contract_daz(float %in) #0 {
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; VI-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
-; VI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, v0, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x42b17218
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; VI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v2
@@ -7236,7 +7236,7 @@ define float @v_exp_f32_contract_daz(float %in) #0 {
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; GFX900-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
-; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, v0, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0x42b17218
 ; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; GFX900-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v2
@@ -7282,7 +7282,7 @@ define float @v_exp_f32_contract_daz(float %in) #0 {
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; SI-GISEL-NEXT:    v_ldexp_f32_e32 v1, v1, v2
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
-; SI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, v0, v2
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x42b17218
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; SI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v2
@@ -7343,7 +7343,7 @@ define float @v_exp_f32_contract_nnan_ninf(float %in) {
 ; VI-GISEL-NEXT:    v_exp_f32_e32 v1, v1
 ; VI-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
-; VI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, v0, v2
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
 ; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -7381,7 +7381,7 @@ define float @v_exp_f32_contract_nnan_ninf(float %in) {
 ; GFX900-GISEL-NEXT:    v_exp_f32_e32 v1, v1
 ; GFX900-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
-; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, v0, v2
 ; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
 ; GFX900-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -7419,7 +7419,7 @@ define float @v_exp_f32_contract_nnan_ninf(float %in) {
 ; SI-GISEL-NEXT:    v_exp_f32_e32 v1, v1
 ; SI-GISEL-NEXT:    v_ldexp_f32_e32 v1, v1, v2
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
-; SI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, v0, v2
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
 ; SI-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.exp10.ll b/llvm/test/CodeGen/AMDGPU/llvm.exp10.ll
index 25e08219d371a..92067c3a0167b 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.exp10.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.exp10.ll
@@ -71,7 +71,7 @@ define amdgpu_kernel void @s_exp10_f32(ptr addrspace(1) %out, float %in) {
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x7f800000
 ; VI-GISEL-NEXT:    v_ldexp_f32 v0, v0, v1
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v1, 0xc23369f4
-; VI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s2, v1
+; VI-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, s2, v1
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v1, 0x421a209b
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
 ; VI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, s2, v1
@@ -125,7 +125,7 @@ define amdgpu_kernel void @s_exp10_f32(ptr addrspace(1) %out, float %in) {
 ; GFX900-GISEL-NEXT:    v_cvt_i32_f32_e32 v1, v3
 ; GFX900-GISEL-NEXT:    v_exp_f32_e32 v0, v0
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc23369f4
-; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s2, v2
+; GFX900-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, s2, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0x7f800000
 ; GFX900-GISEL-NEXT:    v_ldexp_f32 v0, v0, v1
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v1, 0x421a209b
@@ -181,7 +181,7 @@ define amdgpu_kernel void @s_exp10_f32(ptr addrspace(1) %out, float %in) {
 ; SI-GISEL-NEXT:    v_cvt_i32_f32_e32 v1, v3
 ; SI-GISEL-NEXT:    v_exp_f32_e32 v0, v0
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc23369f4
-; SI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s2, v2
+; SI-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, s2, v2
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x7f800000
 ; SI-GISEL-NEXT:    v_ldexp_f32_e32 v0, v0, v1
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v1, 0x421a209b
@@ -426,14 +426,14 @@ define amdgpu_kernel void @s_exp10_v2f32(ptr addrspace(1) %out, <2 x float> %in)
 ; VI-GISEL-NEXT:    v_exp_f32_e32 v5, v0
 ; VI-GISEL-NEXT:    v_ldexp_f32 v2, v2, v3
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v3, 0xc23369f4
-; VI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s2, v3
+; VI-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, s2, v3
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v4, 0x421a209b
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v6, 0x7f800000
 ; VI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, s2, v4
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v2, v6, vcc
 ; VI-GISEL-NEXT:    v_ldexp_f32 v1, v5, v1
-; VI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s3, v3
+; VI-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, s3, v3
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; VI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, s3, v4
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v3, s1
@@ -506,13 +506,13 @@ define amdgpu_kernel void @s_exp10_v2f32(ptr addrspace(1) %out, <2 x float> %in)
 ; GFX900-GISEL-NEXT:    v_exp_f32_e32 v5, v0
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v4, 0xc23369f4
 ; GFX900-GISEL-NEXT:    v_ldexp_f32 v2, v2, v3
-; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s2, v4
+; GFX900-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, s2, v4
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v3, 0x421a209b
 ; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
 ; GFX900-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, s2, v3
 ; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v0, v2, v6, vcc
 ; GFX900-GISEL-NEXT:    v_ldexp_f32 v1, v5, v1
-; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s3, v4
+; GFX900-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, s3, v4
 ; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; GFX900-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, s3, v3
 ; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v6, vcc
@@ -587,13 +587,13 @@ define amdgpu_kernel void @s_exp10_v2f32(ptr addrspace(1) %out, <2 x float> %in)
 ; SI-GISEL-NEXT:    v_exp_f32_e32 v5, v0
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v4, 0xc23369f4
 ; SI-GISEL-NEXT:    v_ldexp_f32_e32 v2, v2, v3
-; SI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s2, v4
+; SI-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, s2, v4
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x421a209b
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
 ; SI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, s2, v3
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v2, v6, vcc
 ; SI-GISEL-NEXT:    v_ldexp_f32_e32 v1, v5, v1
-; SI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s3, v4
+; SI-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, s3, v4
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; SI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, s3, v3
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v6, vcc
@@ -962,7 +962,7 @@ define amdgpu_kernel void @s_exp10_v3f32(ptr addrspace(1) %out, <3 x float> %in)
 ; VI-GISEL-NEXT:    v_exp_f32_e32 v5, v5
 ; VI-GISEL-NEXT:    v_ldexp_f32 v0, v0, v3
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v3, 0xc23369f4
-; VI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s0, v3
+; VI-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, s0, v3
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v4, 0x421a209b
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
 ; VI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, s0, v4
@@ -983,12 +983,12 @@ define amdgpu_kernel void @s_exp10_v3f32(ptr addrspace(1) %out, <3 x float> %in)
 ; VI-GISEL-NEXT:    v_exp_f32_e32 v6, v1
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v7, 0x7f800000
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc
-; VI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s1, v3
+; VI-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, s1, v3
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v5, vcc
 ; VI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, s1, v4
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
 ; VI-GISEL-NEXT:    v_ldexp_f32 v2, v6, v2
-; VI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s2, v3
+; VI-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, s2, v3
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
 ; VI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, s2, v4
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v3, s4
@@ -1082,7 +1082,7 @@ define amdgpu_kernel void @s_exp10_v3f32(ptr addrspace(1) %out, <3 x float> %in)
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v4, 0xc23369f4
 ; GFX900-GISEL-NEXT:    v_add_f32_e32 v1, v6, v1
 ; GFX900-GISEL-NEXT:    v_ldexp_f32 v0, v0, v3
-; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s0, v4
+; GFX900-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, s0, v4
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v3, 0x421a209b
 ; GFX900-GISEL-NEXT:    v_cvt_i32_f32_e32 v2, v2
 ; GFX900-GISEL-NEXT:    v_exp_f32_e32 v6, v1
@@ -1090,12 +1090,12 @@ define amdgpu_kernel void @s_exp10_v3f32(ptr addrspace(1) %out, <3 x float> %in)
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v7, 0x7f800000
 ; GFX900-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, s0, v3
 ; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc
-; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s1, v4
+; GFX900-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, s1, v4
 ; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v5, vcc
 ; GFX900-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, s1, v3
 ; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
 ; GFX900-GISEL-NEXT:    v_ldexp_f32 v2, v6, v2
-; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s2, v4
+; GFX900-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, s2, v4
 ; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
 ; GFX900-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, s2, v3
 ; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v2, v2, v7, vcc
@@ -1191,7 +1191,7 @@ define amdgpu_kernel void @s_exp10_v3f32(ptr addrspace(1) %out, <3 x float> %in)
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v4, 0xc23369f4
 ; SI-GISEL-NEXT:    v_add_f32_e32 v1, v6, v1
 ; SI-GISEL-NEXT:    v_ldexp_f32_e32 v0, v0, v3
-; SI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s0, v4
+; SI-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, s0, v4
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x421a209b
 ; SI-GISEL-NEXT:    v_cvt_i32_f32_e32 v2, v2
 ; SI-GISEL-NEXT:    v_exp_f32_e32 v6, v1
@@ -1199,12 +1199,12 @@ define amdgpu_kernel void @s_exp10_v3f32(ptr addrspace(1) %out, <3 x float> %in)
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v7, 0x7f800000
 ; SI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, s0, v3
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc
-; SI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s1, v4
+; SI-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, s1, v4
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v5, vcc
 ; SI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, s1, v3
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
 ; SI-GISEL-NEXT:    v_ldexp_f32_e32 v2, v6, v2
-; SI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s2, v4
+; SI-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, s2, v4
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
 ; SI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, s2, v3
 ; SI-GISEL-NEXT:    s_mov_b32 s7, 0xf000
@@ -1721,7 +1721,7 @@ define amdgpu_kernel void @s_exp10_v4f32(ptr addrspace(1) %out, <4 x float> %in)
 ; VI-GISEL-NEXT:    v_add_f32_e32 v1, v6, v1
 ; VI-GISEL-NEXT:    v_cvt_i32_f32_e32 v6, v7
 ; VI-GISEL-NEXT:    v_exp_f32_e32 v1, v1
-; VI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s0, v4
+; VI-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, s0, v4
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
 ; VI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, s0, v5
 ; VI-GISEL-NEXT:    s_and_b32 s0, s2, 0xfffff000
@@ -1754,18 +1754,18 @@ define amdgpu_kernel void @s_exp10_v4f32(ptr addrspace(1) %out, <4 x float> %in)
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v7, 0x7f800000
 ; VI-GISEL-NEXT:    v_add_f32_e32 v2, v2, v3
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc
-; VI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s1, v4
+; VI-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, s1, v4
 ; VI-GISEL-NEXT:    v_cvt_i32_f32_e32 v3, v8
 ; VI-GISEL-NEXT:    v_exp_f32_e32 v8, v2
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; VI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, s1, v5
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
-; VI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s2, v4
+; VI-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, s2, v4
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v2, 0, v6, vcc
 ; VI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, s2, v5
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v2, v2, v7, vcc
 ; VI-GISEL-NEXT:    v_ldexp_f32 v3, v8, v3
-; VI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s3, v4
+; VI-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, s3, v4
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v3, 0, v3, vcc
 ; VI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, s3, v5
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v4, s4
@@ -1857,7 +1857,7 @@ define amdgpu_kernel void @s_exp10_v4f32(ptr addrspace(1) %out, <4 x float> %in)
 ; GFX900-GISEL-NEXT:    v_cvt_i32_f32_e32 v1, v4
 ; GFX900-GISEL-NEXT:    v_exp_f32_e32 v0, v0
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v4, 0xc23369f4
-; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s0, v4
+; GFX900-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, s0, v4
 ; GFX900-GISEL-NEXT:    v_ldexp_f32 v0, v0, v1
 ; GFX900-GISEL-NEXT:    v_mul_f32_e32 v1, s1, v2
 ; GFX900-GISEL-NEXT:    v_fma_f32 v6, s1, v2, -v1
@@ -1880,7 +1880,7 @@ define amdgpu_kernel void @s_exp10_v4f32(ptr addrspace(1) %out, <4 x float> %in)
 ; GFX900-GISEL-NEXT:    v_cvt_i32_f32_e32 v8, v9
 ; GFX900-GISEL-NEXT:    v_exp_f32_e32 v6, v6
 ; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc
-; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s1, v4
+; GFX900-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, s1, v4
 ; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; GFX900-GISEL-NEXT:    v_ldexp_f32 v6, v6, v8
 ; GFX900-GISEL-NEXT:    v_mul_f32_e32 v8, s3, v2
@@ -1893,12 +1893,12 @@ define amdgpu_kernel void @s_exp10_v4f32(ptr addrspace(1) %out, <4 x float> %in)
 ; GFX900-GISEL-NEXT:    v_exp_f32_e32 v8, v2
 ; GFX900-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, s1, v5
 ; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
-; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s2, v4
+; GFX900-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, s2, v4
 ; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v2, 0, v6, vcc
 ; GFX900-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, s2, v5
 ; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v2, v2, v7, vcc
 ; GFX900-GISEL-NEXT:    v_ldexp_f32 v3, v8, v3
-; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s3, v4
+; GFX900-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, s3, v4
 ; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v3, 0, v3, vcc
 ; GFX900-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, s3, v5
 ; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v3, v3, v7, vcc
@@ -1991,7 +1991,7 @@ define amdgpu_kernel void @s_exp10_v4f32(ptr addrspace(1) %out, <4 x float> %in)
 ; SI-GISEL-NEXT:    v_cvt_i32_f32_e32 v1, v4
 ; SI-GISEL-NEXT:    v_exp_f32_e32 v0, v0
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v4, 0xc23369f4
-; SI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s0, v4
+; SI-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, s0, v4
 ; SI-GISEL-NEXT:    s_mov_b32 s6, -1
 ; SI-GISEL-NEXT:    v_ldexp_f32_e32 v0, v0, v1
 ; SI-GISEL-NEXT:    v_mul_f32_e32 v1, s1, v2
@@ -2015,7 +2015,7 @@ define amdgpu_kernel void @s_exp10_v4f32(ptr addrspace(1) %out, <4 x float> %in)
 ; SI-GISEL-NEXT:    v_cvt_i32_f32_e32 v8, v9
 ; SI-GISEL-NEXT:    v_exp_f32_e32 v6, v6
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc
-; SI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s1, v4
+; SI-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, s1, v4
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; SI-GISEL-NEXT:    v_ldexp_f32_e32 v6, v6, v8
 ; SI-GISEL-NEXT:    v_mul_f32_e32 v8, s3, v2
@@ -2028,12 +2028,12 @@ define amdgpu_kernel void @s_exp10_v4f32(ptr addrspace(1) %out, <4 x float> %in)
 ; SI-GISEL-NEXT:    v_exp_f32_e32 v8, v2
 ; SI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, s1, v5
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
-; SI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s2, v4
+; SI-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, s2, v4
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v2, 0, v6, vcc
 ; SI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, s2, v5
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v2, v2, v7, vcc
 ; SI-GISEL-NEXT:    v_ldexp_f32_e32 v3, v8, v3
-; SI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s3, v4
+; SI-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, s3, v4
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v3, 0, v3, vcc
 ; SI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, s3, v5
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v3, v3, v7, vcc
@@ -2567,7 +2567,7 @@ define float @v_exp10_f32(float %in) {
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; VI-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc23369f4
-; VI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, v0, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x421a209b
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; VI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v2
@@ -2613,7 +2613,7 @@ define float @v_exp10_f32(float %in) {
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; GFX900-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc23369f4
-; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, v0, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0x421a209b
 ; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; GFX900-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v2
@@ -2659,7 +2659,7 @@ define float @v_exp10_f32(float %in) {
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; SI-GISEL-NEXT:    v_ldexp_f32_e32 v1, v1, v2
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc23369f4
-; SI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, v0, v2
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x421a209b
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; SI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v2
@@ -2727,9 +2727,9 @@ define float @v_exp10_fabs_f32(float %in) {
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; VI-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc23369f4
-; VI-GISEL-NEXT:    v_cmp_ge_f32_e64 vcc, |v0|, v2
+; VI-GISEL-NEXT:    v_cmp_lt_f32_e64 s[4:5], |v0|, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x421a209b
-; VI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, 0, s[4:5]
 ; VI-GISEL-NEXT:    v_cmp_gt_f32_e64 vcc, |v0|, v2
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v3, vcc
 ; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -2773,9 +2773,9 @@ define float @v_exp10_fabs_f32(float %in) {
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; GFX900-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc23369f4
-; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e64 vcc, |v0|, v2
+; GFX900-GISEL-NEXT:    v_cmp_lt_f32_e64 s[4:5], |v0|, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0x421a209b
-; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, 0, s[4:5]
 ; GFX900-GISEL-NEXT:    v_cmp_gt_f32_e64 vcc, |v0|, v2
 ; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v3, vcc
 ; GFX900-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -2819,9 +2819,9 @@ define float @v_exp10_fabs_f32(float %in) {
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; SI-GISEL-NEXT:    v_ldexp_f32_e32 v1, v1, v2
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc23369f4
-; SI-GISEL-NEXT:    v_cmp_ge_f32_e64 vcc, |v0|, v2
+; SI-GISEL-NEXT:    v_cmp_lt_f32_e64 s[4:5], |v0|, v2
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x421a209b
-; SI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, 0, s[4:5]
 ; SI-GISEL-NEXT:    v_cmp_gt_f32_e64 vcc, |v0|, v2
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v3, vcc
 ; SI-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -2888,9 +2888,9 @@ define float @v_exp10_fneg_fabs_f32(float %in) {
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; VI-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc23369f4
-; VI-GISEL-NEXT:    v_cmp_ge_f32_e64 vcc, -|v0|, v2
+; VI-GISEL-NEXT:    v_cmp_lt_f32_e64 s[4:5], -|v0|, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x421a209b
-; VI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, 0, s[4:5]
 ; VI-GISEL-NEXT:    v_cmp_gt_f32_e64 vcc, -|v0|, v2
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v3, vcc
 ; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -2934,9 +2934,9 @@ define float @v_exp10_fneg_fabs_f32(float %in) {
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; GFX900-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc23369f4
-; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e64 vcc, -|v0|, v2
+; GFX900-GISEL-NEXT:    v_cmp_lt_f32_e64 s[4:5], -|v0|, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0x421a209b
-; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, 0, s[4:5]
 ; GFX900-GISEL-NEXT:    v_cmp_gt_f32_e64 vcc, -|v0|, v2
 ; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v3, vcc
 ; GFX900-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -2980,9 +2980,9 @@ define float @v_exp10_fneg_fabs_f32(float %in) {
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; SI-GISEL-NEXT:    v_ldexp_f32_e32 v1, v1, v2
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc23369f4
-; SI-GISEL-NEXT:    v_cmp_ge_f32_e64 vcc, -|v0|, v2
+; SI-GISEL-NEXT:    v_cmp_lt_f32_e64 s[4:5], -|v0|, v2
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x421a209b
-; SI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, 0, s[4:5]
 ; SI-GISEL-NEXT:    v_cmp_gt_f32_e64 vcc, -|v0|, v2
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v3, vcc
 ; SI-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -3050,9 +3050,9 @@ define float @v_exp10_fneg_f32(float %in) {
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; VI-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc23369f4
-; VI-GISEL-NEXT:    v_cmp_ge_f32_e64 vcc, -v0, v2
+; VI-GISEL-NEXT:    v_cmp_lt_f32_e64 s[4:5], -v0, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x421a209b
-; VI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, 0, s[4:5]
 ; VI-GISEL-NEXT:    v_cmp_gt_f32_e64 vcc, -v0, v2
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v3, vcc
 ; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -3096,9 +3096,9 @@ define float @v_exp10_fneg_f32(float %in) {
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; GFX900-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc23369f4
-; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e64 vcc, -v0, v2
+; GFX900-GISEL-NEXT:    v_cmp_lt_f32_e64 s[4:5], -v0, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0x421a209b
-; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, 0, s[4:5]
 ; GFX900-GISEL-NEXT:    v_cmp_gt_f32_e64 vcc, -v0, v2
 ; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v3, vcc
 ; GFX900-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -3142,9 +3142,9 @@ define float @v_exp10_fneg_f32(float %in) {
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; SI-GISEL-NEXT:    v_ldexp_f32_e32 v1, v1, v2
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc23369f4
-; SI-GISEL-NEXT:    v_cmp_ge_f32_e64 vcc, -v0, v2
+; SI-GISEL-NEXT:    v_cmp_lt_f32_e64 s[4:5], -v0, v2
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x421a209b
-; SI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, 0, s[4:5]
 ; SI-GISEL-NEXT:    v_cmp_gt_f32_e64 vcc, -v0, v2
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v3, vcc
 ; SI-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -3420,7 +3420,7 @@ define float @v_exp10_f32_ninf(float %in) {
 ; VI-GISEL-NEXT:    v_exp_f32_e32 v1, v1
 ; VI-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc23369f4
-; VI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, v0, v2
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
 ; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -3458,7 +3458,7 @@ define float @v_exp10_f32_ninf(float %in) {
 ; GFX900-GISEL-NEXT:    v_exp_f32_e32 v1, v1
 ; GFX900-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc23369f4
-; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, v0, v2
 ; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
 ; GFX900-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -3496,7 +3496,7 @@ define float @v_exp10_f32_ninf(float %in) {
 ; SI-GISEL-NEXT:    v_exp_f32_e32 v1, v1
 ; SI-GISEL-NEXT:    v_ldexp_f32_e32 v1, v1, v2
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc23369f4
-; SI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, v0, v2
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
 ; SI-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -3828,7 +3828,7 @@ define float @v_exp10_f32_daz(float %in) #0 {
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; VI-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc23369f4
-; VI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, v0, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x421a209b
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; VI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v2
@@ -3874,7 +3874,7 @@ define float @v_exp10_f32_daz(float %in) #0 {
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; GFX900-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc23369f4
-; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, v0, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0x421a209b
 ; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; GFX900-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v2
@@ -3920,7 +3920,7 @@ define float @v_exp10_f32_daz(float %in) #0 {
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; SI-GISEL-NEXT:    v_ldexp_f32_e32 v1, v1, v2
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc23369f4
-; SI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, v0, v2
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x421a209b
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; SI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v2
@@ -3986,7 +3986,7 @@ define float @v_exp10_f32_nnan(float %in) {
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; VI-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc23369f4
-; VI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, v0, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x421a209b
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; VI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v2
@@ -4032,7 +4032,7 @@ define float @v_exp10_f32_nnan(float %in) {
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; GFX900-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc23369f4
-; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, v0, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0x421a209b
 ; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; GFX900-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v2
@@ -4078,7 +4078,7 @@ define float @v_exp10_f32_nnan(float %in) {
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; SI-GISEL-NEXT:    v_ldexp_f32_e32 v1, v1, v2
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc23369f4
-; SI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, v0, v2
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x421a209b
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; SI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v2
@@ -4144,7 +4144,7 @@ define float @v_exp10_f32_nnan_daz(float %in) #0 {
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; VI-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc23369f4
-; VI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, v0, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x421a209b
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; VI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v2
@@ -4190,7 +4190,7 @@ define float @v_exp10_f32_nnan_daz(float %in) #0 {
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; GFX900-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc23369f4
-; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, v0, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0x421a209b
 ; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; GFX900-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v2
@@ -4236,7 +4236,7 @@ define float @v_exp10_f32_nnan_daz(float %in) #0 {
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; SI-GISEL-NEXT:    v_ldexp_f32_e32 v1, v1, v2
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc23369f4
-; SI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, v0, v2
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x421a209b
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; SI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v2
@@ -4302,7 +4302,7 @@ define float @v_exp10_f32_nnan_dynamic(float %in) #1 {
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; VI-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc23369f4
-; VI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, v0, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x421a209b
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; VI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v2
@@ -4348,7 +4348,7 @@ define float @v_exp10_f32_nnan_dynamic(float %in) #1 {
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; GFX900-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc23369f4
-; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, v0, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0x421a209b
 ; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; GFX900-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v2
@@ -4394,7 +4394,7 @@ define float @v_exp10_f32_nnan_dynamic(float %in) #1 {
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; SI-GISEL-NEXT:    v_ldexp_f32_e32 v1, v1, v2
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc23369f4
-; SI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, v0, v2
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x421a209b
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; SI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v2
@@ -4455,7 +4455,7 @@ define float @v_exp10_f32_ninf_daz(float %in) #0 {
 ; VI-GISEL-NEXT:    v_exp_f32_e32 v1, v1
 ; VI-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc23369f4
-; VI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, v0, v2
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
 ; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -4493,7 +4493,7 @@ define float @v_exp10_f32_ninf_daz(float %in) #0 {
 ; GFX900-GISEL-NEXT:    v_exp_f32_e32 v1, v1
 ; GFX900-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc23369f4
-; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, v0, v2
 ; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
 ; GFX900-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -4531,7 +4531,7 @@ define float @v_exp10_f32_ninf_daz(float %in) #0 {
 ; SI-GISEL-NEXT:    v_exp_f32_e32 v1, v1
 ; SI-GISEL-NEXT:    v_ldexp_f32_e32 v1, v1, v2
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc23369f4
-; SI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, v0, v2
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
 ; SI-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -4589,7 +4589,7 @@ define float @v_exp10_f32_ninf_dynamic(float %in) #1 {
 ; VI-GISEL-NEXT:    v_exp_f32_e32 v1, v1
 ; VI-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc23369f4
-; VI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, v0, v2
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
 ; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -4627,7 +4627,7 @@ define float @v_exp10_f32_ninf_dynamic(float %in) #1 {
 ; GFX900-GISEL-NEXT:    v_exp_f32_e32 v1, v1
 ; GFX900-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc23369f4
-; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, v0, v2
 ; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
 ; GFX900-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -4665,7 +4665,7 @@ define float @v_exp10_f32_ninf_dynamic(float %in) #1 {
 ; SI-GISEL-NEXT:    v_exp_f32_e32 v1, v1
 ; SI-GISEL-NEXT:    v_ldexp_f32_e32 v1, v1, v2
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc23369f4
-; SI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, v0, v2
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
 ; SI-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -4723,7 +4723,7 @@ define float @v_exp10_f32_nnan_ninf(float %in) {
 ; VI-GISEL-NEXT:    v_exp_f32_e32 v1, v1
 ; VI-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc23369f4
-; VI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, v0, v2
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
 ; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -4761,7 +4761,7 @@ define float @v_exp10_f32_nnan_ninf(float %in) {
 ; GFX900-GISEL-NEXT:    v_exp_f32_e32 v1, v1
 ; GFX900-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc23369f4
-; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, v0, v2
 ; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
 ; GFX900-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -4799,7 +4799,7 @@ define float @v_exp10_f32_nnan_ninf(float %in) {
 ; SI-GISEL-NEXT:    v_exp_f32_e32 v1, v1
 ; SI-GISEL-NEXT:    v_ldexp_f32_e32 v1, v1, v2
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc23369f4
-; SI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, v0, v2
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
 ; SI-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -4857,7 +4857,7 @@ define float @v_exp10_f32_nnan_ninf_daz(float %in) #0 {
 ; VI-GISEL-NEXT:    v_exp_f32_e32 v1, v1
 ; VI-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc23369f4
-; VI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, v0, v2
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
 ; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -4895,7 +4895,7 @@ define float @v_exp10_f32_nnan_ninf_daz(float %in) #0 {
 ; GFX900-GISEL-NEXT:    v_exp_f32_e32 v1, v1
 ; GFX900-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc23369f4
-; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, v0, v2
 ; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
 ; GFX900-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -4933,7 +4933,7 @@ define float @v_exp10_f32_nnan_ninf_daz(float %in) #0 {
 ; SI-GISEL-NEXT:    v_exp_f32_e32 v1, v1
 ; SI-GISEL-NEXT:    v_ldexp_f32_e32 v1, v1, v2
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc23369f4
-; SI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, v0, v2
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
 ; SI-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -4991,7 +4991,7 @@ define float @v_exp10_f32_nnan_ninf_dynamic(float %in) #1 {
 ; VI-GISEL-NEXT:    v_exp_f32_e32 v1, v1
 ; VI-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc23369f4
-; VI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, v0, v2
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
 ; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -5029,7 +5029,7 @@ define float @v_exp10_f32_nnan_ninf_dynamic(float %in) #1 {
 ; GFX900-GISEL-NEXT:    v_exp_f32_e32 v1, v1
 ; GFX900-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc23369f4
-; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, v0, v2
 ; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
 ; GFX900-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -5067,7 +5067,7 @@ define float @v_exp10_f32_nnan_ninf_dynamic(float %in) #1 {
 ; SI-GISEL-NEXT:    v_exp_f32_e32 v1, v1
 ; SI-GISEL-NEXT:    v_ldexp_f32_e32 v1, v1, v2
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc23369f4
-; SI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, v0, v2
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
 ; SI-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -5178,7 +5178,7 @@ define float @v_exp10_f32_dynamic_mode(float %in) #1 {
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; VI-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc23369f4
-; VI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, v0, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x421a209b
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; VI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v2
@@ -5224,7 +5224,7 @@ define float @v_exp10_f32_dynamic_mode(float %in) #1 {
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; GFX900-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc23369f4
-; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, v0, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0x421a209b
 ; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; GFX900-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v2
@@ -5270,7 +5270,7 @@ define float @v_exp10_f32_dynamic_mode(float %in) #1 {
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; SI-GISEL-NEXT:    v_ldexp_f32_e32 v1, v1, v2
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc23369f4
-; SI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, v0, v2
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x421a209b
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; SI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v2
@@ -5322,7 +5322,7 @@ define float @v_exp10_f32_undef() {
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x7f800000
 ; VI-GISEL-NEXT:    v_ldexp_f32 v0, v0, v1
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v1, 0xc23369f4
-; VI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s4, v1
+; VI-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, s4, v1
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v1, 0x421a209b
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
 ; VI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v1
@@ -5361,7 +5361,7 @@ define float @v_exp10_f32_undef() {
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0x7f800000
 ; GFX900-GISEL-NEXT:    v_ldexp_f32 v0, v0, v1
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v1, 0xc23369f4
-; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s4, v1
+; GFX900-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, s4, v1
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v1, 0x421a209b
 ; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
 ; GFX900-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v1
@@ -5400,7 +5400,7 @@ define float @v_exp10_f32_undef() {
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x7f800000
 ; SI-GISEL-NEXT:    v_ldexp_f32_e32 v0, v0, v1
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v1, 0xc23369f4
-; SI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, s4, v1
+; SI-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, s4, v1
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v1, 0x421a209b
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
 ; SI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v1
@@ -5494,7 +5494,7 @@ define float @v_exp10_f32_from_fpext_f16(i16 %src.i) {
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; VI-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc23369f4
-; VI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, v0, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x421a209b
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; VI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v2
@@ -5540,7 +5540,7 @@ define float @v_exp10_f32_from_fpext_f16(i16 %src.i) {
 ; GFX900-GISEL-NEXT:    v_cvt_i32_f32_e32 v2, v4
 ; GFX900-GISEL-NEXT:    v_exp_f32_e32 v1, v1
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v3, 0xc23369f4
-; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v3
+; GFX900-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, v0, v3
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; GFX900-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0x421a209b
@@ -5588,7 +5588,7 @@ define float @v_exp10_f32_from_fpext_f16(i16 %src.i) {
 ; SI-GISEL-NEXT:    v_cvt_i32_f32_e32 v2, v4
 ; SI-GISEL-NEXT:    v_exp_f32_e32 v1, v1
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v3, 0xc23369f4
-; SI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v3
+; SI-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, v0, v3
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; SI-GISEL-NEXT:    v_ldexp_f32_e32 v1, v1, v2
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x421a209b
@@ -5662,7 +5662,7 @@ define float @v_exp10_f32_from_fpext_math_f16(i16 %src0.i, i16 %src1.i) {
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; VI-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc23369f4
-; VI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, v0, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x421a209b
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; VI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v2
@@ -5712,7 +5712,7 @@ define float @v_exp10_f32_from_fpext_math_f16(i16 %src0.i, i16 %src1.i) {
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; GFX900-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc23369f4
-; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, v0, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0x421a209b
 ; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; GFX900-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v2
@@ -5766,7 +5766,7 @@ define float @v_exp10_f32_from_fpext_math_f16(i16 %src0.i, i16 %src1.i) {
 ; SI-GISEL-NEXT:    v_exp_f32_e32 v1, v1
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc23369f4
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v4, 0x421a209b
-; SI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, v0, v2
 ; SI-GISEL-NEXT:    v_ldexp_f32_e32 v1, v1, v3
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; SI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v4
@@ -5998,7 +5998,7 @@ define float @v_exp10_f32_from_fpext_math_f16_daz(i16 %src0.i, i16 %src1.i) #0 {
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; VI-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc23369f4
-; VI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, v0, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x421a209b
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; VI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v2
@@ -6048,7 +6048,7 @@ define float @v_exp10_f32_from_fpext_math_f16_daz(i16 %src0.i, i16 %src1.i) #0 {
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; GFX900-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc23369f4
-; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, v0, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0x421a209b
 ; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; GFX900-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v2
@@ -6102,7 +6102,7 @@ define float @v_exp10_f32_from_fpext_math_f16_daz(i16 %src0.i, i16 %src1.i) #0 {
 ; SI-GISEL-NEXT:    v_exp_f32_e32 v1, v1
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc23369f4
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v4, 0x421a209b
-; SI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, v0, v2
 ; SI-GISEL-NEXT:    v_ldexp_f32_e32 v1, v1, v3
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; SI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v4
@@ -7126,7 +7126,7 @@ define float @v_exp10_f32_contract(float %in) {
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; VI-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc23369f4
-; VI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, v0, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x421a209b
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; VI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v2
@@ -7172,7 +7172,7 @@ define float @v_exp10_f32_contract(float %in) {
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; GFX900-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc23369f4
-; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, v0, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0x421a209b
 ; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; GFX900-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v2
@@ -7218,7 +7218,7 @@ define float @v_exp10_f32_contract(float %in) {
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; SI-GISEL-NEXT:    v_ldexp_f32_e32 v1, v1, v2
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc23369f4
-; SI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, v0, v2
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x421a209b
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; SI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v2
@@ -7284,7 +7284,7 @@ define float @v_exp10_f32_contract_daz(float %in) #0 {
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; VI-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc23369f4
-; VI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, v0, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x421a209b
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; VI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v2
@@ -7330,7 +7330,7 @@ define float @v_exp10_f32_contract_daz(float %in) #0 {
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; GFX900-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc23369f4
-; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, v0, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0x421a209b
 ; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; GFX900-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v2
@@ -7376,7 +7376,7 @@ define float @v_exp10_f32_contract_daz(float %in) #0 {
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
 ; SI-GISEL-NEXT:    v_ldexp_f32_e32 v1, v1, v2
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc23369f4
-; SI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, v0, v2
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x421a209b
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; SI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v0, v2
@@ -7437,7 +7437,7 @@ define float @v_exp10_f32_contract_nnan_ninf(float %in) {
 ; VI-GISEL-NEXT:    v_exp_f32_e32 v1, v1
 ; VI-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc23369f4
-; VI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, v0, v2
 ; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
 ; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -7475,7 +7475,7 @@ define float @v_exp10_f32_contract_nnan_ninf(float %in) {
 ; GFX900-GISEL-NEXT:    v_exp_f32_e32 v1, v1
 ; GFX900-GISEL-NEXT:    v_ldexp_f32 v1, v1, v2
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc23369f4
-; GFX900-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, v0, v2
 ; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
 ; GFX900-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -7513,7 +7513,7 @@ define float @v_exp10_f32_contract_nnan_ninf(float %in) {
 ; SI-GISEL-NEXT:    v_exp_f32_e32 v1, v1
 ; SI-GISEL-NEXT:    v_ldexp_f32_e32 v1, v1, v2
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xc23369f4
-; SI-GISEL-NEXT:    v_cmp_ge_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, v0, v2
 ; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
 ; SI-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
diff --git a/llvm/test/CodeGen/AMDGPU/private-memory-atomics.ll b/llvm/test/CodeGen/AMDGPU/private-memory-atomics.ll
index b0b875c2b11a1..6deabc8ec5b28 100644
--- a/llvm/test/CodeGen/AMDGPU/private-memory-atomics.ll
+++ b/llvm/test/CodeGen/AMDGPU/private-memory-atomics.ll
@@ -163,12 +163,12 @@ define i64 @cmpxchg_private_i64(ptr addrspace(5) %ptr) {
 ; GCN-NEXT:    s_mov_b32 s7, 0xf000
 ; GCN-NEXT:    s_mov_b32 s6, -1
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[0:1]
-; GCN-NEXT:    v_cndmask_b32_e64 v4, v1, 0, vcc
+; GCN-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[0:1]
+; GCN-NEXT:    v_cndmask_b32_e32 v4, 0, v1, vcc
 ; GCN-NEXT:    buffer_store_dword v4, v3, s[0:3], 0 offen
-; GCN-NEXT:    v_cndmask_b32_e64 v3, v0, 1, vcc
+; GCN-NEXT:    v_cndmask_b32_e32 v3, 1, v0, vcc
 ; GCN-NEXT:    s_waitcnt expcnt(0)
-; GCN-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
+; GCN-NEXT:    v_cndmask_b32_e64 v4, 1, 0, vcc
 ; GCN-NEXT:    buffer_store_dword v3, v2, s[0:3], 0 offen
 ; GCN-NEXT:    buffer_store_byte v4, off, s[4:7], 0
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
diff --git a/llvm/test/CodeGen/AMDGPU/rem_i128.ll b/llvm/test/CodeGen/AMDGPU/rem_i128.ll
index 6512bee36e88b..275e2b99f3ca8 100644
--- a/llvm/test/CodeGen/AMDGPU/rem_i128.ll
+++ b/llvm/test/CodeGen/AMDGPU/rem_i128.ll
@@ -47,9 +47,9 @@ define i128 @v_srem_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
 ; GFX9-NEXT:    v_add_co_u32_e32 v7, vcc, 64, v7
 ; GFX9-NEXT:    v_addc_co_u32_e64 v8, s[6:7], 0, 0, vcc
-; GFX9-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[4:5]
+; GFX9-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[4:5]
 ; GFX9-NEXT:    v_ffbh_u32_e32 v10, v3
-; GFX9-NEXT:    v_cndmask_b32_e32 v6, v7, v6, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v6, v6, v7, vcc
 ; GFX9-NEXT:    v_ffbh_u32_e32 v7, v2
 ; GFX9-NEXT:    v_add_u32_e32 v7, 32, v7
 ; GFX9-NEXT:    v_min_u32_e32 v7, v7, v10
@@ -57,13 +57,13 @@ define i128 @v_srem_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-NEXT:    v_add_u32_e32 v10, 32, v10
 ; GFX9-NEXT:    v_ffbh_u32_e32 v11, v1
 ; GFX9-NEXT:    v_min_u32_e32 v10, v10, v11
-; GFX9-NEXT:    v_cndmask_b32_e64 v8, v8, 0, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v8, 0, v8, vcc
 ; GFX9-NEXT:    v_add_co_u32_e32 v10, vcc, 64, v10
 ; GFX9-NEXT:    v_addc_co_u32_e64 v11, s[6:7], 0, 0, vcc
-; GFX9-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[2:3]
+; GFX9-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[2:3]
 ; GFX9-NEXT:    v_mov_b32_e32 v9, 0
-; GFX9-NEXT:    v_cndmask_b32_e32 v7, v10, v7, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v11, v11, 0, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v7, v7, v10, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v11, 0, v11, vcc
 ; GFX9-NEXT:    v_sub_co_u32_e32 v6, vcc, v6, v7
 ; GFX9-NEXT:    v_subb_co_u32_e32 v7, vcc, v8, v11, vcc
 ; GFX9-NEXT:    v_subb_co_u32_e32 v8, vcc, 0, v9, vcc
@@ -1518,9 +1518,9 @@ define i128 @v_urem_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
 ; GFX9-NEXT:    v_add_co_u32_e32 v9, vcc, 64, v9
 ; GFX9-NEXT:    v_addc_co_u32_e64 v10, s[6:7], 0, 0, vcc
-; GFX9-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[6:7]
+; GFX9-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[6:7]
 ; GFX9-NEXT:    v_ffbh_u32_e32 v11, v3
-; GFX9-NEXT:    v_cndmask_b32_e32 v8, v9, v8, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v8, v8, v9, vcc
 ; GFX9-NEXT:    v_ffbh_u32_e32 v9, v2
 ; GFX9-NEXT:    v_add_u32_e32 v9, 32, v9
 ; GFX9-NEXT:    v_min_u32_e32 v9, v9, v11
@@ -1528,13 +1528,13 @@ define i128 @v_urem_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-NEXT:    v_add_u32_e32 v11, 32, v11
 ; GFX9-NEXT:    v_ffbh_u32_e32 v12, v1
 ; GFX9-NEXT:    v_min_u32_e32 v11, v11, v12
-; GFX9-NEXT:    v_cndmask_b32_e64 v10, v10, 0, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v10, 0, v10, vcc
 ; GFX9-NEXT:    v_add_co_u32_e32 v11, vcc, 64, v11
 ; GFX9-NEXT:    v_addc_co_u32_e64 v12, s[6:7], 0, 0, vcc
-; GFX9-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[2:3]
+; GFX9-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[2:3]
 ; GFX9-NEXT:    s_mov_b64 s[6:7], 0x7f
-; GFX9-NEXT:    v_cndmask_b32_e32 v9, v11, v9, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v12, v12, 0, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v9, v9, v11, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v12, 0, v12, vcc
 ; GFX9-NEXT:    v_sub_co_u32_e32 v8, vcc, v8, v9
 ; GFX9-NEXT:    v_subb_co_u32_e32 v9, vcc, v10, v12, vcc
 ; GFX9-NEXT:    v_mov_b32_e32 v11, 0
diff --git a/llvm/test/CodeGen/AMDGPU/short-select-cndmask.ll b/llvm/test/CodeGen/AMDGPU/short-select-cndmask.ll
index 5618f77fb3d86..33b8ea46373d6 100644
--- a/llvm/test/CodeGen/AMDGPU/short-select-cndmask.ll
+++ b/llvm/test/CodeGen/AMDGPU/short-select-cndmask.ll
@@ -1,9 +1,9 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck %s -check-prefix=GCN
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck %s -check-prefix=GCN
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck %s -check-prefix=GCN
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck %s -check-prefix=GCN
 
-define amdgpu_cs void @test(i32 %a, i32 %x, i32 %y, i32 %p, i32 %q, i32 %r, i32 %s, ptr addrspace(1) %out) {
-; GCN-LABEL: test:
+define amdgpu_cs void @test_u32(i32 %a, i32 %x, i32 %y, i32 %p, i32 %q, i32 %r, i32 %s, ptr addrspace(1) %out) {
+; GCN-LABEL: test_u32:
 ; GCN:       ; %bb.0: ; %.entry
 ; GCN-NEXT:    v_cmp_ne_u32_e32 vcc_lo, -1, v0
 ; GCN-NEXT:    v_dual_cndmask_b32 v0, v1, v2 :: v_dual_cndmask_b32 v1, 0, v3
@@ -24,8 +24,8 @@ define amdgpu_cs void @test(i32 %a, i32 %x, i32 %y, i32 %p, i32 %q, i32 %r, i32
   ret void
 }
 
-define amdgpu_cs void @test_negative_case(i32 %a, i32 %x, i32 %y, i32 %p, i32 %q, i32 %r, i32 %s, ptr addrspace(1) %out) {
-; GCN-LABEL: test_negative_case:
+define amdgpu_cs void @test_u32_negative_case(i32 %a, i32 %x, i32 %y, i32 %p, i32 %q, i32 %r, i32 %s, ptr addrspace(1) %out) {
+; GCN-LABEL: test_u32_negative_case:
 ; GCN:       ; %bb.0: ; %.entry
 ; GCN-NEXT:    v_cmp_ne_u32_e32 vcc_lo, -1, v0
 ; GCN-NEXT:    v_dual_cndmask_b32 v0, v1, v2 :: v_dual_cndmask_b32 v1, 0, v3
@@ -44,4 +44,78 @@ define amdgpu_cs void @test_negative_case(i32 %a, i32 %x, i32 %y, i32 %p, i32 %q
   %ret3 = insertelement <4 x i32> %ret2, i32 %val4, i32 3
   store <4 x i32> %ret3, ptr addrspace(1) %out
   ret void
-}
\ No newline at end of file
+}
+
+define amdgpu_cs void @test_u64(i64 %a, i64 %x, i64 %y, i64 %p, i64 %q, i64 %r, i64 %s, ptr addrspace(1) %out) {
+; GCN-LABEL: test_u64:
+; GCN:       ; %bb.0: ; %.entry
+; GCN-NEXT:    v_cmp_ne_u64_e32 vcc_lo, -1, v[0:1]
+; GCN-NEXT:    v_dual_cndmask_b32 v1, v3, v5 :: v_dual_cndmask_b32 v0, v2, v4
+; GCN-NEXT:    v_dual_cndmask_b32 v3, 0, v7 :: v_dual_cndmask_b32 v8, 0, v8
+; GCN-NEXT:    v_dual_cndmask_b32 v9, 0, v9 :: v_dual_cndmask_b32 v10, v10, v12
+; GCN-NEXT:    v_dual_cndmask_b32 v11, v11, v13 :: v_dual_cndmask_b32 v2, 0, v6
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[14:15], v[8:11], off offset:16
+; GCN-NEXT:    global_store_b128 v[14:15], v[0:3], off
+; GCN-NEXT:    s_endpgm
+.entry:
+  %vcc = icmp eq i64 %a, -1
+  %val1 = select i1 %vcc, i64 %x, i64 %y
+  %val2 = select i1 %vcc, i64 0, i64 %p
+  %val3 = select i1 %vcc, i64 0, i64 %q
+  %val4 = select i1 %vcc, i64 %r, i64 %s
+  %ret0 = insertelement <4 x i64> poison, i64 %val1, i32 0
+  %ret1 = insertelement <4 x i64> %ret0, i64 %val2, i32 1
+  %ret2 = insertelement <4 x i64> %ret1, i64 %val3, i32 2
+  %ret3 = insertelement <4 x i64> %ret2, i64 %val4, i32 3
+  store <4 x i64> %ret3, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_cs void @test_f32(float %a, float %x, float %y, float %p, float %q, float %r, float %s, ptr addrspace(1) %out) {
+; GCN-LABEL: test_f32:
+; GCN:       ; %bb.0: ; %.entry
+; GCN-NEXT:    v_cmp_neq_f32_e32 vcc_lo, 1.0, v0
+; GCN-NEXT:    v_dual_cndmask_b32 v0, v1, v2 :: v_dual_cndmask_b32 v1, 0, v3
+; GCN-NEXT:    v_dual_cndmask_b32 v2, 0, v4 :: v_dual_cndmask_b32 v3, v5, v6
+; GCN-NEXT:    global_store_b128 v[7:8], v[0:3], off
+; GCN-NEXT:    s_endpgm
+.entry:
+  %vcc = fcmp oeq float %a, 1.0
+  %val1 = select i1 %vcc, float %x, float %y
+  %val2 = select i1 %vcc, float 0.0, float %p
+  %val3 = select i1 %vcc, float 0.0, float %q
+  %val4 = select i1 %vcc, float %r, float %s
+  %ret0 = insertelement <4 x float> poison, float %val1, i32 0
+  %ret1 = insertelement <4 x float> %ret0, float %val2, i32 1
+  %ret2 = insertelement <4 x float> %ret1, float %val3, i32 2
+  %ret3 = insertelement <4 x float> %ret2, float %val4, i32 3
+  store <4 x float> %ret3, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_cs void @test_f64(double %a, double %x, double %y, double %p, double %q, double %r, double %s, ptr addrspace(1) %out) {
+; GCN-LABEL: test_f64:
+; GCN:       ; %bb.0: ; %.entry
+; GCN-NEXT:    v_cmp_neq_f64_e32 vcc_lo, 1.0, v[0:1]
+; GCN-NEXT:    v_dual_cndmask_b32 v1, v3, v5 :: v_dual_cndmask_b32 v0, v2, v4
+; GCN-NEXT:    v_dual_cndmask_b32 v3, 0, v7 :: v_dual_cndmask_b32 v8, 0, v8
+; GCN-NEXT:    v_dual_cndmask_b32 v9, 0, v9 :: v_dual_cndmask_b32 v10, v10, v12
+; GCN-NEXT:    v_dual_cndmask_b32 v11, v11, v13 :: v_dual_cndmask_b32 v2, 0, v6
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[14:15], v[8:11], off offset:16
+; GCN-NEXT:    global_store_b128 v[14:15], v[0:3], off
+; GCN-NEXT:    s_endpgm
+.entry:
+  %vcc = fcmp oeq double %a, 1.0
+  %val1 = select i1 %vcc, double %x, double %y
+  %val2 = select i1 %vcc, double 0.0, double %p
+  %val3 = select i1 %vcc, double 0.0, double %q
+  %val4 = select i1 %vcc, double %r, double %s
+  %ret0 = insertelement <4 x double> poison, double %val1, i32 0
+  %ret1 = insertelement <4 x double> %ret0, double %val2, i32 1
+  %ret2 = insertelement <4 x double> %ret1, double %val3, i32 2
+  %ret3 = insertelement <4 x double> %ret2, double %val4, i32 3
+  store <4 x double> %ret3, ptr addrspace(1) %out
+  ret void
+}
diff --git a/llvm/test/CodeGen/AMDGPU/uaddsat.ll b/llvm/test/CodeGen/AMDGPU/uaddsat.ll
index 79adc9ead62e1..867ad96fd5ed0 100644
--- a/llvm/test/CodeGen/AMDGPU/uaddsat.ll
+++ b/llvm/test/CodeGen/AMDGPU/uaddsat.ll
@@ -696,9 +696,9 @@ define i64 @v_uaddsat_i64(i64 %lhs, i64 %rhs) {
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX6-NEXT:    v_add_i32_e32 v2, vcc, v0, v2
 ; GFX6-NEXT:    v_addc_u32_e32 v3, vcc, v1, v3, vcc
-; GFX6-NEXT:    v_cmp_lt_u64_e32 vcc, v[2:3], v[0:1]
-; GFX6-NEXT:    v_cndmask_b32_e64 v0, v2, -1, vcc
-; GFX6-NEXT:    v_cndmask_b32_e64 v1, v3, -1, vcc
+; GFX6-NEXT:    v_cmp_ge_u64_e32 vcc, v[2:3], v[0:1]
+; GFX6-NEXT:    v_cndmask_b32_e32 v0, -1, v2, vcc
+; GFX6-NEXT:    v_cndmask_b32_e32 v1, -1, v3, vcc
 ; GFX6-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_uaddsat_i64:
@@ -706,9 +706,9 @@ define i64 @v_uaddsat_i64(i64 %lhs, i64 %rhs) {
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_add_u32_e32 v2, vcc, v0, v2
 ; GFX8-NEXT:    v_addc_u32_e32 v3, vcc, v1, v3, vcc
-; GFX8-NEXT:    v_cmp_lt_u64_e32 vcc, v[2:3], v[0:1]
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v2, -1, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v1, v3, -1, vcc
+; GFX8-NEXT:    v_cmp_ge_u64_e32 vcc, v[2:3], v[0:1]
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, -1, v2, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, -1, v3, vcc
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX9-LABEL: v_uaddsat_i64:
@@ -716,9 +716,9 @@ define i64 @v_uaddsat_i64(i64 %lhs, i64 %rhs) {
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    v_add_co_u32_e32 v2, vcc, v0, v2
 ; GFX9-NEXT:    v_addc_co_u32_e32 v3, vcc, v1, v3, vcc
-; GFX9-NEXT:    v_cmp_lt_u64_e32 vcc, v[2:3], v[0:1]
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, -1, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v1, v3, -1, vcc
+; GFX9-NEXT:    v_cmp_ge_u64_e32 vcc, v[2:3], v[0:1]
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, -1, v2, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, -1, v3, vcc
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: v_uaddsat_i64:
@@ -726,9 +726,9 @@ define i64 @v_uaddsat_i64(i64 %lhs, i64 %rhs) {
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    v_add_co_u32 v2, vcc_lo, v0, v2
 ; GFX10-NEXT:    v_add_co_ci_u32_e32 v3, vcc_lo, v1, v3, vcc_lo
-; GFX10-NEXT:    v_cmp_lt_u64_e32 vcc_lo, v[2:3], v[0:1]
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v2, -1, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v3, -1, vcc_lo
+; GFX10-NEXT:    v_cmp_ge_u64_e32 vcc_lo, v[2:3], v[0:1]
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, -1, v2, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, -1, v3, vcc_lo
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: v_uaddsat_i64:
@@ -737,9 +737,8 @@ define i64 @v_uaddsat_i64(i64 %lhs, i64 %rhs) {
 ; GFX11-NEXT:    v_add_co_u32 v2, vcc_lo, v0, v2
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX11-NEXT:    v_add_co_ci_u32_e64 v3, null, v1, v3, vcc_lo
-; GFX11-NEXT:    v_cmp_lt_u64_e32 vcc_lo, v[2:3], v[0:1]
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v2, -1, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v3, -1, vcc_lo
+; GFX11-NEXT:    v_cmp_ge_u64_e32 vcc_lo, v[2:3], v[0:1]
+; GFX11-NEXT:    v_dual_cndmask_b32 v0, -1, v2 :: v_dual_cndmask_b32 v1, -1, v3
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
   %result = call i64 @llvm.uadd.sat.i64(i64 %lhs, i64 %rhs)
   ret i64 %result
diff --git a/llvm/test/CodeGen/AMDGPU/usubsat.ll b/llvm/test/CodeGen/AMDGPU/usubsat.ll
index 90491a07289a0..9e2d1eac3277c 100644
--- a/llvm/test/CodeGen/AMDGPU/usubsat.ll
+++ b/llvm/test/CodeGen/AMDGPU/usubsat.ll
@@ -732,9 +732,9 @@ define i64 @v_usubsat_i64(i64 %lhs, i64 %rhs) {
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, v0, v2
 ; GFX6-NEXT:    v_subb_u32_e32 v3, vcc, v1, v3, vcc
-; GFX6-NEXT:    v_cmp_gt_u64_e32 vcc, v[2:3], v[0:1]
-; GFX6-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX6-NEXT:    v_cndmask_b32_e64 v1, v3, 0, vcc
+; GFX6-NEXT:    v_cmp_le_u64_e32 vcc, v[2:3], v[0:1]
+; GFX6-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
+; GFX6-NEXT:    v_cndmask_b32_e32 v1, 0, v3, vcc
 ; GFX6-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_usubsat_i64:
@@ -742,9 +742,9 @@ define i64 @v_usubsat_i64(i64 %lhs, i64 %rhs) {
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_sub_u32_e32 v2, vcc, v0, v2
 ; GFX8-NEXT:    v_subb_u32_e32 v3, vcc, v1, v3, vcc
-; GFX8-NEXT:    v_cmp_gt_u64_e32 vcc, v[2:3], v[0:1]
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v1, v3, 0, vcc
+; GFX8-NEXT:    v_cmp_le_u64_e32 vcc, v[2:3], v[0:1]
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, 0, v3, vcc
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX9-LABEL: v_usubsat_i64:
@@ -752,9 +752,9 @@ define i64 @v_usubsat_i64(i64 %lhs, i64 %rhs) {
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    v_sub_co_u32_e32 v2, vcc, v0, v2
 ; GFX9-NEXT:    v_subb_co_u32_e32 v3, vcc, v1, v3, vcc
-; GFX9-NEXT:    v_cmp_gt_u64_e32 vcc, v[2:3], v[0:1]
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v1, v3, 0, vcc
+; GFX9-NEXT:    v_cmp_le_u64_e32 vcc, v[2:3], v[0:1]
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, 0, v3, vcc
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: v_usubsat_i64:
@@ -762,9 +762,9 @@ define i64 @v_usubsat_i64(i64 %lhs, i64 %rhs) {
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    v_sub_co_u32 v2, vcc_lo, v0, v2
 ; GFX10-NEXT:    v_sub_co_ci_u32_e32 v3, vcc_lo, v1, v3, vcc_lo
-; GFX10-NEXT:    v_cmp_gt_u64_e32 vcc_lo, v[2:3], v[0:1]
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v3, 0, vcc_lo
+; GFX10-NEXT:    v_cmp_le_u64_e32 vcc_lo, v[2:3], v[0:1]
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0, v3, vcc_lo
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: v_usubsat_i64:
@@ -772,9 +772,8 @@ define i64 @v_usubsat_i64(i64 %lhs, i64 %rhs) {
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NEXT:    v_sub_co_u32 v2, vcc_lo, v0, v2
 ; GFX11-NEXT:    v_sub_co_ci_u32_e64 v3, null, v1, v3, vcc_lo
-; GFX11-NEXT:    v_cmp_gt_u64_e32 vcc_lo, v[2:3], v[0:1]
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v3, 0, vcc_lo
+; GFX11-NEXT:    v_cmp_le_u64_e32 vcc_lo, v[2:3], v[0:1]
+; GFX11-NEXT:    v_dual_cndmask_b32 v0, 0, v2 :: v_dual_cndmask_b32 v1, 0, v3
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
   %result = call i64 @llvm.usub.sat.i64(i64 %lhs, i64 %rhs)
   ret i64 %result

>From 5f807e6b3d39102af0f3e323d6a9a04202a1d09b Mon Sep 17 00:00:00 2001
From: Ana Mihajlovic <Ana.Mihajlovic at amd.com>
Date: Mon, 14 Apr 2025 13:49:38 +0200
Subject: [PATCH 3/3] update test, small code changes

---
 .../Target/AMDGPU/SIShrinkInstructions.cpp    |  65 ++-
 llvm/test/CodeGen/AMDGPU/fmed3.ll             |  24 +-
 llvm/test/CodeGen/AMDGPU/fneg-combines.f16.ll |  16 +-
 llvm/test/CodeGen/AMDGPU/fneg-combines.ll     |   4 +-
 llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll |  28 +-
 .../CodeGen/AMDGPU/short-select-cndmask.ll    | 121 -----
 llvm/test/CodeGen/AMDGPU/shrink-cndmask.ll    | 429 ++++++++++++++++++
 7 files changed, 506 insertions(+), 181 deletions(-)
 delete mode 100644 llvm/test/CodeGen/AMDGPU/short-select-cndmask.ll
 create mode 100644 llvm/test/CodeGen/AMDGPU/shrink-cndmask.ll

diff --git a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
index f40655955fac8..81719f0ad9fb9 100644
--- a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
+++ b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
@@ -871,12 +871,20 @@ unsigned SIShrinkInstructions::getInverseCompareOpcode(MachineInstr &MI) const {
     return AMDGPU::V_CMP_EQ_F32_e64;
   case AMDGPU::V_CMP_GE_F32_e64:
     return AMDGPU::V_CMP_NGE_F32_e64;
+  case AMDGPU::V_CMP_NGE_F32_e64:
+    return AMDGPU::V_CMP_GE_F32_e64;
   case AMDGPU::V_CMP_LE_F32_e64:
     return AMDGPU::V_CMP_NLE_F32_e64;
+  case AMDGPU::V_CMP_NLE_F32_e64:
+    return AMDGPU::V_CMP_LE_F32_e64;
   case AMDGPU::V_CMP_GT_F32_e64:
     return AMDGPU::V_CMP_NGT_F32_e64;
+  case AMDGPU::V_CMP_NGT_F32_e64:
+    return AMDGPU::V_CMP_GT_F32_e64;
   case AMDGPU::V_CMP_LT_F32_e64:
     return AMDGPU::V_CMP_NLT_F32_e64;
+  case AMDGPU::V_CMP_NLT_F32_e64:
+    return AMDGPU::V_CMP_LT_F32_e64;
   // float 64
   case AMDGPU::V_CMP_EQ_F64_e64:
     return AMDGPU::V_CMP_NEQ_F64_e64;
@@ -884,12 +892,20 @@ unsigned SIShrinkInstructions::getInverseCompareOpcode(MachineInstr &MI) const {
     return AMDGPU::V_CMP_EQ_F64_e64;
   case AMDGPU::V_CMP_GE_F64_e64:
     return AMDGPU::V_CMP_NGE_F64_e64;
+  case AMDGPU::V_CMP_NGE_F64_e64:
+    return AMDGPU::V_CMP_GE_F64_e64;
   case AMDGPU::V_CMP_LE_F64_e64:
     return AMDGPU::V_CMP_NLE_F64_e64;
+  case AMDGPU::V_CMP_NLE_F64_e64:
+    return AMDGPU::V_CMP_LE_F64_e64;
   case AMDGPU::V_CMP_GT_F64_e64:
     return AMDGPU::V_CMP_NGT_F64_e64;
+  case AMDGPU::V_CMP_NGT_F64_e64:
+    return AMDGPU::V_CMP_GT_F64_e64;
   case AMDGPU::V_CMP_LT_F64_e64:
     return AMDGPU::V_CMP_NLT_F64_e64;
+  case AMDGPU::V_CMP_NLT_F64_e64:
+    return AMDGPU::V_CMP_LT_F64_e64;
   default:
     return 0;
   }
@@ -905,11 +921,11 @@ bool SIShrinkInstructions::shouldSwapCndOperands(
     MachineInstr *UseInst = Use.getParent();
     if (UseInst->getOpcode() != AMDGPU::V_CNDMASK_B32_e64)
       return false;
-    MachineOperand *Src0 = TII.getNamedOperand(*UseInst, AMDGPU::OpName::src0);
-    MachineOperand *Src1 = TII.getNamedOperand(*UseInst, AMDGPU::OpName::src1);
+    MachineOperand *Src0 = &UseInst->getOperand(2);
+    MachineOperand *Src1 = &UseInst->getOperand(4);
 
-    auto Src0Imm = Src0->isImm();
-    auto Src1Imm = Src1->isImm();
+    bool Src0Imm = Src0->isImm();
+    bool Src1Imm = Src1->isImm();
 
     if (!Src1Imm && Src0Imm)
       return false;
@@ -922,18 +938,18 @@ bool SIShrinkInstructions::shouldSwapCndOperands(
   return ShouldSwap;
 }
 
-void swapCndOperands(MachineInstr &MI) {
-  MachineOperand Op2 = MI.getOperand(2);
+static void swapCndOperands(MachineInstr &MI) {
+  MachineOperand *Op2 = &MI.getOperand(2);
   MachineOperand Op4 = MI.getOperand(4);
 
-  if (Op2.isReg()) {
+  if (Op2->isReg()) {
     MI.getOperand(4).ChangeToRegister(
-        Op2.getReg(), Op2.isDef(), Op2.isImplicit(), Op2.isKill(), Op2.isDead(),
-        Op2.isUndef(), Op2.isDebug());
-    if (Op2.getSubReg() != AMDGPU::NoSubRegister)
-      MI.getOperand(4).setSubReg(Op2.getSubReg());
-  } else if (Op2.isImm()) {
-    MI.getOperand(4).ChangeToImmediate(Op2.getImm());
+        Op2->getReg(), Op2->isDef(), Op2->isImplicit(), Op2->isKill(),
+        Op2->isDead(), Op2->isUndef(), Op2->isDebug());
+    if (Op2->getSubReg() != AMDGPU::NoSubRegister)
+      MI.getOperand(4).setSubReg(Op2->getSubReg());
+  } else if (Op2->isImm()) {
+    MI.getOperand(4).ChangeToImmediate(Op2->getImm());
   }
 
   if (Op4.isReg()) {
@@ -944,20 +960,23 @@ void swapCndOperands(MachineInstr &MI) {
     MI.getOperand(2).ChangeToImmediate(Op4.getImm());
   }
 
-  MachineOperand Op1 = MI.getOperand(1);
-  MachineOperand Op3 = MI.getOperand(3);
-  MI.getOperand(1).setImm(Op3.getImm());
-  MI.getOperand(3).setImm(Op1.getImm());
+  auto Op1Imm = MI.getOperand(1).getImm();
+  auto Op3Imm = MI.getOperand(3).getImm();
+  MI.getOperand(1).setImm(Op3Imm);
+  MI.getOperand(3).setImm(Op1Imm);
 }
 
 Register SIShrinkInstructions::trySwapCndOperands(MachineInstr &MI) const {
   Register Reg = MI.getOperand(0).getReg();
 
+  // Debug-only trace; unconditional dbgs()/dump() must not ship.
+  LLVM_DEBUG(dbgs() << "cmp : \n" << MI);
+
   unsigned Opcode = getInverseCompareOpcode(MI);
   SmallVector<MachineOperand *, 4> UsesToProcess;
   if (!Opcode ||
       !SIShrinkInstructions::shouldSwapCndOperands(MI, *TII, UsesToProcess))
-    return AMDGPU::NoRegister;
+    return Reg;
 
   auto DL = MI.getDebugLoc();
   Register NewVCC = MRI->createVirtualRegister(MRI->getRegClass(Reg));
@@ -967,11 +986,11 @@ Register SIShrinkInstructions::trySwapCndOperands(MachineInstr &MI) const {
   InverseCompare->setFlags(MI.getFlags());
 
   unsigned OpNum = MI.getNumExplicitOperands();
-  for (unsigned i = 1; i < OpNum; i++) {
-    MachineOperand Op = MI.getOperand(i);
+  for (unsigned Idx = 1; Idx < OpNum; Idx++) {
+    MachineOperand Op = MI.getOperand(Idx);
     InverseCompare.add(Op);
     if (Op.isReg() && Op.isKill())
-      InverseCompare->getOperand(i).setIsKill(false);
+      InverseCompare->getOperand(Idx).setIsKill(false);
   }
 
   for (auto &Use : UsesToProcess) {
@@ -1153,6 +1172,7 @@ bool SIShrinkInstructions::run(MachineFunction &MF) {
           // dst.
           Register DstReg = Op0.getReg();
           if (DstReg.isVirtual()) {
+            DstReg = trySwapCndOperands(MI);
             // VOPC instructions can only write to the VCC register. We can't
             // force them to use VCC here, because this is only one register and
             // cannot deal with sequences which would require multiple copies of
@@ -1162,9 +1182,6 @@ bool SIShrinkInstructions::run(MachineFunction &MF) {
             // provide a hint to the register allocator to use VCC and then we
             // will run this pass again after RA and shrink it if it outputs to
             // VCC.
-            Register NewVCC = trySwapCndOperands(MI);
-            DstReg = NewVCC == AMDGPU::NoRegister ? DstReg : NewVCC;
-
             MRI->setRegAllocationHint(DstReg, 0, VCCReg);
             continue;
           }
diff --git a/llvm/test/CodeGen/AMDGPU/fmed3.ll b/llvm/test/CodeGen/AMDGPU/fmed3.ll
index db0c5362bdc5f..e502690e463bd 100644
--- a/llvm/test/CodeGen/AMDGPU/fmed3.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmed3.ll
@@ -1032,10 +1032,10 @@ define amdgpu_kernel void @v_test_legacy_fmed3_r_i_i_f32(ptr addrspace(1) %out,
 ; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
 ; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
 ; VI-GISEL-NEXT:    v_add_f32_e32 v2, 1.0, v3
-; VI-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, 2.0, v2
-; VI-GISEL-NEXT:    v_cndmask_b32_e64 v2, v2, 2.0, vcc
-; VI-GISEL-NEXT:    v_cmp_ngt_f32_e32 vcc, 4.0, v2
-; VI-GISEL-NEXT:    v_cndmask_b32_e64 v2, v2, 4.0, vcc
+; VI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, 2.0, v2
+; VI-GISEL-NEXT:    v_cndmask_b32_e32 v2, 2.0, v2, vcc
+; VI-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, 4.0, v2
+; VI-GISEL-NEXT:    v_cndmask_b32_e32 v2, 4.0, v2, vcc
 ; VI-GISEL-NEXT:    flat_store_dword v[0:1], v2
 ; VI-GISEL-NEXT:    s_endpgm
 ;
@@ -1059,10 +1059,10 @@ define amdgpu_kernel void @v_test_legacy_fmed3_r_i_i_f32(ptr addrspace(1) %out,
 ; GFX9-GISEL-NEXT:    global_load_dword v1, v0, s[2:3]
 ; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-GISEL-NEXT:    v_add_f32_e32 v1, 1.0, v1
-; GFX9-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc, 2.0, v1
-; GFX9-GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, 2.0, vcc
-; GFX9-GISEL-NEXT:    v_cmp_ngt_f32_e32 vcc, 4.0, v1
-; GFX9-GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, 4.0, vcc
+; GFX9-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, 2.0, v1
+; GFX9-GISEL-NEXT:    v_cndmask_b32_e32 v1, 2.0, v1, vcc
+; GFX9-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, 4.0, v1
+; GFX9-GISEL-NEXT:    v_cndmask_b32_e32 v1, 4.0, v1, vcc
 ; GFX9-GISEL-NEXT:    global_store_dword v0, v1, s[0:1]
 ; GFX9-GISEL-NEXT:    s_endpgm
 ;
@@ -1090,11 +1090,11 @@ define amdgpu_kernel void @v_test_legacy_fmed3_r_i_i_f32(ptr addrspace(1) %out,
 ; GFX11-GISEL-NEXT:    global_load_b32 v1, v0, s[2:3]
 ; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX11-GISEL-NEXT:    v_add_f32_e32 v1, 1.0, v1
-; GFX11-GISEL-NEXT:    v_cmp_nlt_f32_e32 vcc_lo, 2.0, v1
-; GFX11-GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, 2.0, vcc_lo
+; GFX11-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc_lo, 2.0, v1
+; GFX11-GISEL-NEXT:    v_cndmask_b32_e32 v1, 2.0, v1, vcc_lo
 ; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-GISEL-NEXT:    v_cmp_ngt_f32_e32 vcc_lo, 4.0, v1
-; GFX11-GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, 4.0, vcc_lo
+; GFX11-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc_lo, 4.0, v1
+; GFX11-GISEL-NEXT:    v_cndmask_b32_e32 v1, 4.0, v1, vcc_lo
 ; GFX11-GISEL-NEXT:    global_store_b32 v0, v1, s[0:1]
 ; GFX11-GISEL-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
diff --git a/llvm/test/CodeGen/AMDGPU/fneg-combines.f16.ll b/llvm/test/CodeGen/AMDGPU/fneg-combines.f16.ll
index f199db3ca12ca..0084125fa408b 100644
--- a/llvm/test/CodeGen/AMDGPU/fneg-combines.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fneg-combines.f16.ll
@@ -607,8 +607,8 @@ define amdgpu_ps half @fneg_fadd_0_f16(half inreg %tmp2, half inreg %tmp6, <4 x
 ; SI-SAFE-NEXT:    v_cmp_nlt_f32_e32 vcc, v0, v1
 ; SI-SAFE-NEXT:    v_cndmask_b32_e64 v0, -v0, v1, vcc
 ; SI-SAFE-NEXT:    v_mov_b32_e32 v1, 0x7fc00000
-; SI-SAFE-NEXT:    v_cmp_nlt_f32_e32 vcc, 0, v0
-; SI-SAFE-NEXT:    v_cndmask_b32_e64 v0, v1, 0, vcc
+; SI-SAFE-NEXT:    v_cmp_lt_f32_e32 vcc, 0, v0
+; SI-SAFE-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
 ; SI-SAFE-NEXT:    ; return to shader part epilog
 ;
 ; SI-NSZ-LABEL: fneg_fadd_0_f16:
@@ -635,8 +635,8 @@ define amdgpu_ps half @fneg_fadd_0_f16(half inreg %tmp2, half inreg %tmp6, <4 x
 ; SI-NSZ-NEXT:    v_cmp_nlt_f32_e64 vcc, -v0, v1
 ; SI-NSZ-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
 ; SI-NSZ-NEXT:    v_mov_b32_e32 v1, 0x7fc00000
-; SI-NSZ-NEXT:    v_cmp_nlt_f32_e32 vcc, 0, v0
-; SI-NSZ-NEXT:    v_cndmask_b32_e64 v0, v1, 0, vcc
+; SI-NSZ-NEXT:    v_cmp_lt_f32_e32 vcc, 0, v0
+; SI-NSZ-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
 ; SI-NSZ-NEXT:    ; return to shader part epilog
 ;
 ; VI-SAFE-LABEL: fneg_fadd_0_f16:
@@ -744,8 +744,8 @@ define amdgpu_ps half @fneg_fadd_0_nsz_f16(half inreg %tmp2, half inreg %tmp6, <
 ; SI-SAFE-NEXT:    v_mov_b32_e32 v1, 0x7fc00000
 ; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v0, v0
 ; SI-SAFE-NEXT:    v_min_legacy_f32_e32 v0, 0, v0
-; SI-SAFE-NEXT:    v_cmp_ngt_f32_e32 vcc, s0, v0
-; SI-SAFE-NEXT:    v_cndmask_b32_e64 v0, v1, 0, vcc
+; SI-SAFE-NEXT:    v_cmp_gt_f32_e32 vcc, s0, v0
+; SI-SAFE-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
 ; SI-SAFE-NEXT:    ; return to shader part epilog
 ;
 ; SI-NSZ-LABEL: fneg_fadd_0_nsz_f16:
@@ -759,8 +759,8 @@ define amdgpu_ps half @fneg_fadd_0_nsz_f16(half inreg %tmp2, half inreg %tmp6, <
 ; SI-NSZ-NEXT:    v_mul_f32_e32 v0, 0x80000000, v0
 ; SI-NSZ-NEXT:    v_cmp_nlt_f32_e64 vcc, -v0, v1
 ; SI-NSZ-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
-; SI-NSZ-NEXT:    v_cmp_nlt_f32_e32 vcc, 0, v0
-; SI-NSZ-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; SI-NSZ-NEXT:    v_cmp_lt_f32_e32 vcc, 0, v0
+; SI-NSZ-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
 ; SI-NSZ-NEXT:    ; return to shader part epilog
 ;
 ; VI-SAFE-LABEL: fneg_fadd_0_nsz_f16:
diff --git a/llvm/test/CodeGen/AMDGPU/fneg-combines.ll b/llvm/test/CodeGen/AMDGPU/fneg-combines.ll
index 14c18df6d8e41..7ec56a8ad38bd 100644
--- a/llvm/test/CodeGen/AMDGPU/fneg-combines.ll
+++ b/llvm/test/CodeGen/AMDGPU/fneg-combines.ll
@@ -254,8 +254,8 @@ define amdgpu_ps float @fneg_fadd_0(float inreg %tmp2, float inreg %tmp6, <4 x i
 ; GCN-NSZ-DAG: v_mul_f32_e32 [[D:v[0-9]+]], 0, [[A]]
 ; GCN-NSZ-DAG: v_cmp_ngt_f32_e32 {{.*}}, s{{[0-9]+}}, [[D]]
 ; GCN-NSZ-DAG: v_cndmask_b32_e64 [[E:v[0-9]+]], -[[D]], v{{[0-9]+}},
-; GCN-NSZ-DAG: v_cmp_nlt_f32_e32 {{.*}}, 0
-; GCN-NSZ-DAG: v_cndmask_b32_e64 v{{[0-9]+}}, [[C]], 0,
+; GCN-NSZ-DAG: v_cmp_lt_f32_e32 {{.*}}, 0
+; GCN-NSZ-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}}, vcc
 define amdgpu_ps float @fneg_fadd_0_nsz(float inreg %tmp2, float inreg %tmp6, <4 x i32> %arg) local_unnamed_addr #2 {
 .entry:
   %tmp7 = fdiv afn float 1.000000e+00, %tmp6
diff --git a/llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll b/llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll
index 7f51cbec5dc4e..3da28fd22d3d4 100644
--- a/llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll
+++ b/llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll
@@ -197,8 +197,8 @@ define amdgpu_ps float @fneg_fadd_0_f32(float inreg %tmp2, float inreg %tmp6, <4
 ; SI-SAFE-NEXT:    v_cmp_ngt_f32_e32 vcc, s0, v0
 ; SI-SAFE-NEXT:    v_cndmask_b32_e64 v0, -v0, v1, vcc
 ; SI-SAFE-NEXT:    v_mov_b32_e32 v1, 0x7fc00000
-; SI-SAFE-NEXT:    v_cmp_nlt_f32_e32 vcc, 0, v0
-; SI-SAFE-NEXT:    v_cndmask_b32_e64 v0, v1, 0, vcc
+; SI-SAFE-NEXT:    v_cmp_lt_f32_e32 vcc, 0, v0
+; SI-SAFE-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
 ; SI-SAFE-NEXT:    ; return to shader part epilog
 ;
 ; SI-NSZ-LABEL: fneg_fadd_0_f32:
@@ -221,8 +221,8 @@ define amdgpu_ps float @fneg_fadd_0_f32(float inreg %tmp2, float inreg %tmp6, <4
 ; SI-NSZ-NEXT:    v_cmp_ngt_f32_e32 vcc, s0, v0
 ; SI-NSZ-NEXT:    v_cndmask_b32_e64 v0, -v0, v1, vcc
 ; SI-NSZ-NEXT:    v_mov_b32_e32 v1, 0x7fc00000
-; SI-NSZ-NEXT:    v_cmp_nlt_f32_e32 vcc, 0, v0
-; SI-NSZ-NEXT:    v_cndmask_b32_e64 v0, v1, 0, vcc
+; SI-NSZ-NEXT:    v_cmp_lt_f32_e32 vcc, 0, v0
+; SI-NSZ-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
 ; SI-NSZ-NEXT:    ; return to shader part epilog
 ;
 ; VI-SAFE-LABEL: fneg_fadd_0_f32:
@@ -245,8 +245,8 @@ define amdgpu_ps float @fneg_fadd_0_f32(float inreg %tmp2, float inreg %tmp6, <4
 ; VI-SAFE-NEXT:    v_mad_f32 v0, v0, 0, 0
 ; VI-SAFE-NEXT:    v_cmp_ngt_f32_e32 vcc, s0, v0
 ; VI-SAFE-NEXT:    v_cndmask_b32_e64 v0, -v0, v2, vcc
-; VI-SAFE-NEXT:    v_cmp_nlt_f32_e32 vcc, 0, v0
-; VI-SAFE-NEXT:    v_cndmask_b32_e64 v0, v1, 0, vcc
+; VI-SAFE-NEXT:    v_cmp_lt_f32_e32 vcc, 0, v0
+; VI-SAFE-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
 ; VI-SAFE-NEXT:    ; return to shader part epilog
 ;
 ; VI-NSZ-LABEL: fneg_fadd_0_f32:
@@ -269,8 +269,8 @@ define amdgpu_ps float @fneg_fadd_0_f32(float inreg %tmp2, float inreg %tmp6, <4
 ; VI-NSZ-NEXT:    v_mul_f32_e32 v0, 0, v0
 ; VI-NSZ-NEXT:    v_cmp_ngt_f32_e32 vcc, s0, v0
 ; VI-NSZ-NEXT:    v_cndmask_b32_e64 v0, -v0, v2, vcc
-; VI-NSZ-NEXT:    v_cmp_nlt_f32_e32 vcc, 0, v0
-; VI-NSZ-NEXT:    v_cndmask_b32_e64 v0, v1, 0, vcc
+; VI-NSZ-NEXT:    v_cmp_lt_f32_e32 vcc, 0, v0
+; VI-NSZ-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
 ; VI-NSZ-NEXT:    ; return to shader part epilog
 .entry:
   %tmp7 = fdiv float 1.000000e+00, %tmp6
@@ -294,8 +294,8 @@ define amdgpu_ps float @fneg_fadd_0_nsz_f32(float inreg %tmp2, float inreg %tmp6
 ; SI-SAFE-NEXT:    v_min_legacy_f32_e64 v0, 0, s0
 ; SI-SAFE-NEXT:    s_brev_b32 s0, 1
 ; SI-SAFE-NEXT:    v_mov_b32_e32 v1, 0x7fc00000
-; SI-SAFE-NEXT:    v_cmp_ngt_f32_e32 vcc, s0, v0
-; SI-SAFE-NEXT:    v_cndmask_b32_e64 v0, v1, 0, vcc
+; SI-SAFE-NEXT:    v_cmp_gt_f32_e32 vcc, s0, v0
+; SI-SAFE-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
 ; SI-SAFE-NEXT:    ; return to shader part epilog
 ;
 ; GCN-NSZ-LABEL: fneg_fadd_0_nsz_f32:
@@ -306,8 +306,8 @@ define amdgpu_ps float @fneg_fadd_0_nsz_f32(float inreg %tmp2, float inreg %tmp6
 ; GCN-NSZ-NEXT:    v_cmp_ngt_f32_e32 vcc, s0, v0
 ; GCN-NSZ-NEXT:    v_cndmask_b32_e64 v0, -v0, v1, vcc
 ; GCN-NSZ-NEXT:    v_mov_b32_e32 v1, 0x7fc00000
-; GCN-NSZ-NEXT:    v_cmp_nlt_f32_e32 vcc, 0, v0
-; GCN-NSZ-NEXT:    v_cndmask_b32_e64 v0, v1, 0, vcc
+; GCN-NSZ-NEXT:    v_cmp_lt_f32_e32 vcc, 0, v0
+; GCN-NSZ-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
 ; GCN-NSZ-NEXT:    ; return to shader part epilog
 ;
 ; VI-SAFE-LABEL: fneg_fadd_0_nsz_f32:
@@ -319,8 +319,8 @@ define amdgpu_ps float @fneg_fadd_0_nsz_f32(float inreg %tmp2, float inreg %tmp6
 ; VI-SAFE-NEXT:    v_cmp_ngt_f32_e32 vcc, s0, v0
 ; VI-SAFE-NEXT:    v_cndmask_b32_e64 v0, -v0, v1, vcc
 ; VI-SAFE-NEXT:    v_mov_b32_e32 v1, 0x7fc00000
-; VI-SAFE-NEXT:    v_cmp_nlt_f32_e32 vcc, 0, v0
-; VI-SAFE-NEXT:    v_cndmask_b32_e64 v0, v1, 0, vcc
+; VI-SAFE-NEXT:    v_cmp_lt_f32_e32 vcc, 0, v0
+; VI-SAFE-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
 ; VI-SAFE-NEXT:    ; return to shader part epilog
 .entry:
   %tmp7 = fdiv afn float 1.000000e+00, %tmp6
diff --git a/llvm/test/CodeGen/AMDGPU/short-select-cndmask.ll b/llvm/test/CodeGen/AMDGPU/short-select-cndmask.ll
deleted file mode 100644
index 33b8ea46373d6..0000000000000
--- a/llvm/test/CodeGen/AMDGPU/short-select-cndmask.ll
+++ /dev/null
@@ -1,121 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck %s -check-prefix=GCN
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck %s -check-prefix=GCN
-
-define amdgpu_cs void @test_u32(i32 %a, i32 %x, i32 %y, i32 %p, i32 %q, i32 %r, i32 %s, ptr addrspace(1) %out) {
-; GCN-LABEL: test_u32:
-; GCN:       ; %bb.0: ; %.entry
-; GCN-NEXT:    v_cmp_ne_u32_e32 vcc_lo, -1, v0
-; GCN-NEXT:    v_dual_cndmask_b32 v0, v1, v2 :: v_dual_cndmask_b32 v1, 0, v3
-; GCN-NEXT:    v_dual_cndmask_b32 v2, 0, v4 :: v_dual_cndmask_b32 v3, v5, v6
-; GCN-NEXT:    global_store_b128 v[7:8], v[0:3], off
-; GCN-NEXT:    s_endpgm
-.entry:
-  %vcc = icmp eq i32 %a, -1
-  %val1 = select i1 %vcc, i32 %x, i32 %y
-  %val2 = select i1 %vcc, i32 0, i32 %p
-  %val3 = select i1 %vcc, i32 0, i32 %q
-  %val4 = select i1 %vcc, i32 %r, i32 %s
-  %ret0 = insertelement <4 x i32> poison, i32 %val1, i32 0
-  %ret1 = insertelement <4 x i32> %ret0, i32 %val2, i32 1
-  %ret2 = insertelement <4 x i32> %ret1, i32 %val3, i32 2
-  %ret3 = insertelement <4 x i32> %ret2, i32 %val4, i32 3
-  store <4 x i32> %ret3, ptr addrspace(1) %out
-  ret void
-}
-
-define amdgpu_cs void @test_u32_negative_case(i32 %a, i32 %x, i32 %y, i32 %p, i32 %q, i32 %r, i32 %s, ptr addrspace(1) %out) {
-; GCN-LABEL: test_u32_negative_case:
-; GCN:       ; %bb.0: ; %.entry
-; GCN-NEXT:    v_cmp_ne_u32_e32 vcc_lo, -1, v0
-; GCN-NEXT:    v_dual_cndmask_b32 v0, v1, v2 :: v_dual_cndmask_b32 v1, 0, v3
-; GCN-NEXT:    v_dual_cndmask_b32 v2, 0, v4 :: v_dual_cndmask_b32 v3, v5, v6
-; GCN-NEXT:    global_store_b128 v[7:8], v[0:3], off
-; GCN-NEXT:    s_endpgm
-.entry:
-  %vcc = icmp eq i32 %a, -1
-  %val1 = select i1 %vcc, i32 %x, i32 %y
-  %val2 = select i1 %vcc, i32 0, i32 %p
-  %val3 = select i1 %vcc, i32 0, i32 %q
-  %val4 = select i1 %vcc, i32 %r, i32 %s
-  %ret0 = insertelement <4 x i32> poison, i32 %val1, i32 0
-  %ret1 = insertelement <4 x i32> %ret0, i32 %val2, i32 1
-  %ret2 = insertelement <4 x i32> %ret1, i32 %val3, i32 2
-  %ret3 = insertelement <4 x i32> %ret2, i32 %val4, i32 3
-  store <4 x i32> %ret3, ptr addrspace(1) %out
-  ret void
-}
-
-define amdgpu_cs void @test_u64(i64 %a, i64 %x, i64 %y, i64 %p, i64 %q, i64 %r, i64 %s, ptr addrspace(1) %out) {
-; GCN-LABEL: test_u64:
-; GCN:       ; %bb.0: ; %.entry
-; GCN-NEXT:    v_cmp_ne_u64_e32 vcc_lo, -1, v[0:1]
-; GCN-NEXT:    v_dual_cndmask_b32 v1, v3, v5 :: v_dual_cndmask_b32 v0, v2, v4
-; GCN-NEXT:    v_dual_cndmask_b32 v3, 0, v7 :: v_dual_cndmask_b32 v8, 0, v8
-; GCN-NEXT:    v_dual_cndmask_b32 v9, 0, v9 :: v_dual_cndmask_b32 v10, v10, v12
-; GCN-NEXT:    v_dual_cndmask_b32 v11, v11, v13 :: v_dual_cndmask_b32 v2, 0, v6
-; GCN-NEXT:    s_clause 0x1
-; GCN-NEXT:    global_store_b128 v[14:15], v[8:11], off offset:16
-; GCN-NEXT:    global_store_b128 v[14:15], v[0:3], off
-; GCN-NEXT:    s_endpgm
-.entry:
-  %vcc = icmp eq i64 %a, -1
-  %val1 = select i1 %vcc, i64 %x, i64 %y
-  %val2 = select i1 %vcc, i64 0, i64 %p
-  %val3 = select i1 %vcc, i64 0, i64 %q
-  %val4 = select i1 %vcc, i64 %r, i64 %s
-  %ret0 = insertelement <4 x i64> poison, i64 %val1, i32 0
-  %ret1 = insertelement <4 x i64> %ret0, i64 %val2, i32 1
-  %ret2 = insertelement <4 x i64> %ret1, i64 %val3, i32 2
-  %ret3 = insertelement <4 x i64> %ret2, i64 %val4, i32 3
-  store <4 x i64> %ret3, ptr addrspace(1) %out
-  ret void
-}
-
-define amdgpu_cs void @test_f32(float %a, float %x, float %y, float %p, float %q, float %r, float %s, ptr addrspace(1) %out) {
-; GCN-LABEL: test_f32:
-; GCN:       ; %bb.0: ; %.entry
-; GCN-NEXT:    v_cmp_neq_f32_e32 vcc_lo, 1.0, v0
-; GCN-NEXT:    v_dual_cndmask_b32 v0, v1, v2 :: v_dual_cndmask_b32 v1, 0, v3
-; GCN-NEXT:    v_dual_cndmask_b32 v2, 0, v4 :: v_dual_cndmask_b32 v3, v5, v6
-; GCN-NEXT:    global_store_b128 v[7:8], v[0:3], off
-; GCN-NEXT:    s_endpgm
-.entry:
-  %vcc = fcmp oeq float %a, 1.0
-  %val1 = select i1 %vcc, float %x, float %y
-  %val2 = select i1 %vcc, float 0.0, float %p
-  %val3 = select i1 %vcc, float 0.0, float %q
-  %val4 = select i1 %vcc, float %r, float %s
-  %ret0 = insertelement <4 x float> poison, float %val1, i32 0
-  %ret1 = insertelement <4 x float> %ret0, float %val2, i32 1
-  %ret2 = insertelement <4 x float> %ret1, float %val3, i32 2
-  %ret3 = insertelement <4 x float> %ret2, float %val4, i32 3
-  store <4 x float> %ret3, ptr addrspace(1) %out
-  ret void
-}
-
-define amdgpu_cs void @test_f64(double %a, double %x, double %y, double %p, double %q, double %r, double %s, ptr addrspace(1) %out) {
-; GCN-LABEL: test_f64:
-; GCN:       ; %bb.0: ; %.entry
-; GCN-NEXT:    v_cmp_neq_f64_e32 vcc_lo, 1.0, v[0:1]
-; GCN-NEXT:    v_dual_cndmask_b32 v1, v3, v5 :: v_dual_cndmask_b32 v0, v2, v4
-; GCN-NEXT:    v_dual_cndmask_b32 v3, 0, v7 :: v_dual_cndmask_b32 v8, 0, v8
-; GCN-NEXT:    v_dual_cndmask_b32 v9, 0, v9 :: v_dual_cndmask_b32 v10, v10, v12
-; GCN-NEXT:    v_dual_cndmask_b32 v11, v11, v13 :: v_dual_cndmask_b32 v2, 0, v6
-; GCN-NEXT:    s_clause 0x1
-; GCN-NEXT:    global_store_b128 v[14:15], v[8:11], off offset:16
-; GCN-NEXT:    global_store_b128 v[14:15], v[0:3], off
-; GCN-NEXT:    s_endpgm
-.entry:
-  %vcc = fcmp oeq double %a, 1.0
-  %val1 = select i1 %vcc, double %x, double %y
-  %val2 = select i1 %vcc, double 0.0, double %p
-  %val3 = select i1 %vcc, double 0.0, double %q
-  %val4 = select i1 %vcc, double %r, double %s
-  %ret0 = insertelement <4 x double> poison, double %val1, i32 0
-  %ret1 = insertelement <4 x double> %ret0, double %val2, i32 1
-  %ret2 = insertelement <4 x double> %ret1, double %val3, i32 2
-  %ret3 = insertelement <4 x double> %ret2, double %val4, i32 3
-  store <4 x double> %ret3, ptr addrspace(1) %out
-  ret void
-}
diff --git a/llvm/test/CodeGen/AMDGPU/shrink-cndmask.ll b/llvm/test/CodeGen/AMDGPU/shrink-cndmask.ll
new file mode 100644
index 0000000000000..57bafcd691c17
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/shrink-cndmask.ll
@@ -0,0 +1,429 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck %s -check-prefix=GCN
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck %s -check-prefix=GCN
+
+define amdgpu_cs void @test_u32_eq(i32 %a, i32 %p, i32 %q, i32 %r, i32 %s, ptr addrspace(1) %out) {
+; GCN-LABEL: test_u32_eq:
+; GCN:       ; %bb.0: ; %.entry
+; GCN-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 1, v0
+; GCN-NEXT:    v_dual_cndmask_b32 v0, 0, v1 :: v_dual_cndmask_b32 v1, 0, v2
+; GCN-NEXT:    global_store_b64 v[5:6], v[0:1], off
+; GCN-NEXT:    s_endpgm
+.entry:
+  %vcc = icmp eq i32 1, %a
+  %val1 = select i1 %vcc, i32 0, i32 %p
+  %val2 = select i1 %vcc, i32 0, i32 %q
+  %ret0 = insertelement <2 x i32> poison, i32 %val1, i32 0
+  %ret1 = insertelement <2 x i32> %ret0, i32 %val2, i32 1
+  store <2 x i32> %ret1, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_cs void @test_u32_ne(i32 %a, i32 %p, i32 %q, i32 %r, i32 %s, ptr addrspace(1) %out) {
+; GCN-LABEL: test_u32_ne:
+; GCN:       ; %bb.0: ; %.entry
+; GCN-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
+; GCN-NEXT:    v_dual_cndmask_b32 v0, 0, v1 :: v_dual_cndmask_b32 v1, 0, v2
+; GCN-NEXT:    global_store_b64 v[5:6], v[0:1], off
+; GCN-NEXT:    s_endpgm
+.entry:
+  %vcc = icmp ne i32 1, %a
+  %val1 = select i1 %vcc, i32 0, i32 %p
+  %val2 = select i1 %vcc, i32 0, i32 %q
+  %ret0 = insertelement <2 x i32> poison, i32 %val1, i32 0
+  %ret1 = insertelement <2 x i32> %ret0, i32 %val2, i32 1
+  store <2 x i32> %ret1, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_cs void @test_u32_uge(i32 %a, i32 %p, i32 %q, i32 %r, i32 %s, ptr addrspace(1) %out) {
+; GCN-LABEL: test_u32_uge:
+; GCN:       ; %bb.0: ; %.entry
+; GCN-NEXT:    v_cmp_lt_u32_e32 vcc_lo, 1, v0
+; GCN-NEXT:    v_dual_cndmask_b32 v0, 0, v1 :: v_dual_cndmask_b32 v1, 0, v2
+; GCN-NEXT:    global_store_b64 v[5:6], v[0:1], off
+; GCN-NEXT:    s_endpgm
+.entry:
+  %vcc = icmp uge i32 %a, 2
+  %val1 = select i1 %vcc, i32 %p, i32 0
+  %val2 = select i1 %vcc, i32 %q, i32 0
+  %ret0 = insertelement <2 x i32> poison, i32 %val1, i32 0
+  %ret1 = insertelement <2 x i32> %ret0, i32 %val2, i32 1
+  store <2 x i32> %ret1, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_cs void @test_u32_ule(i32 %a, i32 %p, i32 %q, i32 %r, i32 %s, ptr addrspace(1) %out) {
+; GCN-LABEL: test_u32_ule:
+; GCN:       ; %bb.0: ; %.entry
+; GCN-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 3, v0
+; GCN-NEXT:    v_dual_cndmask_b32 v0, 0, v1 :: v_dual_cndmask_b32 v1, 0, v2
+; GCN-NEXT:    global_store_b64 v[5:6], v[0:1], off
+; GCN-NEXT:    s_endpgm
+.entry:
+  %vcc = icmp ule i32 %a, 2
+  %val1 = select i1 %vcc, i32 %p, i32 0
+  %val2 = select i1 %vcc, i32 %q, i32 0
+  %ret0 = insertelement <2 x i32> poison, i32 %val1, i32 0
+  %ret1 = insertelement <2 x i32> %ret0, i32 %val2, i32 1
+  store <2 x i32> %ret1, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_cs void @test_u32_ugt(i32 %a, i32 %p, i32 %q, i32 %r, i32 %s, ptr addrspace(1) %out) {
+; GCN-LABEL: test_u32_ugt:
+; GCN:       ; %bb.0: ; %.entry
+; GCN-NEXT:    v_cmp_le_u32_e32 vcc_lo, 2, v0
+; GCN-NEXT:    v_dual_cndmask_b32 v0, 0, v1 :: v_dual_cndmask_b32 v1, 0, v2
+; GCN-NEXT:    global_store_b64 v[5:6], v[0:1], off
+; GCN-NEXT:    s_endpgm
+.entry:
+  %vcc = icmp ugt i32 2, %a ; shrinking inverts this to v_cmp_le
+  %val1 = select i1 %vcc, i32 0, i32 %p
+  %val2 = select i1 %vcc, i32 0, i32 %q
+  %ret0 = insertelement <2 x i32> poison, i32 %val1, i32 0
+  %ret1 = insertelement <2 x i32> %ret0, i32 %val2, i32 1
+  store <2 x i32> %ret1, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_cs void @test_u32_ult(i32 %a, i32 %p, i32 %q, i32 %r, i32 %s, ptr addrspace(1) %out) {
+; GCN-LABEL: test_u32_ult:
+; GCN:       ; %bb.0: ; %.entry
+; GCN-NEXT:    v_cmp_ge_u32_e32 vcc_lo, 2, v0
+; GCN-NEXT:    v_dual_cndmask_b32 v0, 0, v1 :: v_dual_cndmask_b32 v1, 0, v2
+; GCN-NEXT:    global_store_b64 v[5:6], v[0:1], off
+; GCN-NEXT:    s_endpgm
+.entry:
+  %vcc = icmp ult i32 2, %a
+  %val1 = select i1 %vcc, i32 0, i32 %p
+  %val2 = select i1 %vcc, i32 0, i32 %q
+  %ret0 = insertelement <2 x i32> poison, i32 %val1, i32 0
+  %ret1 = insertelement <2 x i32> %ret0, i32 %val2, i32 1
+  store <2 x i32> %ret1, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_cs void @test_u64_eq(i64 %argA, i64 %arg0, i64 %arg1, i64 %arg2, ptr addrspace(1) %out) {
+; GCN-LABEL: test_u64_eq:
+; GCN:       ; %bb.0: ; %.entry
+; GCN-NEXT:    v_cmp_ne_u64_e32 vcc_lo, 1, v[0:1]
+; GCN-NEXT:    v_dual_cndmask_b32 v1, v3, v5 :: v_dual_cndmask_b32 v0, v2, v4
+; GCN-NEXT:    v_dual_cndmask_b32 v3, 0, v7 :: v_dual_cndmask_b32 v2, 1, v6
+; GCN-NEXT:    global_store_b128 v[8:9], v[0:3], off
+; GCN-NEXT:    s_endpgm
+.entry:
+  %vcc = icmp eq i64 1, %argA
+  %val1 = select i1 %vcc, i64 %arg0, i64 %arg1
+  %val2 = select i1 %vcc, i64 1, i64 %arg2
+  %ret0 = insertelement <2 x i64> poison, i64 %val1, i32 0
+  %ret1 = insertelement <2 x i64> %ret0, i64 %val2, i32 1
+  store <2 x i64> %ret1, ptr addrspace(1) %out
+  ret void
+}
+
+
+define amdgpu_cs void @test_u64_ne(i64 %argA, i64 %arg0, i64 %arg1, i64 %arg2, ptr addrspace(1) %out) {
+; GCN-LABEL: test_u64_ne:
+; GCN:       ; %bb.0: ; %.entry
+; GCN-NEXT:    v_cmp_eq_u64_e32 vcc_lo, 1, v[0:1]
+; GCN-NEXT:    v_dual_cndmask_b32 v1, v3, v5 :: v_dual_cndmask_b32 v0, v2, v4
+; GCN-NEXT:    v_dual_cndmask_b32 v3, 0, v7 :: v_dual_cndmask_b32 v2, 1, v6
+; GCN-NEXT:    global_store_b128 v[8:9], v[0:3], off
+; GCN-NEXT:    s_endpgm
+.entry:
+  %vcc = icmp ne i64 1, %argA
+  %val1 = select i1 %vcc, i64 %arg0, i64 %arg1
+  %val2 = select i1 %vcc, i64 1, i64 %arg2
+  %ret0 = insertelement <2 x i64> poison, i64 %val1, i32 0
+  %ret1 = insertelement <2 x i64> %ret0, i64 %val2, i32 1
+  store <2 x i64> %ret1, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_cs void @test_u64_uge(i64 %argA, i64 %arg0, i64 %arg1, i64 %arg2, ptr addrspace(1) %out) {
+; GCN-LABEL: test_u64_uge:
+; GCN:       ; %bb.0: ; %.entry
+; GCN-NEXT:    v_cmp_le_u64_e32 vcc_lo, 3, v[0:1]
+; GCN-NEXT:    v_dual_cndmask_b32 v1, v3, v5 :: v_dual_cndmask_b32 v0, v2, v4
+; GCN-NEXT:    v_dual_cndmask_b32 v3, 0, v7 :: v_dual_cndmask_b32 v2, 1, v6
+; GCN-NEXT:    global_store_b128 v[8:9], v[0:3], off
+; GCN-NEXT:    s_endpgm
+.entry:
+  %vcc = icmp uge i64 2, %argA
+  %val1 = select i1 %vcc, i64 %arg0, i64 %arg1
+  %val2 = select i1 %vcc, i64 1, i64 %arg2
+  %ret0 = insertelement <2 x i64> poison, i64 %val1, i32 0
+  %ret1 = insertelement <2 x i64> %ret0, i64 %val2, i32 1
+  store <2 x i64> %ret1, ptr addrspace(1) %out
+  ret void
+}
+
+
+define amdgpu_cs void @test_u64_ule(i64 %argA, i64 %arg0, i64 %arg1, i64 %arg2, ptr addrspace(1) %out) {
+; GCN-LABEL: test_u64_ule:
+; GCN:       ; %bb.0: ; %.entry
+; GCN-NEXT:    v_cmp_ge_u64_e32 vcc_lo, 1, v[0:1]
+; GCN-NEXT:    v_dual_cndmask_b32 v1, v3, v5 :: v_dual_cndmask_b32 v0, v2, v4
+; GCN-NEXT:    v_dual_cndmask_b32 v3, 0, v7 :: v_dual_cndmask_b32 v2, 1, v6
+; GCN-NEXT:    global_store_b128 v[8:9], v[0:3], off
+; GCN-NEXT:    s_endpgm
+.entry:
+  %vcc = icmp ule i64 2, %argA
+  %val1 = select i1 %vcc, i64 %arg0, i64 %arg1
+  %val2 = select i1 %vcc, i64 1, i64 %arg2
+  %ret0 = insertelement <2 x i64> poison, i64 %val1, i32 0
+  %ret1 = insertelement <2 x i64> %ret0, i64 %val2, i32 1
+  store <2 x i64> %ret1, ptr addrspace(1) %out
+  ret void
+}
+
+; Unsigned 64-bit case: the condition is `icmp ugt i64 2, %argA` and the
+; second select has the constant 1 as its true value. The expected output
+; uses the opposite condition (2 <= %argA) and places the constant halves
+; (0 and 1) in the first source operand of the cndmasks.
+define amdgpu_cs void @test_u64_ugt(i64 %argA, i64 %arg0, i64 %arg1, i64 %arg2, ptr addrspace(1) %out) {
+; GCN-LABEL: test_u64_ugt:
+; GCN:       ; %bb.0: ; %.entry
+; GCN-NEXT:    v_cmp_le_u64_e32 vcc_lo, 2, v[0:1]
+; GCN-NEXT:    v_dual_cndmask_b32 v1, v3, v5 :: v_dual_cndmask_b32 v0, v2, v4
+; GCN-NEXT:    v_dual_cndmask_b32 v3, 0, v7 :: v_dual_cndmask_b32 v2, 1, v6
+; GCN-NEXT:    global_store_b128 v[8:9], v[0:3], off
+; GCN-NEXT:    s_endpgm
+.entry:
+  %vcc = icmp ugt i64 2, %argA
+  %val1 = select i1 %vcc, i64 %arg0, i64 %arg1
+  %val2 = select i1 %vcc, i64 1, i64 %arg2
+  %ret0 = insertelement <2 x i64> poison, i64 %val1, i32 0
+  %ret1 = insertelement <2 x i64> %ret0, i64 %val2, i32 1
+  store <2 x i64> %ret1, ptr addrspace(1) %out
+  ret void
+}
+
+; Unsigned 64-bit case: the condition is `icmp ult i64 2, %argA` and the
+; second select has the constant 1 as its true value. The expected output
+; uses the opposite condition (2 >= %argA) and places the constant halves
+; (0 and 1) in the first source operand of the cndmasks.
+define amdgpu_cs void @test_u64_ult(i64 %argA, i64 %arg0, i64 %arg1, i64 %arg2, ptr addrspace(1) %out) {
+; GCN-LABEL: test_u64_ult:
+; GCN:       ; %bb.0: ; %.entry
+; GCN-NEXT:    v_cmp_ge_u64_e32 vcc_lo, 2, v[0:1]
+; GCN-NEXT:    v_dual_cndmask_b32 v1, v3, v5 :: v_dual_cndmask_b32 v0, v2, v4
+; GCN-NEXT:    v_dual_cndmask_b32 v3, 0, v7 :: v_dual_cndmask_b32 v2, 1, v6
+; GCN-NEXT:    global_store_b128 v[8:9], v[0:3], off
+; GCN-NEXT:    s_endpgm
+.entry:
+  %vcc = icmp ult i64 2, %argA
+  %val1 = select i1 %vcc, i64 %arg0, i64 %arg1
+  %val2 = select i1 %vcc, i64 1, i64 %arg2
+  %ret0 = insertelement <2 x i64> poison, i64 %val1, i32 0
+  %ret1 = insertelement <2 x i64> %ret0, i64 %val2, i32 1
+  store <2 x i64> %ret1, ptr addrspace(1) %out
+  ret void
+}
+
+; f32 case: the condition is `fcmp oeq float %a, 2.0` and both selects have
+; the constant 0.0 as their true value. The expected output uses the negated
+; compare (v_cmp_neq_f32) and places the 0 in the first source operand of
+; both cndmasks.
+define amdgpu_cs void @test_f32_oeq(float %a, float %p, float %q, ptr addrspace(1) %out) {
+; GCN-LABEL: test_f32_oeq:
+; GCN:       ; %bb.0: ; %.entry
+; GCN-NEXT:    v_cmp_neq_f32_e32 vcc_lo, 2.0, v0
+; GCN-NEXT:    v_dual_cndmask_b32 v0, 0, v1 :: v_dual_cndmask_b32 v1, 0, v2
+; GCN-NEXT:    global_store_b64 v[3:4], v[0:1], off
+; GCN-NEXT:    s_endpgm
+.entry:
+  %vcc = fcmp oeq float %a, 2.0
+  %val1 = select i1 %vcc, float 0.0, float %p
+  %val2 = select i1 %vcc, float 0.0, float %q
+  %ret0 = insertelement <2 x float> poison, float %val1, i32 0
+  %ret1 = insertelement <2 x float> %ret0, float %val2, i32 1
+  store <2 x float> %ret1, ptr addrspace(1) %out
+  ret void
+}
+
+
+; f32 case with one register-only select and one select whose true value is
+; the constant 0.0. The expected output uses v_cmp_neq_f32, swaps the
+; register operands of the first cndmask and places the 0 in the first
+; source operand of the second.
+; NOTE(review): despite the name, the body below uses `fcmp oeq`, not
+; `fcmp one`, so this duplicates the oeq predicate and no `one` f32 case is
+; exercised. If `one` was intended, change the predicate and regenerate the
+; assertions with update_llc_test_checks.py.
+define amdgpu_cs void @test_f32_one(float %argA, float %arg0, float %arg1, float %arg2, ptr addrspace(1) %out) {
+; GCN-LABEL: test_f32_one:
+; GCN:       ; %bb.0: ; %.entry
+; GCN-NEXT:    v_cmp_neq_f32_e32 vcc_lo, 1.0, v0
+; GCN-NEXT:    v_dual_cndmask_b32 v0, v1, v2 :: v_dual_cndmask_b32 v1, 0, v3
+; GCN-NEXT:    global_store_b64 v[4:5], v[0:1], off
+; GCN-NEXT:    s_endpgm
+.entry:
+  %vcc = fcmp oeq float %argA, 1.0
+  %val1 = select i1 %vcc, float %arg0, float %arg1
+  %val2 = select i1 %vcc, float 0.0, float %arg2
+  %ret0 = insertelement <2 x float> poison, float %val1, i32 0
+  %ret1 = insertelement <2 x float> %ret0, float %val2, i32 1
+  store <2 x float> %ret1, ptr addrspace(1) %out
+  ret void
+}
+
+; f32 case where the constant 0.0 is the FALSE value of both selects and the
+; condition is `fcmp oge float %a, 2.0`. The expected compare is the same
+; condition with its operands commuted (2.0 <= %a), not a negated one, and
+; the 0 appears in the first source operand of both cndmasks.
+define amdgpu_cs void @test_f32_oge(float %a, float %p, float %q, ptr addrspace(1) %out) {
+; GCN-LABEL: test_f32_oge:
+; GCN:       ; %bb.0: ; %.entry
+; GCN-NEXT:    v_cmp_le_f32_e32 vcc_lo, 2.0, v0
+; GCN-NEXT:    v_dual_cndmask_b32 v0, 0, v1 :: v_dual_cndmask_b32 v1, 0, v2
+; GCN-NEXT:    global_store_b64 v[3:4], v[0:1], off
+; GCN-NEXT:    s_endpgm
+.entry:
+  %vcc = fcmp oge float %a, 2.0
+  %val1 = select i1 %vcc, float %p, float 0.0
+  %val2 = select i1 %vcc, float %q, float 0.0
+  %ret0 = insertelement <2 x float> poison, float %val1, i32 0
+  %ret1 = insertelement <2 x float> %ret0, float %val2, i32 1
+  store <2 x float> %ret1, ptr addrspace(1) %out
+  ret void
+}
+
+; f32 case: the condition is `fcmp ole float 2.0, %a` and both selects have
+; the constant 0.0 as their true value. The expected output uses the negated
+; compare (v_cmp_nle_f32) and places the 0 in the first source operand of
+; both cndmasks.
+define amdgpu_cs void @test_f32_ole(float %a, float %p, float %q, ptr addrspace(1) %out) {
+; GCN-LABEL: test_f32_ole:
+; GCN:       ; %bb.0: ; %.entry
+; GCN-NEXT:    v_cmp_nle_f32_e32 vcc_lo, 2.0, v0
+; GCN-NEXT:    v_dual_cndmask_b32 v0, 0, v1 :: v_dual_cndmask_b32 v1, 0, v2
+; GCN-NEXT:    global_store_b64 v[3:4], v[0:1], off
+; GCN-NEXT:    s_endpgm
+.entry:
+  %vcc = fcmp ole float 2.0, %a
+  %val1 = select i1 %vcc, float 0.0, float %p
+  %val2 = select i1 %vcc, float 0.0, float %q
+  %ret0 = insertelement <2 x float> poison, float %val1, i32 0
+  %ret1 = insertelement <2 x float> %ret0, float %val2, i32 1
+  store <2 x float> %ret1, ptr addrspace(1) %out
+  ret void
+}
+
+; f32 case: the condition is `fcmp ogt float 2.0, %a` and both selects have
+; the constant 0.0 as their true value. The expected output uses the negated
+; compare (v_cmp_ngt_f32) and places the 0 in the first source operand of
+; both cndmasks.
+define amdgpu_cs void @test_f32_ogt(float %a, float %p, float %q, ptr addrspace(1) %out) {
+; GCN-LABEL: test_f32_ogt:
+; GCN:       ; %bb.0: ; %.entry
+; GCN-NEXT:    v_cmp_ngt_f32_e32 vcc_lo, 2.0, v0
+; GCN-NEXT:    v_dual_cndmask_b32 v0, 0, v1 :: v_dual_cndmask_b32 v1, 0, v2
+; GCN-NEXT:    global_store_b64 v[3:4], v[0:1], off
+; GCN-NEXT:    s_endpgm
+.entry:
+  %vcc = fcmp ogt float 2.0, %a
+  %val1 = select i1 %vcc, float 0.0, float %p
+  %val2 = select i1 %vcc, float 0.0, float %q
+  %ret0 = insertelement <2 x float> poison, float %val1, i32 0
+  %ret1 = insertelement <2 x float> %ret0, float %val2, i32 1
+  store <2 x float> %ret1, ptr addrspace(1) %out
+  ret void
+}
+
+; f32 case: the condition is `fcmp olt float 2.0, %a` and both selects have
+; the constant 0.0 as their true value. The expected output uses the negated
+; compare (v_cmp_nlt_f32) and places the 0 in the first source operand of
+; both cndmasks.
+define amdgpu_cs void @test_f32_olt(float %a, float %p, float %q, ptr addrspace(1) %out) {
+; GCN-LABEL: test_f32_olt:
+; GCN:       ; %bb.0: ; %.entry
+; GCN-NEXT:    v_cmp_nlt_f32_e32 vcc_lo, 2.0, v0
+; GCN-NEXT:    v_dual_cndmask_b32 v0, 0, v1 :: v_dual_cndmask_b32 v1, 0, v2
+; GCN-NEXT:    global_store_b64 v[3:4], v[0:1], off
+; GCN-NEXT:    s_endpgm
+.entry:
+  %vcc = fcmp olt float 2.0, %a
+  %val1 = select i1 %vcc, float 0.0, float %p
+  %val2 = select i1 %vcc, float 0.0, float %q
+  %ret0 = insertelement <2 x float> poison, float %val1, i32 0
+  %ret1 = insertelement <2 x float> %ret0, float %val2, i32 1
+  store <2 x float> %ret1, ptr addrspace(1) %out
+  ret void
+}
+
+; f64 case: the condition is `fcmp oeq double %argA, 1.0`; the first select
+; is register-only and the second has the constant 0.0 as its true value.
+; The expected output uses the negated compare (v_cmp_neq_f64), swaps the
+; register operands of the first pair of cndmasks and places the 0 in the
+; first source operand of the second pair.
+define amdgpu_cs void @test_f64_oeq(double %argA, double %arg0, double %arg1, double %arg2, ptr addrspace(1) %out) {
+; GCN-LABEL: test_f64_oeq:
+; GCN:       ; %bb.0: ; %.entry
+; GCN-NEXT:    v_cmp_neq_f64_e32 vcc_lo, 1.0, v[0:1]
+; GCN-NEXT:    v_dual_cndmask_b32 v1, v3, v5 :: v_dual_cndmask_b32 v0, v2, v4
+; GCN-NEXT:    v_dual_cndmask_b32 v3, 0, v7 :: v_dual_cndmask_b32 v2, 0, v6
+; GCN-NEXT:    global_store_b128 v[8:9], v[0:3], off
+; GCN-NEXT:    s_endpgm
+.entry:
+  %vcc = fcmp oeq double %argA, 1.0
+  %val1 = select i1 %vcc, double %arg0, double %arg1
+  %val2 = select i1 %vcc, double 0.0, double %arg2
+  %ret0 = insertelement <2 x double> poison, double %val1, i32 0
+  %ret1 = insertelement <2 x double> %ret0, double %val2, i32 1
+  store <2 x double> %ret1, ptr addrspace(1) %out
+  ret void
+}
+
+; f64 case: the condition is `fcmp one double %argA, 1.0`; the first select
+; is register-only and the second has the constant 0.0 as its true value.
+; Unlike the neighbouring f64 tests, the expected compare is NOT negated
+; (v_cmp_lg_f64) and the two constant-selecting cndmasks stay in their _e64
+; form with the 0 in the second source operand.
+; NOTE(review): presumably the transform has no usable inverse for this
+; predicate - confirm against SIShrinkInstructions.cpp.
+define amdgpu_cs void @test_f64_one(double %argA, double %arg0, double %arg1, double %arg2, ptr addrspace(1) %out) {
+; GCN-LABEL: test_f64_one:
+; GCN:       ; %bb.0: ; %.entry
+; GCN-NEXT:    v_cmp_lg_f64_e32 vcc_lo, 1.0, v[0:1]
+; GCN-NEXT:    v_dual_cndmask_b32 v1, v5, v3 :: v_dual_cndmask_b32 v0, v4, v2
+; GCN-NEXT:    v_cndmask_b32_e64 v3, v7, 0, vcc_lo
+; GCN-NEXT:    v_cndmask_b32_e64 v2, v6, 0, vcc_lo
+; GCN-NEXT:    global_store_b128 v[8:9], v[0:3], off
+; GCN-NEXT:    s_endpgm
+.entry:
+  %vcc = fcmp one double %argA, 1.0
+  %val1 = select i1 %vcc, double %arg0, double %arg1
+  %val2 = select i1 %vcc, double 0.0, double %arg2
+  %ret0 = insertelement <2 x double> poison, double %val1, i32 0
+  %ret1 = insertelement <2 x double> %ret0, double %val2, i32 1
+  store <2 x double> %ret1, ptr addrspace(1) %out
+  ret void
+}
+
+; f64 case: the condition is `fcmp oge double 1.0, %argA`; the first select
+; is register-only and the second has the constant 0.0 as its true value.
+; The expected output uses the negated compare (v_cmp_nge_f64), swaps the
+; register operands of the first pair of cndmasks and places the 0 in the
+; first source operand of the second pair.
+define amdgpu_cs void @test_f64_oge(double %argA, double %arg0, double %arg1, double %arg2, ptr addrspace(1) %out) {
+; GCN-LABEL: test_f64_oge:
+; GCN:       ; %bb.0: ; %.entry
+; GCN-NEXT:    v_cmp_nge_f64_e32 vcc_lo, 1.0, v[0:1]
+; GCN-NEXT:    v_dual_cndmask_b32 v1, v3, v5 :: v_dual_cndmask_b32 v0, v2, v4
+; GCN-NEXT:    v_dual_cndmask_b32 v3, 0, v7 :: v_dual_cndmask_b32 v2, 0, v6
+; GCN-NEXT:    global_store_b128 v[8:9], v[0:3], off
+; GCN-NEXT:    s_endpgm
+.entry:
+  %vcc = fcmp oge double 1.0, %argA
+  %val1 = select i1 %vcc, double %arg0, double %arg1
+  %val2 = select i1 %vcc, double 0.0, double %arg2
+  %ret0 = insertelement <2 x double> poison, double %val1, i32 0
+  %ret1 = insertelement <2 x double> %ret0, double %val2, i32 1
+  store <2 x double> %ret1, ptr addrspace(1) %out
+  ret void
+}
+
+; f64 case: the condition is `fcmp ole double 1.0, %argA`; the first select
+; is register-only and the second has the constant 0.0 as its true value.
+; The expected output uses the negated compare (v_cmp_nle_f64), swaps the
+; register operands of the first pair of cndmasks and places the 0 in the
+; first source operand of the second pair.
+define amdgpu_cs void @test_f64_ole(double %argA, double %arg0, double %arg1, double %arg2, ptr addrspace(1) %out) {
+; GCN-LABEL: test_f64_ole:
+; GCN:       ; %bb.0: ; %.entry
+; GCN-NEXT:    v_cmp_nle_f64_e32 vcc_lo, 1.0, v[0:1]
+; GCN-NEXT:    v_dual_cndmask_b32 v1, v3, v5 :: v_dual_cndmask_b32 v0, v2, v4
+; GCN-NEXT:    v_dual_cndmask_b32 v3, 0, v7 :: v_dual_cndmask_b32 v2, 0, v6
+; GCN-NEXT:    global_store_b128 v[8:9], v[0:3], off
+; GCN-NEXT:    s_endpgm
+.entry:
+  %vcc = fcmp ole double 1.0, %argA
+  %val1 = select i1 %vcc, double %arg0, double %arg1
+  %val2 = select i1 %vcc, double 0.0, double %arg2
+  %ret0 = insertelement <2 x double> poison, double %val1, i32 0
+  %ret1 = insertelement <2 x double> %ret0, double %val2, i32 1
+  store <2 x double> %ret1, ptr addrspace(1) %out
+  ret void
+}
+
+; f64 case: the condition is `fcmp ogt double 1.0, %argA`; the first select
+; is register-only and the second has the constant 0.0 as its true value.
+; The expected output uses the negated compare (v_cmp_ngt_f64), swaps the
+; register operands of the first pair of cndmasks and places the 0 in the
+; first source operand of the second pair.
+define amdgpu_cs void @test_f64_ogt(double %argA, double %arg0, double %arg1, double %arg2, ptr addrspace(1) %out) {
+; GCN-LABEL: test_f64_ogt:
+; GCN:       ; %bb.0: ; %.entry
+; GCN-NEXT:    v_cmp_ngt_f64_e32 vcc_lo, 1.0, v[0:1]
+; GCN-NEXT:    v_dual_cndmask_b32 v1, v3, v5 :: v_dual_cndmask_b32 v0, v2, v4
+; GCN-NEXT:    v_dual_cndmask_b32 v3, 0, v7 :: v_dual_cndmask_b32 v2, 0, v6
+; GCN-NEXT:    global_store_b128 v[8:9], v[0:3], off
+; GCN-NEXT:    s_endpgm
+.entry:
+  %vcc = fcmp ogt double 1.0, %argA
+  %val1 = select i1 %vcc, double %arg0, double %arg1
+  %val2 = select i1 %vcc, double 0.0, double %arg2
+  %ret0 = insertelement <2 x double> poison, double %val1, i32 0
+  %ret1 = insertelement <2 x double> %ret0, double %val2, i32 1
+  store <2 x double> %ret1, ptr addrspace(1) %out
+  ret void
+}
+
+; f64 case: the condition is `fcmp olt double 1.0, %argA`; the first select
+; is register-only and the second has the constant 0.0 as its true value.
+; The expected output uses the negated compare (v_cmp_nlt_f64), swaps the
+; register operands of the first pair of cndmasks and places the 0 in the
+; first source operand of the second pair.
+define amdgpu_cs void @test_f64_olt(double %argA, double %arg0, double %arg1, double %arg2, ptr addrspace(1) %out) {
+; GCN-LABEL: test_f64_olt:
+; GCN:       ; %bb.0: ; %.entry
+; GCN-NEXT:    v_cmp_nlt_f64_e32 vcc_lo, 1.0, v[0:1]
+; GCN-NEXT:    v_dual_cndmask_b32 v1, v3, v5 :: v_dual_cndmask_b32 v0, v2, v4
+; GCN-NEXT:    v_dual_cndmask_b32 v3, 0, v7 :: v_dual_cndmask_b32 v2, 0, v6
+; GCN-NEXT:    global_store_b128 v[8:9], v[0:3], off
+; GCN-NEXT:    s_endpgm
+.entry:
+  %vcc = fcmp olt double 1.0, %argA
+  %val1 = select i1 %vcc, double %arg0, double %arg1
+  %val2 = select i1 %vcc, double 0.0, double %arg2
+  %ret0 = insertelement <2 x double> poison, double %val1, i32 0
+  %ret1 = insertelement <2 x double> %ret0, double %val2, i32 1
+  store <2 x double> %ret1, ptr addrspace(1) %out
+  ret void
+}
+
+