[llvm] [AMDGPU] Fix wrong reverse operations for `v_cmp_*` (PR #146398)

Shilei Tian via llvm-commits llvm-commits at lists.llvm.org
Mon Jun 30 10:57:33 PDT 2025


https://github.com/shiltian created https://github.com/llvm/llvm-project/pull/146398

Fixes SWDEV-538616.
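
The tables below previously paired each comparison with its operand-swapped
counterpart, e.g. `v_cmp_lt_*` with `v_cmp_gt_*`. This patch instead pairs
each comparison with its logical negation: the reverse of `a < b` is
`a >= b`, so `v_cmp_lt_*` now maps to `v_cmp_ge_*` and `v_cmp_le_*` to
`v_cmp_gt_*`. The signed, unsigned, 32-bit, 64-bit, and `v_cmpx_*` variants
all change the same way, and one pre-existing entry that mapped
`v_cmpx_le_u32` to itself is fixed in passing. Swapping operands is not a
valid negation: when `a == b`, `a < b` and `a > b` are both false. The
remaining hunks are regenerated test expectations.

A minimal standalone sketch of that invariant in C++ (illustrative helper
names only, not LLVM APIs):

    #include <cassert>
    #include <cstdint>

    // A correct reverse must satisfy reverse(cmp)(a, b) == !cmp(a, b)
    // for all inputs.
    static bool cmp_lt_i32(int32_t a, int32_t b) { return a < b; }
    static bool cmp_gt_i32(int32_t a, int32_t b) { return a > b; }  // old pairing
    static bool cmp_ge_i32(int32_t a, int32_t b) { return a >= b; } // new pairing

    int main() {
      for (int32_t a = -2; a <= 2; ++a)
        for (int32_t b = -2; b <= 2; ++b)
          assert(cmp_ge_i32(a, b) == !cmp_lt_i32(a, b)); // ge negates lt everywhere
      // gt does not: when a == b, lt and gt are both false.
      assert(!cmp_lt_i32(1, 1) && !cmp_gt_i32(1, 1));
      return 0;
    }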

From 280d673dd061571069378a69e119ee0114e06c3b Mon Sep 17 00:00:00 2001
From: Shilei Tian <i at tianshilei.me>
Date: Mon, 30 Jun 2025 13:50:41 -0400
Subject: [PATCH] [AMDGPU] Fix wrong reverse operations for `v_cmp_*`

Fixes SWDEV-538616.
---
 llvm/lib/Target/AMDGPU/VOPCInstructions.td    |  32 +-
 llvm/test/CodeGen/AMDGPU/GlobalISel/addo.ll   |  60 +--
 llvm/test/CodeGen/AMDGPU/GlobalISel/ashr.ll   |  10 +-
 ...-divergent-i1-phis-no-lane-mask-merging.ll |   8 +-
 ...vergence-divergent-i1-used-outside-loop.ll |   4 +-
 .../GlobalISel/divergence-structurizer.ll     |   8 +-
 .../GlobalISel/divergent-control-flow.ll      |   2 +-
 llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll   | 240 +++++------
 llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll   | 240 +++++------
 .../AMDGPU/GlobalISel/is-safe-to-sink-bug.ll  |   2 +-
 .../GlobalISel/llvm.amdgcn.ballot.i32.ll      |  22 +-
 .../GlobalISel/llvm.amdgcn.ballot.i64.ll      |  16 +-
 .../CodeGen/AMDGPU/GlobalISel/llvm.memcpy.ll  |   2 +-
 .../CodeGen/AMDGPU/GlobalISel/llvm.memset.ll  |   2 +-
 llvm/test/CodeGen/AMDGPU/GlobalISel/lshr.ll   |  10 +-
 .../AMDGPU/GlobalISel/mul-known-bits.i64.ll   |   4 +-
 .../AMDGPU/GlobalISel/regbankselect-mui.ll    |  10 +-
 .../test/CodeGen/AMDGPU/GlobalISel/saddsat.ll | 132 +++----
 .../CodeGen/AMDGPU/GlobalISel/sdiv.i32.ll     |   8 +-
 .../CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll     |  12 +-
 .../test/CodeGen/AMDGPU/GlobalISel/sdivrem.ll | 252 ++++++------
 llvm/test/CodeGen/AMDGPU/GlobalISel/shl.ll    |  10 +-
 .../CodeGen/AMDGPU/GlobalISel/srem.i32.ll     |   8 +-
 .../CodeGen/AMDGPU/GlobalISel/srem.i64.ll     |  12 +-
 .../test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll | 162 ++++----
 llvm/test/CodeGen/AMDGPU/GlobalISel/subo.ll   |  60 +--
 .../CodeGen/AMDGPU/GlobalISel/udiv.i32.ll     |   8 +-
 .../CodeGen/AMDGPU/GlobalISel/udiv.i64.ll     |  12 +-
 .../test/CodeGen/AMDGPU/GlobalISel/udivrem.ll | 252 ++++++------
 .../CodeGen/AMDGPU/GlobalISel/urem.i32.ll     |   8 +-
 .../CodeGen/AMDGPU/GlobalISel/urem.i64.ll     |  12 +-
 .../AMDGPU/GlobalISel/vni8-across-blocks.ll   |  24 +-
 .../AMDGPU/agpr-copy-no-free-registers.ll     |   4 +-
 .../AMDGPU/amdgpu-branch-weight-metadata.ll   |  18 +-
 .../AMDGPU/amdgpu-codegenprepare-idiv.ll      |  81 ++--
 llvm/test/CodeGen/AMDGPU/ashr64_reduce.ll     |  40 +-
 llvm/test/CodeGen/AMDGPU/branch-relaxation.ll |   6 +-
 .../AMDGPU/buffer-fat-pointers-memcpy.ll      |   2 +-
 .../AMDGPU/bug-sdag-emitcopyfromreg.ll        |   2 +-
 .../test/CodeGen/AMDGPU/carryout-selection.ll |  60 +--
 llvm/test/CodeGen/AMDGPU/collapse-endcf.ll    |  12 +-
 .../CodeGen/AMDGPU/combine-add-zext-xor.ll    |  16 +-
 .../CodeGen/AMDGPU/combine_andor_with_cmps.ll |  48 +--
 llvm/test/CodeGen/AMDGPU/commute-compares.ll  |  34 +-
 .../CodeGen/AMDGPU/copy-to-reg-frameindex.ll  |   2 +-
 llvm/test/CodeGen/AMDGPU/div_i128.ll          |  28 +-
 llvm/test/CodeGen/AMDGPU/div_v2i128.ll        | 112 +++---
 .../CodeGen/AMDGPU/extract-subvector-16bit.ll |  44 +--
 llvm/test/CodeGen/AMDGPU/extract-subvector.ll |  34 +-
 llvm/test/CodeGen/AMDGPU/fcopysign.f16.ll     | 294 +++++++-------
 llvm/test/CodeGen/AMDGPU/flat_atomics_i64.ll  | 288 +++++++-------
 .../CodeGen/AMDGPU/flat_atomics_i64_system.ll | 372 +++++++++---------
 .../flat_atomics_i64_system_noprivate.ll      | 162 ++++----
 llvm/test/CodeGen/AMDGPU/fneg-combines.f16.ll |  90 ++---
 .../CodeGen/AMDGPU/fneg-modifier-casting.ll   |  12 +-
 .../AMDGPU/fold-int-pow2-with-fmul-or-fdiv.ll |   6 +-
 llvm/test/CodeGen/AMDGPU/fptoi.i128.ll        | 112 +++---
 .../AMDGPU/fptrunc.v2f16.no.fast.math.ll      |  24 +-
 .../AMDGPU/global_atomics_i64_system.ll       | 162 ++++----
 .../CodeGen/AMDGPU/indirect-addressing-si.ll  |   8 +-
 llvm/test/CodeGen/AMDGPU/itofp.i128.bf.ll     |  22 +-
 llvm/test/CodeGen/AMDGPU/itofp.i128.ll        | 126 +++---
 .../CodeGen/AMDGPU/llvm.amdgcn.ballot.i32.ll  |  26 +-
 .../CodeGen/AMDGPU/llvm.amdgcn.ballot.i64.ll  |  24 +-
 .../AMDGPU/llvm.amdgcn.ballot.i64.wave32.ll   |  14 +-
 .../CodeGen/AMDGPU/llvm.amdgcn.icmp.w32.ll    | 128 +++---
 .../CodeGen/AMDGPU/llvm.amdgcn.icmp.w64.ll    |  32 +-
 .../AMDGPU/llvm.amdgcn.readfirstlane.ll       |   4 +-
 .../CodeGen/AMDGPU/llvm.amdgcn.reduce.add.ll  |  24 +-
 .../CodeGen/AMDGPU/llvm.amdgcn.reduce.and.ll  |  24 +-
 .../CodeGen/AMDGPU/llvm.amdgcn.reduce.max.ll  |  24 +-
 .../CodeGen/AMDGPU/llvm.amdgcn.reduce.min.ll  |  24 +-
 .../CodeGen/AMDGPU/llvm.amdgcn.reduce.or.ll   |  24 +-
 .../CodeGen/AMDGPU/llvm.amdgcn.reduce.sub.ll  |  24 +-
 .../CodeGen/AMDGPU/llvm.amdgcn.reduce.umax.ll |  24 +-
 .../CodeGen/AMDGPU/llvm.amdgcn.reduce.umin.ll |  24 +-
 .../CodeGen/AMDGPU/llvm.amdgcn.reduce.xor.ll  |  24 +-
 .../CodeGen/AMDGPU/llvm.is.fpclass.bf16.ll    | 118 +++---
 .../CodeGen/AMDGPU/llvm.is.fpclass.f16.ll     | 120 +++---
 llvm/test/CodeGen/AMDGPU/llvm.mulo.ll         |  20 +-
 llvm/test/CodeGen/AMDGPU/llvm.round.f64.ll    |   4 +-
 .../test/CodeGen/AMDGPU/loop_exit_with_xor.ll |   8 +-
 ...ne-sink-temporal-divergence-swdev407790.ll |   6 +-
 .../CodeGen/AMDGPU/memintrinsic-unroll.ll     |  30 +-
 .../CodeGen/AMDGPU/nested-loop-conditions.ll  |   8 +-
 .../CodeGen/AMDGPU/private-memory-atomics.ll  |   4 +-
 llvm/test/CodeGen/AMDGPU/rem_i128.ll          |  16 +-
 llvm/test/CodeGen/AMDGPU/saddo.ll             |  22 +-
 llvm/test/CodeGen/AMDGPU/saddsat.ll           |  22 +-
 llvm/test/CodeGen/AMDGPU/sdiv64.ll            |  24 +-
 .../CodeGen/AMDGPU/select-constant-xor.ll     |   6 +-
 .../AMDGPU/set-inactive-wwm-overwrite.ll      |   4 +-
 llvm/test/CodeGen/AMDGPU/sgpr-control-flow.ll |   2 +-
 llvm/test/CodeGen/AMDGPU/shift-i128.ll        |  24 +-
 llvm/test/CodeGen/AMDGPU/shl64_reduce.ll      |  40 +-
 .../AMDGPU/should-not-hoist-set-inactive.ll   |   2 +-
 .../si-unify-exit-multiple-unreachables.ll    |   2 +-
 .../si-unify-exit-return-unreachable.ll       |   1 -
 .../AMDGPU/srem-seteq-illegal-types.ll        |   2 +-
 llvm/test/CodeGen/AMDGPU/srem.ll              |  20 +-
 llvm/test/CodeGen/AMDGPU/srem64.ll            |  32 +-
 llvm/test/CodeGen/AMDGPU/srl64_reduce.ll      |  40 +-
 llvm/test/CodeGen/AMDGPU/ssubsat.ll           | 146 +++----
 .../AMDGPU/tuple-allocation-failure.ll        |  12 +-
 llvm/test/CodeGen/AMDGPU/uaddo.ll             |  12 +-
 llvm/test/CodeGen/AMDGPU/udiv64.ll            |  24 +-
 .../AMDGPU/undef-handling-crash-in-ra.ll      |   2 +-
 llvm/test/CodeGen/AMDGPU/uniform-cfg.ll       |  12 +-
 .../AMDGPU/unstructured-cfg-def-use-issue.ll  |   4 +-
 .../AMDGPU/urem-seteq-illegal-types.ll        |  14 +-
 llvm/test/CodeGen/AMDGPU/urem64.ll            |  22 +-
 llvm/test/CodeGen/AMDGPU/v_cndmask.ll         |  40 +-
 llvm/test/CodeGen/AMDGPU/vgpr-liverange-ir.ll |  20 +-
 llvm/test/CodeGen/AMDGPU/vgpr-liverange.ll    |  10 +-
 .../test/CodeGen/AMDGPU/vni8-across-blocks.ll |  50 +--
 llvm/test/CodeGen/AMDGPU/vopc_dpp-true16.mir  |   2 +-
 llvm/test/CodeGen/AMDGPU/vopc_dpp.mir         |   2 +-
 llvm/test/CodeGen/AMDGPU/wave32.ll            |  68 ++--
 llvm/test/CodeGen/AMDGPU/wqm.ll               |  44 +--
 119 files changed, 2688 insertions(+), 2682 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/VOPCInstructions.td b/llvm/lib/Target/AMDGPU/VOPCInstructions.td
index ca5ed5cd24603..eb002567f3ef5 100644
--- a/llvm/lib/Target/AMDGPU/VOPCInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOPCInstructions.td
@@ -799,72 +799,72 @@ defm V_CMPX_T_U16 : VOPCX_I16 <"v_cmpx_t_u16">;
 } // End SubtargetPredicate = Has16BitInsts
 
 defm V_CMP_F_I32 : VOPC_I32 <"v_cmp_f_i32">;
-defm V_CMP_LT_I32 : VOPC_I32 <"v_cmp_lt_i32", COND_SLT, "v_cmp_gt_i32">;
+defm V_CMP_LT_I32 : VOPC_I32 <"v_cmp_lt_i32", COND_SLT, "v_cmp_ge_i32">;
 defm V_CMP_EQ_I32 : VOPC_I32 <"v_cmp_eq_i32">;
-defm V_CMP_LE_I32 : VOPC_I32 <"v_cmp_le_i32", COND_SLE, "v_cmp_ge_i32">;
+defm V_CMP_LE_I32 : VOPC_I32 <"v_cmp_le_i32", COND_SLE, "v_cmp_gt_i32">;
 defm V_CMP_GT_I32 : VOPC_I32 <"v_cmp_gt_i32", COND_SGT>;
 defm V_CMP_NE_I32 : VOPC_I32 <"v_cmp_ne_i32">;
 defm V_CMP_GE_I32 : VOPC_I32 <"v_cmp_ge_i32", COND_SGE>;
 defm V_CMP_T_I32 : VOPC_I32 <"v_cmp_t_i32">;
 
 defm V_CMPX_F_I32 : VOPCX_I32 <"v_cmpx_f_i32">;
-defm V_CMPX_LT_I32 : VOPCX_I32 <"v_cmpx_lt_i32", "v_cmpx_gt_i32">;
+defm V_CMPX_LT_I32 : VOPCX_I32 <"v_cmpx_lt_i32", "v_cmpx_ge_i32">;
 defm V_CMPX_EQ_I32 : VOPCX_I32 <"v_cmpx_eq_i32">;
-defm V_CMPX_LE_I32 : VOPCX_I32 <"v_cmpx_le_i32", "v_cmpx_ge_i32">;
+defm V_CMPX_LE_I32 : VOPCX_I32 <"v_cmpx_le_i32", "v_cmpx_gt_i32">;
 defm V_CMPX_GT_I32 : VOPCX_I32 <"v_cmpx_gt_i32">;
 defm V_CMPX_NE_I32 : VOPCX_I32 <"v_cmpx_ne_i32">;
 defm V_CMPX_GE_I32 : VOPCX_I32 <"v_cmpx_ge_i32">;
 defm V_CMPX_T_I32 : VOPCX_I32 <"v_cmpx_t_i32">;
 
 defm V_CMP_F_I64 : VOPC_I64 <"v_cmp_f_i64">;
-defm V_CMP_LT_I64 : VOPC_I64 <"v_cmp_lt_i64", COND_SLT, "v_cmp_gt_i64">;
+defm V_CMP_LT_I64 : VOPC_I64 <"v_cmp_lt_i64", COND_SLT, "v_cmp_ge_i64">;
 defm V_CMP_EQ_I64 : VOPC_I64 <"v_cmp_eq_i64">;
-defm V_CMP_LE_I64 : VOPC_I64 <"v_cmp_le_i64", COND_SLE, "v_cmp_ge_i64">;
+defm V_CMP_LE_I64 : VOPC_I64 <"v_cmp_le_i64", COND_SLE, "v_cmp_gt_i64">;
 defm V_CMP_GT_I64 : VOPC_I64 <"v_cmp_gt_i64", COND_SGT>;
 defm V_CMP_NE_I64 : VOPC_I64 <"v_cmp_ne_i64">;
 defm V_CMP_GE_I64 : VOPC_I64 <"v_cmp_ge_i64", COND_SGE>;
 defm V_CMP_T_I64 : VOPC_I64 <"v_cmp_t_i64">;
 
 defm V_CMPX_F_I64 : VOPCX_I64 <"v_cmpx_f_i64">;
-defm V_CMPX_LT_I64 : VOPCX_I64 <"v_cmpx_lt_i64", "v_cmpx_gt_i64">;
+defm V_CMPX_LT_I64 : VOPCX_I64 <"v_cmpx_lt_i64", "v_cmpx_ge_i64">;
 defm V_CMPX_EQ_I64 : VOPCX_I64 <"v_cmpx_eq_i64">;
-defm V_CMPX_LE_I64 : VOPCX_I64 <"v_cmpx_le_i64", "v_cmpx_ge_i64">;
+defm V_CMPX_LE_I64 : VOPCX_I64 <"v_cmpx_le_i64", "v_cmpx_gt_i64">;
 defm V_CMPX_GT_I64 : VOPCX_I64 <"v_cmpx_gt_i64">;
 defm V_CMPX_NE_I64 : VOPCX_I64 <"v_cmpx_ne_i64">;
 defm V_CMPX_GE_I64 : VOPCX_I64 <"v_cmpx_ge_i64">;
 defm V_CMPX_T_I64 : VOPCX_I64 <"v_cmpx_t_i64">;
 
 defm V_CMP_F_U32 : VOPC_I32 <"v_cmp_f_u32">;
-defm V_CMP_LT_U32 : VOPC_I32 <"v_cmp_lt_u32", COND_ULT, "v_cmp_gt_u32">;
+defm V_CMP_LT_U32 : VOPC_I32 <"v_cmp_lt_u32", COND_ULT, "v_cmp_ge_u32">;
 defm V_CMP_EQ_U32 : VOPC_I32 <"v_cmp_eq_u32", COND_EQ>;
-defm V_CMP_LE_U32 : VOPC_I32 <"v_cmp_le_u32", COND_ULE, "v_cmp_ge_u32">;
+defm V_CMP_LE_U32 : VOPC_I32 <"v_cmp_le_u32", COND_ULE, "v_cmp_gt_u32">;
 defm V_CMP_GT_U32 : VOPC_I32 <"v_cmp_gt_u32", COND_UGT>;
 defm V_CMP_NE_U32 : VOPC_I32 <"v_cmp_ne_u32", COND_NE>;
 defm V_CMP_GE_U32 : VOPC_I32 <"v_cmp_ge_u32", COND_UGE>;
 defm V_CMP_T_U32 : VOPC_I32 <"v_cmp_t_u32">;
 
 defm V_CMPX_F_U32 : VOPCX_I32 <"v_cmpx_f_u32">;
-defm V_CMPX_LT_U32 : VOPCX_I32 <"v_cmpx_lt_u32", "v_cmpx_gt_u32">;
+defm V_CMPX_LT_U32 : VOPCX_I32 <"v_cmpx_lt_u32", "v_cmpx_ge_u32">;
 defm V_CMPX_EQ_U32 : VOPCX_I32 <"v_cmpx_eq_u32">;
-defm V_CMPX_LE_U32 : VOPCX_I32 <"v_cmpx_le_u32", "v_cmpx_le_u32">;
+defm V_CMPX_LE_U32 : VOPCX_I32 <"v_cmpx_le_u32", "v_cmpx_gt_u32">;
 defm V_CMPX_GT_U32 : VOPCX_I32 <"v_cmpx_gt_u32">;
 defm V_CMPX_NE_U32 : VOPCX_I32 <"v_cmpx_ne_u32">;
 defm V_CMPX_GE_U32 : VOPCX_I32 <"v_cmpx_ge_u32">;
 defm V_CMPX_T_U32 : VOPCX_I32 <"v_cmpx_t_u32">;
 
 defm V_CMP_F_U64 : VOPC_I64 <"v_cmp_f_u64">;
-defm V_CMP_LT_U64 : VOPC_I64 <"v_cmp_lt_u64", COND_ULT, "v_cmp_gt_u64">;
+defm V_CMP_LT_U64 : VOPC_I64 <"v_cmp_lt_u64", COND_ULT, "v_cmp_ge_u64">;
 defm V_CMP_EQ_U64 : VOPC_I64 <"v_cmp_eq_u64", COND_EQ>;
-defm V_CMP_LE_U64 : VOPC_I64 <"v_cmp_le_u64", COND_ULE, "v_cmp_ge_u64">;
+defm V_CMP_LE_U64 : VOPC_I64 <"v_cmp_le_u64", COND_ULE, "v_cmp_gt_u64">;
 defm V_CMP_GT_U64 : VOPC_I64 <"v_cmp_gt_u64", COND_UGT>;
 defm V_CMP_NE_U64 : VOPC_I64 <"v_cmp_ne_u64", COND_NE>;
 defm V_CMP_GE_U64 : VOPC_I64 <"v_cmp_ge_u64", COND_UGE>;
 defm V_CMP_T_U64 : VOPC_I64 <"v_cmp_t_u64">;
 
 defm V_CMPX_F_U64 : VOPCX_I64 <"v_cmpx_f_u64">;
-defm V_CMPX_LT_U64 : VOPCX_I64 <"v_cmpx_lt_u64", "v_cmpx_gt_u64">;
+defm V_CMPX_LT_U64 : VOPCX_I64 <"v_cmpx_lt_u64", "v_cmpx_ge_u64">;
 defm V_CMPX_EQ_U64 : VOPCX_I64 <"v_cmpx_eq_u64">;
-defm V_CMPX_LE_U64 : VOPCX_I64 <"v_cmpx_le_u64", "v_cmpx_ge_u64">;
+defm V_CMPX_LE_U64 : VOPCX_I64 <"v_cmpx_le_u64", "v_cmpx_gt_u64">;
 defm V_CMPX_GT_U64 : VOPCX_I64 <"v_cmpx_gt_u64">;
 defm V_CMPX_NE_U64 : VOPCX_I64 <"v_cmpx_ne_u64">;
 defm V_CMPX_GE_U64 : VOPCX_I64 <"v_cmpx_ge_u64">;
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/addo.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/addo.ll
index 38374d1689366..27668752120e8 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/addo.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/addo.ll
@@ -205,7 +205,7 @@ define i32 @v_saddo_i32(i32 %a, i32 %b) {
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX7-NEXT:    v_add_i32_e32 v2, vcc, v0, v1
 ; GFX7-NEXT:    v_cmp_lt_i32_e32 vcc, v2, v0
-; GFX7-NEXT:    v_cmp_gt_i32_e64 s[4:5], 0, v1
+; GFX7-NEXT:    v_cmp_ge_i32_e64 s[4:5], 0, v1
 ; GFX7-NEXT:    s_xor_b64 s[4:5], s[4:5], vcc
 ; GFX7-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
 ; GFX7-NEXT:    v_add_i32_e32 v0, vcc, v2, v0
@@ -216,7 +216,7 @@ define i32 @v_saddo_i32(i32 %a, i32 %b) {
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_add_u32_e32 v2, vcc, v0, v1
 ; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, v2, v0
-; GFX8-NEXT:    v_cmp_gt_i32_e64 s[4:5], 0, v1
+; GFX8-NEXT:    v_cmp_ge_i32_e64 s[4:5], 0, v1
 ; GFX8-NEXT:    s_xor_b64 s[4:5], s[4:5], vcc
 ; GFX8-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
 ; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v2, v0
@@ -227,7 +227,7 @@ define i32 @v_saddo_i32(i32 %a, i32 %b) {
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    v_add_u32_e32 v2, v0, v1
 ; GFX9-NEXT:    v_cmp_lt_i32_e32 vcc, v2, v0
-; GFX9-NEXT:    v_cmp_gt_i32_e64 s[4:5], 0, v1
+; GFX9-NEXT:    v_cmp_ge_i32_e64 s[4:5], 0, v1
 ; GFX9-NEXT:    s_xor_b64 s[4:5], s[4:5], vcc
 ; GFX9-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
 ; GFX9-NEXT:    v_add_u32_e32 v0, v2, v0
@@ -247,7 +247,7 @@ define i64 @v_saddo_i64(i64 %a, i64 %b) {
 ; GFX7-NEXT:    v_add_i32_e32 v4, vcc, v0, v2
 ; GFX7-NEXT:    v_addc_u32_e32 v5, vcc, v1, v3, vcc
 ; GFX7-NEXT:    v_cmp_lt_i64_e32 vcc, v[4:5], v[0:1]
-; GFX7-NEXT:    v_cmp_gt_i64_e64 s[4:5], 0, v[2:3]
+; GFX7-NEXT:    v_cmp_ge_i64_e64 s[4:5], 0, v[2:3]
 ; GFX7-NEXT:    s_xor_b64 s[4:5], s[4:5], vcc
 ; GFX7-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
 ; GFX7-NEXT:    v_add_i32_e32 v0, vcc, v4, v0
@@ -260,7 +260,7 @@ define i64 @v_saddo_i64(i64 %a, i64 %b) {
 ; GFX8-NEXT:    v_add_u32_e32 v4, vcc, v0, v2
 ; GFX8-NEXT:    v_addc_u32_e32 v5, vcc, v1, v3, vcc
 ; GFX8-NEXT:    v_cmp_lt_i64_e32 vcc, v[4:5], v[0:1]
-; GFX8-NEXT:    v_cmp_gt_i64_e64 s[4:5], 0, v[2:3]
+; GFX8-NEXT:    v_cmp_ge_i64_e64 s[4:5], 0, v[2:3]
 ; GFX8-NEXT:    s_xor_b64 s[4:5], s[4:5], vcc
 ; GFX8-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
 ; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v4, v0
@@ -273,7 +273,7 @@ define i64 @v_saddo_i64(i64 %a, i64 %b) {
 ; GFX9-NEXT:    v_add_co_u32_e32 v4, vcc, v0, v2
 ; GFX9-NEXT:    v_addc_co_u32_e32 v5, vcc, v1, v3, vcc
 ; GFX9-NEXT:    v_cmp_lt_i64_e32 vcc, v[4:5], v[0:1]
-; GFX9-NEXT:    v_cmp_gt_i64_e64 s[4:5], 0, v[2:3]
+; GFX9-NEXT:    v_cmp_ge_i64_e64 s[4:5], 0, v[2:3]
 ; GFX9-NEXT:    s_xor_b64 s[4:5], s[4:5], vcc
 ; GFX9-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
 ; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, v4, v0
@@ -295,8 +295,8 @@ define <2 x i32> @v_saddo_v2i32(<2 x i32> %a, <2 x i32> %b) {
 ; GFX7-NEXT:    v_add_i32_e32 v5, vcc, v1, v3
 ; GFX7-NEXT:    v_cmp_lt_i32_e32 vcc, v4, v0
 ; GFX7-NEXT:    v_cmp_lt_i32_e64 s[4:5], v5, v1
-; GFX7-NEXT:    v_cmp_gt_i32_e64 s[6:7], 0, v2
-; GFX7-NEXT:    v_cmp_gt_i32_e64 s[8:9], 0, v3
+; GFX7-NEXT:    v_cmp_ge_i32_e64 s[6:7], 0, v2
+; GFX7-NEXT:    v_cmp_ge_i32_e64 s[8:9], 0, v3
 ; GFX7-NEXT:    s_xor_b64 s[6:7], s[6:7], vcc
 ; GFX7-NEXT:    s_xor_b64 s[4:5], s[8:9], s[4:5]
 ; GFX7-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[6:7]
@@ -312,8 +312,8 @@ define <2 x i32> @v_saddo_v2i32(<2 x i32> %a, <2 x i32> %b) {
 ; GFX8-NEXT:    v_add_u32_e32 v5, vcc, v1, v3
 ; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, v4, v0
 ; GFX8-NEXT:    v_cmp_lt_i32_e64 s[4:5], v5, v1
-; GFX8-NEXT:    v_cmp_gt_i32_e64 s[6:7], 0, v2
-; GFX8-NEXT:    v_cmp_gt_i32_e64 s[8:9], 0, v3
+; GFX8-NEXT:    v_cmp_ge_i32_e64 s[6:7], 0, v2
+; GFX8-NEXT:    v_cmp_ge_i32_e64 s[8:9], 0, v3
 ; GFX8-NEXT:    s_xor_b64 s[6:7], s[6:7], vcc
 ; GFX8-NEXT:    s_xor_b64 s[4:5], s[8:9], s[4:5]
 ; GFX8-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[6:7]
@@ -329,8 +329,8 @@ define <2 x i32> @v_saddo_v2i32(<2 x i32> %a, <2 x i32> %b) {
 ; GFX9-NEXT:    v_add_u32_e32 v5, v1, v3
 ; GFX9-NEXT:    v_cmp_lt_i32_e32 vcc, v4, v0
 ; GFX9-NEXT:    v_cmp_lt_i32_e64 s[4:5], v5, v1
-; GFX9-NEXT:    v_cmp_gt_i32_e64 s[6:7], 0, v2
-; GFX9-NEXT:    v_cmp_gt_i32_e64 s[8:9], 0, v3
+; GFX9-NEXT:    v_cmp_ge_i32_e64 s[6:7], 0, v2
+; GFX9-NEXT:    v_cmp_ge_i32_e64 s[8:9], 0, v3
 ; GFX9-NEXT:    s_xor_b64 s[6:7], s[6:7], vcc
 ; GFX9-NEXT:    s_xor_b64 s[4:5], s[8:9], s[4:5]
 ; GFX9-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[6:7]
@@ -355,7 +355,7 @@ define i8 @v_saddo_i8(i8 %a, i8 %b) {
 ; GFX7-NEXT:    v_bfe_i32 v0, v0, 0, 8
 ; GFX7-NEXT:    v_cmp_lt_i32_e32 vcc, v3, v0
 ; GFX7-NEXT:    v_bfe_i32 v0, v1, 0, 8
-; GFX7-NEXT:    v_cmp_gt_i32_e64 s[4:5], 0, v0
+; GFX7-NEXT:    v_cmp_ge_i32_e64 s[4:5], 0, v0
 ; GFX7-NEXT:    s_xor_b64 s[4:5], s[4:5], vcc
 ; GFX7-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
 ; GFX7-NEXT:    v_add_i32_e32 v0, vcc, v2, v0
@@ -369,7 +369,7 @@ define i8 @v_saddo_i8(i8 %a, i8 %b) {
 ; GFX8-NEXT:    v_bfe_i32 v0, v0, 0, 8
 ; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, v3, v0
 ; GFX8-NEXT:    v_bfe_i32 v0, v1, 0, 8
-; GFX8-NEXT:    v_cmp_gt_i32_e64 s[4:5], 0, v0
+; GFX8-NEXT:    v_cmp_ge_i32_e64 s[4:5], 0, v0
 ; GFX8-NEXT:    s_xor_b64 s[4:5], s[4:5], vcc
 ; GFX8-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
 ; GFX8-NEXT:    v_add_u16_e32 v0, v2, v0
@@ -403,7 +403,7 @@ define i7 @v_saddo_i7(i7 %a, i7 %b) {
 ; GFX7-NEXT:    v_bfe_i32 v0, v0, 0, 7
 ; GFX7-NEXT:    v_cmp_lt_i32_e32 vcc, v3, v0
 ; GFX7-NEXT:    v_bfe_i32 v0, v1, 0, 7
-; GFX7-NEXT:    v_cmp_gt_i32_e64 s[4:5], 0, v0
+; GFX7-NEXT:    v_cmp_ge_i32_e64 s[4:5], 0, v0
 ; GFX7-NEXT:    s_xor_b64 s[4:5], s[4:5], vcc
 ; GFX7-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
 ; GFX7-NEXT:    v_add_i32_e32 v0, vcc, v2, v0
@@ -417,7 +417,7 @@ define i7 @v_saddo_i7(i7 %a, i7 %b) {
 ; GFX8-NEXT:    v_bfe_i32 v0, v0, 0, 7
 ; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, v3, v0
 ; GFX8-NEXT:    v_bfe_i32 v0, v1, 0, 7
-; GFX8-NEXT:    v_cmp_gt_i32_e64 s[4:5], 0, v0
+; GFX8-NEXT:    v_cmp_ge_i32_e64 s[4:5], 0, v0
 ; GFX8-NEXT:    s_xor_b64 s[4:5], s[4:5], vcc
 ; GFX8-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
 ; GFX8-NEXT:    v_add_u16_e32 v0, v2, v0
@@ -431,7 +431,7 @@ define i7 @v_saddo_i7(i7 %a, i7 %b) {
 ; GFX9-NEXT:    v_bfe_i32 v0, v0, 0, 7
 ; GFX9-NEXT:    v_cmp_lt_i32_e32 vcc, v3, v0
 ; GFX9-NEXT:    v_bfe_i32 v0, v1, 0, 7
-; GFX9-NEXT:    v_cmp_gt_i32_e64 s[4:5], 0, v0
+; GFX9-NEXT:    v_cmp_ge_i32_e64 s[4:5], 0, v0
 ; GFX9-NEXT:    s_xor_b64 s[4:5], s[4:5], vcc
 ; GFX9-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
 ; GFX9-NEXT:    v_add_u16_e32 v0, v2, v0
@@ -802,7 +802,7 @@ define i8 @s_saddo_i8(i8 %a, i8 %b) {
 ; GFX7-NEXT:    v_bfe_i32 v0, v0, 0, 8
 ; GFX7-NEXT:    v_cmp_lt_i32_e32 vcc, v3, v0
 ; GFX7-NEXT:    v_bfe_i32 v0, v1, 0, 8
-; GFX7-NEXT:    v_cmp_gt_i32_e64 s[4:5], 0, v0
+; GFX7-NEXT:    v_cmp_ge_i32_e64 s[4:5], 0, v0
 ; GFX7-NEXT:    s_xor_b64 s[4:5], s[4:5], vcc
 ; GFX7-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
 ; GFX7-NEXT:    v_add_i32_e32 v0, vcc, v2, v0
@@ -816,7 +816,7 @@ define i8 @s_saddo_i8(i8 %a, i8 %b) {
 ; GFX8-NEXT:    v_bfe_i32 v0, v0, 0, 8
 ; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, v3, v0
 ; GFX8-NEXT:    v_bfe_i32 v0, v1, 0, 8
-; GFX8-NEXT:    v_cmp_gt_i32_e64 s[4:5], 0, v0
+; GFX8-NEXT:    v_cmp_ge_i32_e64 s[4:5], 0, v0
 ; GFX8-NEXT:    s_xor_b64 s[4:5], s[4:5], vcc
 ; GFX8-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
 ; GFX8-NEXT:    v_add_u16_e32 v0, v2, v0
@@ -850,7 +850,7 @@ define i7 @s_saddo_i7(i7 %a, i7 %b) {
 ; GFX7-NEXT:    v_bfe_i32 v0, v0, 0, 7
 ; GFX7-NEXT:    v_cmp_lt_i32_e32 vcc, v3, v0
 ; GFX7-NEXT:    v_bfe_i32 v0, v1, 0, 7
-; GFX7-NEXT:    v_cmp_gt_i32_e64 s[4:5], 0, v0
+; GFX7-NEXT:    v_cmp_ge_i32_e64 s[4:5], 0, v0
 ; GFX7-NEXT:    s_xor_b64 s[4:5], s[4:5], vcc
 ; GFX7-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
 ; GFX7-NEXT:    v_add_i32_e32 v0, vcc, v2, v0
@@ -864,7 +864,7 @@ define i7 @s_saddo_i7(i7 %a, i7 %b) {
 ; GFX8-NEXT:    v_bfe_i32 v0, v0, 0, 7
 ; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, v3, v0
 ; GFX8-NEXT:    v_bfe_i32 v0, v1, 0, 7
-; GFX8-NEXT:    v_cmp_gt_i32_e64 s[4:5], 0, v0
+; GFX8-NEXT:    v_cmp_ge_i32_e64 s[4:5], 0, v0
 ; GFX8-NEXT:    s_xor_b64 s[4:5], s[4:5], vcc
 ; GFX8-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
 ; GFX8-NEXT:    v_add_u16_e32 v0, v2, v0
@@ -878,7 +878,7 @@ define i7 @s_saddo_i7(i7 %a, i7 %b) {
 ; GFX9-NEXT:    v_bfe_i32 v0, v0, 0, 7
 ; GFX9-NEXT:    v_cmp_lt_i32_e32 vcc, v3, v0
 ; GFX9-NEXT:    v_bfe_i32 v0, v1, 0, 7
-; GFX9-NEXT:    v_cmp_gt_i32_e64 s[4:5], 0, v0
+; GFX9-NEXT:    v_cmp_ge_i32_e64 s[4:5], 0, v0
 ; GFX9-NEXT:    s_xor_b64 s[4:5], s[4:5], vcc
 ; GFX9-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
 ; GFX9-NEXT:    v_add_u16_e32 v0, v2, v0
@@ -969,8 +969,8 @@ define amdgpu_ps i32 @saddo_i32_sv(i32 inreg %a, i32 %b) {
 ; GFX7-LABEL: saddo_i32_sv:
 ; GFX7:       ; %bb.0:
 ; GFX7-NEXT:    v_add_i32_e32 v1, vcc, s0, v0
-; GFX7-NEXT:    v_cmp_gt_i32_e32 vcc, s0, v1
-; GFX7-NEXT:    v_cmp_gt_i32_e64 s[0:1], 0, v0
+; GFX7-NEXT:    v_cmp_ge_i32_e32 vcc, s0, v1
+; GFX7-NEXT:    v_cmp_ge_i32_e64 s[0:1], 0, v0
 ; GFX7-NEXT:    s_xor_b64 s[0:1], s[0:1], vcc
 ; GFX7-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[0:1]
 ; GFX7-NEXT:    v_add_i32_e32 v0, vcc, v1, v0
@@ -980,8 +980,8 @@ define amdgpu_ps i32 @saddo_i32_sv(i32 inreg %a, i32 %b) {
 ; GFX8-LABEL: saddo_i32_sv:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    v_add_u32_e32 v1, vcc, s0, v0
-; GFX8-NEXT:    v_cmp_gt_i32_e32 vcc, s0, v1
-; GFX8-NEXT:    v_cmp_gt_i32_e64 s[0:1], 0, v0
+; GFX8-NEXT:    v_cmp_ge_i32_e32 vcc, s0, v1
+; GFX8-NEXT:    v_cmp_ge_i32_e64 s[0:1], 0, v0
 ; GFX8-NEXT:    s_xor_b64 s[0:1], s[0:1], vcc
 ; GFX8-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[0:1]
 ; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v1, v0
@@ -991,8 +991,8 @@ define amdgpu_ps i32 @saddo_i32_sv(i32 inreg %a, i32 %b) {
 ; GFX9-LABEL: saddo_i32_sv:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    v_add_u32_e32 v1, s0, v0
-; GFX9-NEXT:    v_cmp_gt_i32_e32 vcc, s0, v1
-; GFX9-NEXT:    v_cmp_gt_i32_e64 s[0:1], 0, v0
+; GFX9-NEXT:    v_cmp_ge_i32_e32 vcc, s0, v1
+; GFX9-NEXT:    v_cmp_ge_i32_e64 s[0:1], 0, v0
 ; GFX9-NEXT:    s_xor_b64 s[0:1], s[0:1], vcc
 ; GFX9-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[0:1]
 ; GFX9-NEXT:    v_add_u32_e32 v0, v1, v0
@@ -1013,8 +1013,8 @@ define amdgpu_ps i16 @saddo_i16_sv(i16 inreg %a, i16 %b) {
 ; GFX7-NEXT:    v_bfe_i32 v2, v1, 0, 16
 ; GFX7-NEXT:    s_sext_i32_i16 s0, s0
 ; GFX7-NEXT:    v_bfe_i32 v0, v0, 0, 16
-; GFX7-NEXT:    v_cmp_gt_i32_e32 vcc, s0, v2
-; GFX7-NEXT:    v_cmp_gt_i32_e64 s[0:1], 0, v0
+; GFX7-NEXT:    v_cmp_ge_i32_e32 vcc, s0, v2
+; GFX7-NEXT:    v_cmp_ge_i32_e64 s[0:1], 0, v0
 ; GFX7-NEXT:    s_xor_b64 s[0:1], s[0:1], vcc
 ; GFX7-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[0:1]
 ; GFX7-NEXT:    v_add_i32_e32 v0, vcc, v1, v0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/ashr.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/ashr.ll
index 353c09b4b0bfb..3f61fdd8309ec 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/ashr.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/ashr.ll
@@ -1666,7 +1666,7 @@ define i65 @v_ashr_i65(i65 %value, i65 %amount) {
 ; GFX6-NEXT:    v_ashrrev_i32_e32 v8, 31, v5
 ; GFX6-NEXT:    v_ashr_i64 v[4:5], v[4:5], v2
 ; GFX6-NEXT:    v_or_b32_e32 v7, v7, v9
-; GFX6-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v3
+; GFX6-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v3
 ; GFX6-NEXT:    v_cndmask_b32_e32 v2, v4, v6, vcc
 ; GFX6-NEXT:    v_cndmask_b32_e32 v4, v5, v7, vcc
 ; GFX6-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v3
@@ -1689,7 +1689,7 @@ define i65 @v_ashr_i65(i65 %value, i65 %amount) {
 ; GFX8-NEXT:    v_ashrrev_i32_e32 v8, 31, v5
 ; GFX8-NEXT:    v_ashrrev_i64 v[4:5], v2, v[4:5]
 ; GFX8-NEXT:    v_or_b32_e32 v7, v7, v9
-; GFX8-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v3
+; GFX8-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v3
 ; GFX8-NEXT:    v_cndmask_b32_e32 v2, v4, v6, vcc
 ; GFX8-NEXT:    v_cndmask_b32_e32 v4, v5, v7, vcc
 ; GFX8-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v3
@@ -1712,7 +1712,7 @@ define i65 @v_ashr_i65(i65 %value, i65 %amount) {
 ; GFX9-NEXT:    v_ashrrev_i32_e32 v8, 31, v5
 ; GFX9-NEXT:    v_ashrrev_i64 v[4:5], v2, v[4:5]
 ; GFX9-NEXT:    v_or_b32_e32 v7, v7, v9
-; GFX9-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v3
+; GFX9-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v3
 ; GFX9-NEXT:    v_cndmask_b32_e32 v2, v4, v6, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v4, v5, v7, vcc
 ; GFX9-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v3
@@ -1728,7 +1728,7 @@ define i65 @v_ashr_i65(i65 %value, i65 %amount) {
 ; GFX10-NEXT:    v_sub_nc_u32_e32 v2, 64, v3
 ; GFX10-NEXT:    v_add_nc_u32_e32 v10, 0xffffffc0, v3
 ; GFX10-NEXT:    v_lshrrev_b64 v[6:7], v3, v[0:1]
-; GFX10-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 64, v3
+; GFX10-NEXT:    v_cmp_ge_u32_e32 vcc_lo, 64, v3
 ; GFX10-NEXT:    v_ashrrev_i32_e32 v5, 31, v4
 ; GFX10-NEXT:    v_cmp_eq_u32_e64 s4, 0, v3
 ; GFX10-NEXT:    v_lshlrev_b64 v[8:9], v2, v[4:5]
@@ -1750,7 +1750,7 @@ define i65 @v_ashr_i65(i65 %value, i65 %amount) {
 ; GFX11-NEXT:    v_bfe_i32 v4, v2, 0, 1
 ; GFX11-NEXT:    v_sub_nc_u32_e32 v2, 64, v3
 ; GFX11-NEXT:    v_lshrrev_b64 v[6:7], v3, v[0:1]
-; GFX11-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 64, v3
+; GFX11-NEXT:    v_cmp_ge_u32_e32 vcc_lo, 64, v3
 ; GFX11-NEXT:    v_cmp_eq_u32_e64 s0, 0, v3
 ; GFX11-NEXT:    v_ashrrev_i32_e32 v5, 31, v4
 ; GFX11-NEXT:    v_lshlrev_b64 v[8:9], v2, v[4:5]
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.ll
index 11acd451d98d7..0810342185c64 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.ll
@@ -30,11 +30,11 @@ define amdgpu_ps void @divergent_i1_phi_uniform_branch(ptr addrspace(1) %out, i3
 ; GFX10-NEXT:    s_cmp_lg_u32 s0, 0
 ; GFX10-NEXT:    s_cbranch_scc0 .LBB0_2
 ; GFX10-NEXT:  ; %bb.1:
-; GFX10-NEXT:    v_cmp_le_u32_e64 s0, 6, v2
+; GFX10-NEXT:    v_cmp_lt_u32_e64 s0, 6, v2
 ; GFX10-NEXT:    s_branch .LBB0_3
 ; GFX10-NEXT:  .LBB0_2: ; %dummy
 ; GFX10-NEXT:    v_mov_b32_e32 v5, 0x7b
-; GFX10-NEXT:    v_cmp_gt_u32_e64 s0, 1, v2
+; GFX10-NEXT:    v_cmp_ge_u32_e64 s0, 1, v2
 ; GFX10-NEXT:    global_store_dword v[3:4], v5, off
 ; GFX10-NEXT:  .LBB0_3: ; %exit
 ; GFX10-NEXT:    v_cndmask_b32_e64 v2, 0, -1, s0
@@ -65,11 +65,11 @@ exit:
 define amdgpu_ps void @divergent_i1_phi_uniform_branch_simple(ptr addrspace(1) %out, i32 %tid, i32 inreg %cond) {
 ; GFX10-LABEL: divergent_i1_phi_uniform_branch_simple:
 ; GFX10:       ; %bb.0: ; %A
-; GFX10-NEXT:    v_cmp_le_u32_e64 s1, 6, v2
+; GFX10-NEXT:    v_cmp_lt_u32_e64 s1, 6, v2
 ; GFX10-NEXT:    s_cmp_lg_u32 s0, 0
 ; GFX10-NEXT:    s_cbranch_scc1 .LBB1_2
 ; GFX10-NEXT:  ; %bb.1: ; %B
-; GFX10-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 1, v2
+; GFX10-NEXT:    v_cmp_ge_u32_e32 vcc_lo, 1, v2
 ; GFX10-NEXT:    s_andn2_b32 s0, s1, exec_lo
 ; GFX10-NEXT:    s_and_b32 s1, exec_lo, vcc_lo
 ; GFX10-NEXT:    s_or_b32 s1, s0, s1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-used-outside-loop.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-used-outside-loop.ll
index be90b02a6dd65..9c9be2db98acf 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-used-outside-loop.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-used-outside-loop.ll
@@ -78,7 +78,7 @@ define void @divergent_i1_phi_used_outside_loop_larger_loop_body(float %val, ptr
 ; GFX10-NEXT:    v_add_co_ci_u32_e64 v2, s4, 0, v2, s4
 ; GFX10-NEXT:    s_andn2_b32 s7, s5, exec_lo
 ; GFX10-NEXT:    s_and_b32 s8, exec_lo, s6
-; GFX10-NEXT:    v_cmp_le_i32_e32 vcc_lo, 10, v0
+; GFX10-NEXT:    v_cmp_lt_i32_e32 vcc_lo, 10, v0
 ; GFX10-NEXT:    s_or_b32 s4, s7, s8
 ; GFX10-NEXT:    s_cbranch_vccz .LBB1_4
 ; GFX10-NEXT:  .LBB1_2: ; %loop.start
@@ -590,7 +590,7 @@ define amdgpu_cs void @loop_with_1break(ptr addrspace(1) %x, ptr addrspace(1) %a
 ; GFX10-NEXT:    v_add_co_u32 v7, vcc_lo, v0, v7
 ; GFX10-NEXT:    v_add_co_ci_u32_e32 v8, vcc_lo, v1, v8, vcc_lo
 ; GFX10-NEXT:    v_add_nc_u32_e32 v10, 1, v6
-; GFX10-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 0x64, v6
+; GFX10-NEXT:    v_cmp_ge_u32_e32 vcc_lo, 0x64, v6
 ; GFX10-NEXT:    s_andn2_b32 s4, s4, exec_lo
 ; GFX10-NEXT:    global_load_dword v9, v[7:8], off
 ; GFX10-NEXT:    s_and_b32 s6, exec_lo, 0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-structurizer.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-structurizer.ll
index e31077dd1986f..8bf3a8d58789d 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-structurizer.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-structurizer.ll
@@ -7,11 +7,11 @@
 define amdgpu_ps void @divergent_i1_phi_if_then(ptr addrspace(1) %out, i32 %tid, i32 %cond) {
 ; GFX10-LABEL: divergent_i1_phi_if_then:
 ; GFX10:       ; %bb.0: ; %A
-; GFX10-NEXT:    v_cmp_le_u32_e64 s0, 6, v2
+; GFX10-NEXT:    v_cmp_lt_u32_e64 s0, 6, v2
 ; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v3
 ; GFX10-NEXT:    s_and_saveexec_b32 s1, vcc_lo
 ; GFX10-NEXT:  ; %bb.1: ; %B
-; GFX10-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 1, v2
+; GFX10-NEXT:    v_cmp_ge_u32_e32 vcc_lo, 1, v2
 ; GFX10-NEXT:    s_andn2_b32 s0, s0, exec_lo
 ; GFX10-NEXT:    s_and_b32 s2, exec_lo, vcc_lo
 ; GFX10-NEXT:    s_or_b32 s0, s0, s2
@@ -48,7 +48,7 @@ define amdgpu_ps void @divergent_i1_phi_if_else(ptr addrspace(1) %out, i32 %tid,
 ; GFX10-NEXT:    s_and_saveexec_b32 s1, vcc_lo
 ; GFX10-NEXT:    s_xor_b32 s1, exec_lo, s1
 ; GFX10-NEXT:  ; %bb.1: ; %B
-; GFX10-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 2, v2
+; GFX10-NEXT:    v_cmp_ge_u32_e32 vcc_lo, 2, v2
 ; GFX10-NEXT:    s_andn2_b32 s0, s0, exec_lo
 ; GFX10-NEXT:    ; implicit-def: $vgpr2
 ; GFX10-NEXT:    s_and_b32 s2, exec_lo, vcc_lo
@@ -56,7 +56,7 @@ define amdgpu_ps void @divergent_i1_phi_if_else(ptr addrspace(1) %out, i32 %tid,
 ; GFX10-NEXT:  ; %bb.2: ; %Flow
 ; GFX10-NEXT:    s_andn2_saveexec_b32 s1, s1
 ; GFX10-NEXT:  ; %bb.3: ; %A
-; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 1, v2
+; GFX10-NEXT:    v_cmp_lt_u32_e32 vcc_lo, 1, v2
 ; GFX10-NEXT:    s_andn2_b32 s0, s0, exec_lo
 ; GFX10-NEXT:    s_and_b32 s2, exec_lo, vcc_lo
 ; GFX10-NEXT:    s_or_b32 s0, s0, s2
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll
index 94dfd4eb2cb2b..5eebc37dfcbdd 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll
@@ -216,7 +216,7 @@ define amdgpu_kernel void @break_loop(i32 %arg) {
 ; CHECK-NEXT:    v_add_u32_e32 v1, 1, v1
 ; CHECK-NEXT:    s_andn2_b64 s[2:3], s[2:3], exec
 ; CHECK-NEXT:    s_and_b64 s[4:5], exec, -1
-; CHECK-NEXT:    v_cmp_le_i32_e32 vcc, 0, v1
+; CHECK-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v1
 ; CHECK-NEXT:    s_or_b64 s[2:3], s[2:3], s[4:5]
 ; CHECK-NEXT:    s_cbranch_vccnz .LBB5_1
 ; CHECK-NEXT:  ; %bb.3: ; %bb4
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll
index fc81e16d68e98..39d4c3407e5de 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll
@@ -21,10 +21,10 @@ define amdgpu_ps i7 @s_fshl_i7(i7 inreg %lhs, i7 inreg %rhs, i7 inreg %amt) {
 ; GFX6-NEXT:    v_mul_lo_u32 v0, v0, 7
 ; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, s2, v0
 ; GFX6-NEXT:    v_add_i32_e32 v1, vcc, -7, v0
-; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 7, v0
+; GFX6-NEXT:    v_cmp_lt_u32_e32 vcc, 7, v0
 ; GFX6-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
 ; GFX6-NEXT:    v_add_i32_e32 v1, vcc, -7, v0
-; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 7, v0
+; GFX6-NEXT:    v_cmp_lt_u32_e32 vcc, 7, v0
 ; GFX6-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
 ; GFX6-NEXT:    v_sub_i32_e32 v1, vcc, 6, v0
 ; GFX6-NEXT:    v_and_b32_e32 v0, 0x7f, v0
@@ -51,10 +51,10 @@ define amdgpu_ps i7 @s_fshl_i7(i7 inreg %lhs, i7 inreg %rhs, i7 inreg %amt) {
 ; GFX8-NEXT:    v_mul_lo_u32 v0, v0, 7
 ; GFX8-NEXT:    v_sub_u32_e32 v0, vcc, s2, v0
 ; GFX8-NEXT:    v_add_u32_e32 v1, vcc, -7, v0
-; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 7, v0
+; GFX8-NEXT:    v_cmp_lt_u32_e32 vcc, 7, v0
 ; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
 ; GFX8-NEXT:    v_add_u32_e32 v1, vcc, -7, v0
-; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 7, v0
+; GFX8-NEXT:    v_cmp_lt_u32_e32 vcc, 7, v0
 ; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
 ; GFX8-NEXT:    v_sub_u16_e32 v1, 6, v0
 ; GFX8-NEXT:    v_and_b32_e32 v0, 0x7f, v0
@@ -81,10 +81,10 @@ define amdgpu_ps i7 @s_fshl_i7(i7 inreg %lhs, i7 inreg %rhs, i7 inreg %amt) {
 ; GFX9-NEXT:    v_mul_lo_u32 v0, v0, 7
 ; GFX9-NEXT:    v_sub_u32_e32 v0, s2, v0
 ; GFX9-NEXT:    v_add_u32_e32 v1, -7, v0
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 7, v0
+; GFX9-NEXT:    v_cmp_lt_u32_e32 vcc, 7, v0
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
 ; GFX9-NEXT:    v_add_u32_e32 v1, -7, v0
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 7, v0
+; GFX9-NEXT:    v_cmp_lt_u32_e32 vcc, 7, v0
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
 ; GFX9-NEXT:    v_sub_u16_e32 v1, 6, v0
 ; GFX9-NEXT:    v_and_b32_e32 v0, 0x7f, v0
@@ -111,10 +111,10 @@ define amdgpu_ps i7 @s_fshl_i7(i7 inreg %lhs, i7 inreg %rhs, i7 inreg %amt) {
 ; GFX10-NEXT:    v_mul_lo_u32 v0, v0, 7
 ; GFX10-NEXT:    v_sub_nc_u32_e32 v0, s2, v0
 ; GFX10-NEXT:    v_add_nc_u32_e32 v1, -7, v0
-; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 7, v0
+; GFX10-NEXT:    v_cmp_lt_u32_e32 vcc_lo, 7, v0
 ; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
 ; GFX10-NEXT:    v_add_nc_u32_e32 v1, -7, v0
-; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 7, v0
+; GFX10-NEXT:    v_cmp_lt_u32_e32 vcc_lo, 7, v0
 ; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
 ; GFX10-NEXT:    v_sub_nc_u16 v1, 6, v0
 ; GFX10-NEXT:    v_and_b32_e32 v0, 0x7f, v0
@@ -147,11 +147,11 @@ define amdgpu_ps i7 @s_fshl_i7(i7 inreg %lhs, i7 inreg %rhs, i7 inreg %amt) {
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX11-NEXT:    v_sub_nc_u32_e32 v0, s2, v0
 ; GFX11-NEXT:    v_add_nc_u32_e32 v1, -7, v0
-; GFX11-NEXT:    v_cmp_le_u32_e32 vcc_lo, 7, v0
+; GFX11-NEXT:    v_cmp_lt_u32_e32 vcc_lo, 7, v0
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
 ; GFX11-NEXT:    v_add_nc_u32_e32 v1, -7, v0
-; GFX11-NEXT:    v_cmp_le_u32_e32 vcc_lo, 7, v0
+; GFX11-NEXT:    v_cmp_lt_u32_e32 vcc_lo, 7, v0
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
 ; GFX11-NEXT:    v_sub_nc_u16 v1, 6, v0
@@ -186,10 +186,10 @@ define i7 @v_fshl_i7(i7 %lhs, i7 %rhs, i7 %amt) {
 ; GFX6-NEXT:    v_mul_lo_u32 v3, v3, 7
 ; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, v2, v3
 ; GFX6-NEXT:    v_add_i32_e32 v3, vcc, -7, v2
-; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 7, v2
+; GFX6-NEXT:    v_cmp_lt_u32_e32 vcc, 7, v2
 ; GFX6-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
 ; GFX6-NEXT:    v_add_i32_e32 v3, vcc, -7, v2
-; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 7, v2
+; GFX6-NEXT:    v_cmp_lt_u32_e32 vcc, 7, v2
 ; GFX6-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
 ; GFX6-NEXT:    v_sub_i32_e32 v3, vcc, 6, v2
 ; GFX6-NEXT:    v_and_b32_e32 v2, 0x7f, v2
@@ -216,10 +216,10 @@ define i7 @v_fshl_i7(i7 %lhs, i7 %rhs, i7 %amt) {
 ; GFX8-NEXT:    v_mul_lo_u32 v3, v3, 7
 ; GFX8-NEXT:    v_sub_u32_e32 v2, vcc, v2, v3
 ; GFX8-NEXT:    v_add_u32_e32 v3, vcc, -7, v2
-; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 7, v2
+; GFX8-NEXT:    v_cmp_lt_u32_e32 vcc, 7, v2
 ; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
 ; GFX8-NEXT:    v_add_u32_e32 v3, vcc, -7, v2
-; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 7, v2
+; GFX8-NEXT:    v_cmp_lt_u32_e32 vcc, 7, v2
 ; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
 ; GFX8-NEXT:    v_sub_u16_e32 v3, 6, v2
 ; GFX8-NEXT:    v_and_b32_e32 v2, 0x7f, v2
@@ -246,10 +246,10 @@ define i7 @v_fshl_i7(i7 %lhs, i7 %rhs, i7 %amt) {
 ; GFX9-NEXT:    v_mul_lo_u32 v3, v3, 7
 ; GFX9-NEXT:    v_sub_u32_e32 v2, v2, v3
 ; GFX9-NEXT:    v_add_u32_e32 v3, -7, v2
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 7, v2
+; GFX9-NEXT:    v_cmp_lt_u32_e32 vcc, 7, v2
 ; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
 ; GFX9-NEXT:    v_add_u32_e32 v3, -7, v2
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 7, v2
+; GFX9-NEXT:    v_cmp_lt_u32_e32 vcc, 7, v2
 ; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
 ; GFX9-NEXT:    v_sub_u16_e32 v3, 6, v2
 ; GFX9-NEXT:    v_and_b32_e32 v2, 0x7f, v2
@@ -276,10 +276,10 @@ define i7 @v_fshl_i7(i7 %lhs, i7 %rhs, i7 %amt) {
 ; GFX10-NEXT:    v_mul_lo_u32 v3, v3, 7
 ; GFX10-NEXT:    v_sub_nc_u32_e32 v2, v2, v3
 ; GFX10-NEXT:    v_add_nc_u32_e32 v3, -7, v2
-; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 7, v2
+; GFX10-NEXT:    v_cmp_lt_u32_e32 vcc_lo, 7, v2
 ; GFX10-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc_lo
 ; GFX10-NEXT:    v_add_nc_u32_e32 v3, -7, v2
-; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 7, v2
+; GFX10-NEXT:    v_cmp_lt_u32_e32 vcc_lo, 7, v2
 ; GFX10-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc_lo
 ; GFX10-NEXT:    v_sub_nc_u16 v3, 6, v2
 ; GFX10-NEXT:    v_and_b32_e32 v2, 0x7f, v2
@@ -312,11 +312,11 @@ define i7 @v_fshl_i7(i7 %lhs, i7 %rhs, i7 %amt) {
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX11-NEXT:    v_sub_nc_u32_e32 v2, v2, v3
 ; GFX11-NEXT:    v_add_nc_u32_e32 v3, -7, v2
-; GFX11-NEXT:    v_cmp_le_u32_e32 vcc_lo, 7, v2
+; GFX11-NEXT:    v_cmp_lt_u32_e32 vcc_lo, 7, v2
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX11-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc_lo
 ; GFX11-NEXT:    v_add_nc_u32_e32 v3, -7, v2
-; GFX11-NEXT:    v_cmp_le_u32_e32 vcc_lo, 7, v2
+; GFX11-NEXT:    v_cmp_lt_u32_e32 vcc_lo, 7, v2
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX11-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc_lo
 ; GFX11-NEXT:    v_sub_nc_u16 v3, 6, v2
@@ -1465,10 +1465,10 @@ define amdgpu_ps i24 @s_fshl_i24(i24 inreg %lhs, i24 inreg %rhs, i24 inreg %amt)
 ; GFX6-NEXT:    v_mul_lo_u32 v0, v0, 24
 ; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, s2, v0
 ; GFX6-NEXT:    v_add_i32_e32 v2, vcc, v0, v1
-; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 24, v0
+; GFX6-NEXT:    v_cmp_lt_u32_e32 vcc, 24, v0
 ; GFX6-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; GFX6-NEXT:    v_add_i32_e32 v1, vcc, v0, v1
-; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 24, v0
+; GFX6-NEXT:    v_cmp_lt_u32_e32 vcc, 24, v0
 ; GFX6-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
 ; GFX6-NEXT:    v_sub_i32_e32 v1, vcc, 23, v0
 ; GFX6-NEXT:    v_and_b32_e32 v0, 0xffffff, v0
@@ -1495,10 +1495,10 @@ define amdgpu_ps i24 @s_fshl_i24(i24 inreg %lhs, i24 inreg %rhs, i24 inreg %amt)
 ; GFX8-NEXT:    v_mul_lo_u32 v0, v0, 24
 ; GFX8-NEXT:    v_sub_u32_e32 v0, vcc, s2, v0
 ; GFX8-NEXT:    v_add_u32_e32 v2, vcc, v0, v1
-; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 24, v0
+; GFX8-NEXT:    v_cmp_lt_u32_e32 vcc, 24, v0
 ; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; GFX8-NEXT:    v_add_u32_e32 v1, vcc, v0, v1
-; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 24, v0
+; GFX8-NEXT:    v_cmp_lt_u32_e32 vcc, 24, v0
 ; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
 ; GFX8-NEXT:    v_sub_u32_e32 v1, vcc, 23, v0
 ; GFX8-NEXT:    v_and_b32_e32 v0, 0xffffff, v0
@@ -1525,10 +1525,10 @@ define amdgpu_ps i24 @s_fshl_i24(i24 inreg %lhs, i24 inreg %rhs, i24 inreg %amt)
 ; GFX9-NEXT:    v_mul_lo_u32 v0, v0, 24
 ; GFX9-NEXT:    v_sub_u32_e32 v0, s2, v0
 ; GFX9-NEXT:    v_add_u32_e32 v1, 0xffffffe8, v0
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 24, v0
+; GFX9-NEXT:    v_cmp_lt_u32_e32 vcc, 24, v0
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
 ; GFX9-NEXT:    v_add_u32_e32 v1, 0xffffffe8, v0
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 24, v0
+; GFX9-NEXT:    v_cmp_lt_u32_e32 vcc, 24, v0
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
 ; GFX9-NEXT:    v_sub_u32_e32 v1, 23, v0
 ; GFX9-NEXT:    v_and_b32_e32 v1, 0xffffff, v1
@@ -1553,10 +1553,10 @@ define amdgpu_ps i24 @s_fshl_i24(i24 inreg %lhs, i24 inreg %rhs, i24 inreg %amt)
 ; GFX10-NEXT:    v_mul_lo_u32 v0, v0, 24
 ; GFX10-NEXT:    v_sub_nc_u32_e32 v0, s2, v0
 ; GFX10-NEXT:    v_add_nc_u32_e32 v1, 0xffffffe8, v0
-; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v0
+; GFX10-NEXT:    v_cmp_lt_u32_e32 vcc_lo, 24, v0
 ; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
 ; GFX10-NEXT:    v_add_nc_u32_e32 v1, 0xffffffe8, v0
-; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v0
+; GFX10-NEXT:    v_cmp_lt_u32_e32 vcc_lo, 24, v0
 ; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
 ; GFX10-NEXT:    v_sub_nc_u32_e32 v1, 23, v0
 ; GFX10-NEXT:    v_and_b32_e32 v0, 0xffffff, v0
@@ -1587,11 +1587,11 @@ define amdgpu_ps i24 @s_fshl_i24(i24 inreg %lhs, i24 inreg %rhs, i24 inreg %amt)
 ; GFX11-NEXT:    v_sub_nc_u32_e32 v0, s2, v0
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
 ; GFX11-NEXT:    v_add_nc_u32_e32 v1, 0xffffffe8, v0
-; GFX11-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v0
+; GFX11-NEXT:    v_cmp_lt_u32_e32 vcc_lo, 24, v0
 ; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
 ; GFX11-NEXT:    v_add_nc_u32_e32 v1, 0xffffffe8, v0
-; GFX11-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v0
+; GFX11-NEXT:    v_cmp_lt_u32_e32 vcc_lo, 24, v0
 ; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
 ; GFX11-NEXT:    v_sub_nc_u32_e32 v1, 23, v0
@@ -1625,10 +1625,10 @@ define i24 @v_fshl_i24(i24 %lhs, i24 %rhs, i24 %amt) {
 ; GFX6-NEXT:    v_mul_lo_u32 v3, v3, 24
 ; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, v2, v3
 ; GFX6-NEXT:    v_add_i32_e32 v3, vcc, v2, v4
-; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 24, v2
+; GFX6-NEXT:    v_cmp_lt_u32_e32 vcc, 24, v2
 ; GFX6-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
 ; GFX6-NEXT:    v_add_i32_e32 v3, vcc, 0xffffffe8, v2
-; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 24, v2
+; GFX6-NEXT:    v_cmp_lt_u32_e32 vcc, 24, v2
 ; GFX6-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
 ; GFX6-NEXT:    v_sub_i32_e32 v3, vcc, 23, v2
 ; GFX6-NEXT:    v_and_b32_e32 v2, 0xffffff, v2
@@ -1655,10 +1655,10 @@ define i24 @v_fshl_i24(i24 %lhs, i24 %rhs, i24 %amt) {
 ; GFX8-NEXT:    v_mul_lo_u32 v3, v3, 24
 ; GFX8-NEXT:    v_sub_u32_e32 v2, vcc, v2, v3
 ; GFX8-NEXT:    v_add_u32_e32 v3, vcc, v2, v4
-; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 24, v2
+; GFX8-NEXT:    v_cmp_lt_u32_e32 vcc, 24, v2
 ; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
 ; GFX8-NEXT:    v_add_u32_e32 v3, vcc, 0xffffffe8, v2
-; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 24, v2
+; GFX8-NEXT:    v_cmp_lt_u32_e32 vcc, 24, v2
 ; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
 ; GFX8-NEXT:    v_sub_u32_e32 v3, vcc, 23, v2
 ; GFX8-NEXT:    v_and_b32_e32 v2, 0xffffff, v2
@@ -1685,10 +1685,10 @@ define i24 @v_fshl_i24(i24 %lhs, i24 %rhs, i24 %amt) {
 ; GFX9-NEXT:    v_mul_lo_u32 v3, v3, 24
 ; GFX9-NEXT:    v_sub_u32_e32 v2, v2, v3
 ; GFX9-NEXT:    v_add_u32_e32 v3, 0xffffffe8, v2
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 24, v2
+; GFX9-NEXT:    v_cmp_lt_u32_e32 vcc, 24, v2
 ; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
 ; GFX9-NEXT:    v_add_u32_e32 v3, 0xffffffe8, v2
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 24, v2
+; GFX9-NEXT:    v_cmp_lt_u32_e32 vcc, 24, v2
 ; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
 ; GFX9-NEXT:    v_sub_u32_e32 v3, 23, v2
 ; GFX9-NEXT:    v_and_b32_e32 v3, 0xffffff, v3
@@ -1713,10 +1713,10 @@ define i24 @v_fshl_i24(i24 %lhs, i24 %rhs, i24 %amt) {
 ; GFX10-NEXT:    v_mul_lo_u32 v3, v3, 24
 ; GFX10-NEXT:    v_sub_nc_u32_e32 v2, v2, v3
 ; GFX10-NEXT:    v_add_nc_u32_e32 v3, 0xffffffe8, v2
-; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v2
+; GFX10-NEXT:    v_cmp_lt_u32_e32 vcc_lo, 24, v2
 ; GFX10-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc_lo
 ; GFX10-NEXT:    v_add_nc_u32_e32 v3, 0xffffffe8, v2
-; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v2
+; GFX10-NEXT:    v_cmp_lt_u32_e32 vcc_lo, 24, v2
 ; GFX10-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc_lo
 ; GFX10-NEXT:    v_sub_nc_u32_e32 v3, 23, v2
 ; GFX10-NEXT:    v_and_b32_e32 v2, 0xffffff, v2
@@ -1747,11 +1747,11 @@ define i24 @v_fshl_i24(i24 %lhs, i24 %rhs, i24 %amt) {
 ; GFX11-NEXT:    v_sub_nc_u32_e32 v2, v2, v3
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
 ; GFX11-NEXT:    v_add_nc_u32_e32 v3, 0xffffffe8, v2
-; GFX11-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v2
+; GFX11-NEXT:    v_cmp_lt_u32_e32 vcc_lo, 24, v2
 ; GFX11-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc_lo
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
 ; GFX11-NEXT:    v_add_nc_u32_e32 v3, 0xffffffe8, v2
-; GFX11-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v2
+; GFX11-NEXT:    v_cmp_lt_u32_e32 vcc_lo, 24, v2
 ; GFX11-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc_lo
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
 ; GFX11-NEXT:    v_sub_nc_u32_e32 v3, 23, v2
@@ -1832,19 +1832,19 @@ define amdgpu_ps i48 @s_fshl_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 i
 ; GFX6-NEXT:    v_mul_hi_u32 v2, v5, v2
 ; GFX6-NEXT:    v_sub_i32_e32 v4, vcc, s1, v4
 ; GFX6-NEXT:    v_add_i32_e32 v6, vcc, v4, v3
-; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 24, v4
+; GFX6-NEXT:    v_cmp_lt_u32_e32 vcc, 24, v4
 ; GFX6-NEXT:    v_mul_lo_u32 v2, v2, 24
 ; GFX6-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc
 ; GFX6-NEXT:    v_add_i32_e32 v6, vcc, v4, v3
-; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 24, v4
+; GFX6-NEXT:    v_cmp_lt_u32_e32 vcc, 24, v4
 ; GFX6-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc
 ; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, v5, v2
 ; GFX6-NEXT:    v_sub_i32_e32 v6, vcc, 23, v4
 ; GFX6-NEXT:    v_add_i32_e32 v5, vcc, v2, v3
-; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 24, v2
+; GFX6-NEXT:    v_cmp_lt_u32_e32 vcc, 24, v2
 ; GFX6-NEXT:    v_cndmask_b32_e32 v2, v2, v5, vcc
 ; GFX6-NEXT:    v_add_i32_e32 v3, vcc, v2, v3
-; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 24, v2
+; GFX6-NEXT:    v_cmp_lt_u32_e32 vcc, 24, v2
 ; GFX6-NEXT:    v_and_b32_e32 v4, 0xffffff, v4
 ; GFX6-NEXT:    s_lshr_b32 s0, s0, 1
 ; GFX6-NEXT:    v_and_b32_e32 v6, 0xffffff, v6
@@ -1943,10 +1943,10 @@ define amdgpu_ps i48 @s_fshl_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 i
 ; GFX8-NEXT:    v_sub_u32_e32 v2, vcc, s4, v2
 ; GFX8-NEXT:    v_add_u32_e32 v3, vcc, v2, v1
 ; GFX8-NEXT:    v_mul_hi_u32 v0, s5, v0
-; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 24, v2
+; GFX8-NEXT:    v_cmp_lt_u32_e32 vcc, 24, v2
 ; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
 ; GFX8-NEXT:    v_add_u32_e32 v3, vcc, v2, v1
-; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 24, v2
+; GFX8-NEXT:    v_cmp_lt_u32_e32 vcc, 24, v2
 ; GFX8-NEXT:    v_mul_lo_u32 v0, v0, 24
 ; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
 ; GFX8-NEXT:    v_sub_u32_e32 v3, vcc, 23, v2
@@ -1958,10 +1958,10 @@ define amdgpu_ps i48 @s_fshl_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 i
 ; GFX8-NEXT:    v_sub_u32_e32 v0, vcc, s5, v0
 ; GFX8-NEXT:    v_or_b32_e32 v2, v2, v3
 ; GFX8-NEXT:    v_add_u32_e32 v3, vcc, v0, v1
-; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 24, v0
+; GFX8-NEXT:    v_cmp_lt_u32_e32 vcc, 24, v0
 ; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
 ; GFX8-NEXT:    v_add_u32_e32 v1, vcc, v0, v1
-; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 24, v0
+; GFX8-NEXT:    v_cmp_lt_u32_e32 vcc, 24, v0
 ; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
 ; GFX8-NEXT:    v_sub_u32_e32 v1, vcc, 23, v0
 ; GFX8-NEXT:    v_and_b32_e32 v0, 0xffffff, v0
@@ -2051,10 +2051,10 @@ define amdgpu_ps i48 @s_fshl_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 i
 ; GFX9-NEXT:    v_mul_hi_u32 v0, s5, v0
 ; GFX9-NEXT:    v_sub_u32_e32 v1, s4, v1
 ; GFX9-NEXT:    v_add_u32_e32 v2, 0xffffffe8, v1
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 24, v1
+; GFX9-NEXT:    v_cmp_lt_u32_e32 vcc, 24, v1
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
 ; GFX9-NEXT:    v_add_u32_e32 v2, 0xffffffe8, v1
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 24, v1
+; GFX9-NEXT:    v_cmp_lt_u32_e32 vcc, 24, v1
 ; GFX9-NEXT:    v_mul_lo_u32 v0, v0, 24
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
 ; GFX9-NEXT:    v_sub_u32_e32 v2, 23, v1
@@ -2065,10 +2065,10 @@ define amdgpu_ps i48 @s_fshl_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 i
 ; GFX9-NEXT:    v_sub_u32_e32 v0, s5, v0
 ; GFX9-NEXT:    v_lshl_or_b32 v1, s0, v1, v2
 ; GFX9-NEXT:    v_add_u32_e32 v2, 0xffffffe8, v0
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 24, v0
+; GFX9-NEXT:    v_cmp_lt_u32_e32 vcc, 24, v0
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; GFX9-NEXT:    v_add_u32_e32 v2, 0xffffffe8, v0
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 24, v0
+; GFX9-NEXT:    v_cmp_lt_u32_e32 vcc, 24, v0
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; GFX9-NEXT:    v_sub_u32_e32 v2, 23, v0
 ; GFX9-NEXT:    s_lshr_b32 s0, s3, 1
@@ -2155,21 +2155,21 @@ define amdgpu_ps i48 @s_fshl_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 i
 ; GFX10-NEXT:    s_and_b32 s5, s13, 0xff
 ; GFX10-NEXT:    s_lshl_b32 s4, s4, 16
 ; GFX10-NEXT:    v_add_nc_u32_e32 v3, 0xffffffe8, v2
-; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v2
+; GFX10-NEXT:    v_cmp_lt_u32_e32 vcc_lo, 24, v2
 ; GFX10-NEXT:    v_add_nc_u32_e32 v4, 0xffffffe8, v0
 ; GFX10-NEXT:    s_lshl_b32 s5, s5, 16
 ; GFX10-NEXT:    s_or_b32 s2, s2, s4
 ; GFX10-NEXT:    s_or_b32 s3, s3, s5
 ; GFX10-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc_lo
-; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v0
+; GFX10-NEXT:    v_cmp_lt_u32_e32 vcc_lo, 24, v0
 ; GFX10-NEXT:    s_lshr_b32 s3, s3, 1
 ; GFX10-NEXT:    s_lshr_b32 s2, s2, 1
 ; GFX10-NEXT:    v_add_nc_u32_e32 v3, 0xffffffe8, v2
 ; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc_lo
-; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v2
+; GFX10-NEXT:    v_cmp_lt_u32_e32 vcc_lo, 24, v2
 ; GFX10-NEXT:    v_add_nc_u32_e32 v4, 0xffffffe8, v0
 ; GFX10-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc_lo
-; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v0
+; GFX10-NEXT:    v_cmp_lt_u32_e32 vcc_lo, 24, v0
 ; GFX10-NEXT:    v_sub_nc_u32_e32 v3, 23, v2
 ; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc_lo
 ; GFX10-NEXT:    v_and_b32_e32 v2, 0xffffff, v2
@@ -2264,24 +2264,24 @@ define amdgpu_ps i48 @s_fshl_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 i
 ; GFX11-NEXT:    s_and_b32 s5, s13, 0xff
 ; GFX11-NEXT:    s_lshl_b32 s4, s4, 16
 ; GFX11-NEXT:    v_add_nc_u32_e32 v2, 0xffffffe8, v1
-; GFX11-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v1
+; GFX11-NEXT:    v_cmp_lt_u32_e32 vcc_lo, 24, v1
 ; GFX11-NEXT:    v_add_nc_u32_e32 v3, 0xffffffe8, v0
 ; GFX11-NEXT:    s_or_b32 s2, s2, s4
 ; GFX11-NEXT:    s_lshl_b32 s5, s5, 16
 ; GFX11-NEXT:    s_lshr_b32 s2, s2, 1
 ; GFX11-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc_lo
-; GFX11-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v0
+; GFX11-NEXT:    v_cmp_lt_u32_e32 vcc_lo, 24, v0
 ; GFX11-NEXT:    s_or_b32 s3, s3, s5
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
 ; GFX11-NEXT:    v_add_nc_u32_e32 v2, 0xffffffe8, v1
 ; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc_lo
-; GFX11-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v1
+; GFX11-NEXT:    v_cmp_lt_u32_e32 vcc_lo, 24, v1
 ; GFX11-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc_lo
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_4)
 ; GFX11-NEXT:    v_sub_nc_u32_e32 v2, 23, v1
 ; GFX11-NEXT:    v_and_b32_e32 v1, 0xffffff, v1
 ; GFX11-NEXT:    v_add_nc_u32_e32 v3, 0xffffffe8, v0
-; GFX11-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v0
+; GFX11-NEXT:    v_cmp_lt_u32_e32 vcc_lo, 24, v0
 ; GFX11-NEXT:    v_and_b32_e32 v2, 0xffffff, v2
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
 ; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc_lo
@@ -2345,10 +2345,10 @@ define <2 x i24> @v_fshl_v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt) {
 ; GFX6-NEXT:    v_mul_lo_u32 v6, v6, 24
 ; GFX6-NEXT:    v_sub_i32_e32 v4, vcc, v4, v8
 ; GFX6-NEXT:    v_add_i32_e32 v8, vcc, v4, v7
-; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 24, v4
+; GFX6-NEXT:    v_cmp_lt_u32_e32 vcc, 24, v4
 ; GFX6-NEXT:    v_cndmask_b32_e32 v4, v4, v8, vcc
 ; GFX6-NEXT:    v_add_i32_e32 v8, vcc, v4, v7
-; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 24, v4
+; GFX6-NEXT:    v_cmp_lt_u32_e32 vcc, 24, v4
 ; GFX6-NEXT:    v_cndmask_b32_e32 v4, v4, v8, vcc
 ; GFX6-NEXT:    v_sub_i32_e32 v8, vcc, 23, v4
 ; GFX6-NEXT:    v_and_b32_e32 v4, 0xffffff, v4
@@ -2358,10 +2358,10 @@ define <2 x i24> @v_fshl_v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt) {
 ; GFX6-NEXT:    v_or_b32_e32 v0, v0, v2
 ; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, v5, v6
 ; GFX6-NEXT:    v_add_i32_e32 v4, vcc, v2, v7
-; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 24, v2
+; GFX6-NEXT:    v_cmp_lt_u32_e32 vcc, 24, v2
 ; GFX6-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
 ; GFX6-NEXT:    v_add_i32_e32 v4, vcc, 0xffffffe8, v2
-; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 24, v2
+; GFX6-NEXT:    v_cmp_lt_u32_e32 vcc, 24, v2
 ; GFX6-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
 ; GFX6-NEXT:    v_sub_i32_e32 v4, vcc, 23, v2
 ; GFX6-NEXT:    v_and_b32_e32 v2, 0xffffff, v2
@@ -2392,10 +2392,10 @@ define <2 x i24> @v_fshl_v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt) {
 ; GFX8-NEXT:    v_mul_lo_u32 v6, v6, 24
 ; GFX8-NEXT:    v_sub_u32_e32 v4, vcc, v4, v8
 ; GFX8-NEXT:    v_add_u32_e32 v8, vcc, v4, v7
-; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 24, v4
+; GFX8-NEXT:    v_cmp_lt_u32_e32 vcc, 24, v4
 ; GFX8-NEXT:    v_cndmask_b32_e32 v4, v4, v8, vcc
 ; GFX8-NEXT:    v_add_u32_e32 v8, vcc, v4, v7
-; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 24, v4
+; GFX8-NEXT:    v_cmp_lt_u32_e32 vcc, 24, v4
 ; GFX8-NEXT:    v_cndmask_b32_e32 v4, v4, v8, vcc
 ; GFX8-NEXT:    v_sub_u32_e32 v8, vcc, 23, v4
 ; GFX8-NEXT:    v_and_b32_e32 v4, 0xffffff, v4
@@ -2405,10 +2405,10 @@ define <2 x i24> @v_fshl_v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt) {
 ; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
 ; GFX8-NEXT:    v_sub_u32_e32 v2, vcc, v5, v6
 ; GFX8-NEXT:    v_add_u32_e32 v4, vcc, v2, v7
-; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 24, v2
+; GFX8-NEXT:    v_cmp_lt_u32_e32 vcc, 24, v2
 ; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
 ; GFX8-NEXT:    v_add_u32_e32 v4, vcc, 0xffffffe8, v2
-; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 24, v2
+; GFX8-NEXT:    v_cmp_lt_u32_e32 vcc, 24, v2
 ; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
 ; GFX8-NEXT:    v_sub_u32_e32 v4, vcc, 23, v2
 ; GFX8-NEXT:    v_and_b32_e32 v2, 0xffffff, v2
@@ -2441,21 +2441,21 @@ define <2 x i24> @v_fshl_v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt) {
 ; GFX9-NEXT:    v_sub_u32_e32 v4, v4, v7
 ; GFX9-NEXT:    v_sub_u32_e32 v5, v5, v6
 ; GFX9-NEXT:    v_add_u32_e32 v6, 0xffffffe8, v4
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 24, v4
+; GFX9-NEXT:    v_cmp_lt_u32_e32 vcc, 24, v4
 ; GFX9-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc
 ; GFX9-NEXT:    v_add_u32_e32 v6, 0xffffffe8, v4
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 24, v4
+; GFX9-NEXT:    v_cmp_lt_u32_e32 vcc, 24, v4
 ; GFX9-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc
 ; GFX9-NEXT:    v_sub_u32_e32 v6, 23, v4
 ; GFX9-NEXT:    v_and_b32_e32 v6, 0xffffff, v6
 ; GFX9-NEXT:    v_add_u32_e32 v7, 0xffffffe8, v5
 ; GFX9-NEXT:    v_and_b32_e32 v4, 0xffffff, v4
 ; GFX9-NEXT:    v_lshrrev_b32_e32 v2, v6, v2
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 24, v5
+; GFX9-NEXT:    v_cmp_lt_u32_e32 vcc, 24, v5
 ; GFX9-NEXT:    v_lshl_or_b32 v0, v0, v4, v2
 ; GFX9-NEXT:    v_cndmask_b32_e32 v2, v5, v7, vcc
 ; GFX9-NEXT:    v_add_u32_e32 v4, 0xffffffe8, v2
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 24, v2
+; GFX9-NEXT:    v_cmp_lt_u32_e32 vcc, 24, v2
 ; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
 ; GFX9-NEXT:    v_sub_u32_e32 v4, 23, v2
 ; GFX9-NEXT:    v_and_b32_e32 v4, 0xffffff, v4
@@ -2485,16 +2485,16 @@ define <2 x i24> @v_fshl_v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt) {
 ; GFX10-NEXT:    v_sub_nc_u32_e32 v4, v4, v7
 ; GFX10-NEXT:    v_sub_nc_u32_e32 v5, v5, v6
 ; GFX10-NEXT:    v_add_nc_u32_e32 v6, 0xffffffe8, v4
-; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v4
+; GFX10-NEXT:    v_cmp_lt_u32_e32 vcc_lo, 24, v4
 ; GFX10-NEXT:    v_add_nc_u32_e32 v7, 0xffffffe8, v5
 ; GFX10-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc_lo
-; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v5
+; GFX10-NEXT:    v_cmp_lt_u32_e32 vcc_lo, 24, v5
 ; GFX10-NEXT:    v_add_nc_u32_e32 v6, 0xffffffe8, v4
 ; GFX10-NEXT:    v_cndmask_b32_e32 v5, v5, v7, vcc_lo
-; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v4
+; GFX10-NEXT:    v_cmp_lt_u32_e32 vcc_lo, 24, v4
 ; GFX10-NEXT:    v_add_nc_u32_e32 v7, 0xffffffe8, v5
 ; GFX10-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc_lo
-; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v5
+; GFX10-NEXT:    v_cmp_lt_u32_e32 vcc_lo, 24, v5
 ; GFX10-NEXT:    v_sub_nc_u32_e32 v6, 23, v4
 ; GFX10-NEXT:    v_cndmask_b32_e32 v5, v5, v7, vcc_lo
 ; GFX10-NEXT:    v_and_b32_e32 v4, 0xffffff, v4
@@ -2530,7 +2530,7 @@ define <2 x i24> @v_fshl_v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt) {
 ; GFX11-NEXT:    v_mul_lo_u32 v7, v7, 24
 ; GFX11-NEXT:    v_sub_nc_u32_e32 v4, v4, v7
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v4
+; GFX11-NEXT:    v_cmp_lt_u32_e32 vcc_lo, 24, v4
 ; GFX11-NEXT:    v_and_b32_e32 v5, 0xffffff, v5
 ; GFX11-NEXT:    v_mul_hi_u32 v6, v5, v6
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
@@ -2539,15 +2539,15 @@ define <2 x i24> @v_fshl_v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt) {
 ; GFX11-NEXT:    v_add_nc_u32_e32 v6, 0xffffffe8, v4
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
 ; GFX11-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc_lo
-; GFX11-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v5
+; GFX11-NEXT:    v_cmp_lt_u32_e32 vcc_lo, 24, v5
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
 ; GFX11-NEXT:    v_add_nc_u32_e32 v6, 0xffffffe8, v4
 ; GFX11-NEXT:    v_add_nc_u32_e32 v7, 0xffffffe8, v5
 ; GFX11-NEXT:    v_cndmask_b32_e32 v5, v5, v7, vcc_lo
-; GFX11-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v4
+; GFX11-NEXT:    v_cmp_lt_u32_e32 vcc_lo, 24, v4
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
 ; GFX11-NEXT:    v_dual_cndmask_b32 v4, v4, v6 :: v_dual_add_nc_u32 v7, 0xffffffe8, v5
-; GFX11-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v5
+; GFX11-NEXT:    v_cmp_lt_u32_e32 vcc_lo, 24, v5
 ; GFX11-NEXT:    v_sub_nc_u32_e32 v6, 23, v4
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
 ; GFX11-NEXT:    v_dual_cndmask_b32 v5, v5, v7 :: v_dual_and_b32 v4, 0xffffff, v4
@@ -5827,7 +5827,7 @@ define i128 @v_fshl_i128(i128 %lhs, i128 %rhs, i128 %amt) {
 ; GFX6-NEXT:    v_lshl_b64 v[0:1], v[0:1], v17
 ; GFX6-NEXT:    v_or_b32_e32 v9, v9, v11
 ; GFX6-NEXT:    v_or_b32_e32 v10, v10, v12
-; GFX6-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v15
+; GFX6-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v15
 ; GFX6-NEXT:    v_cndmask_b32_e32 v11, 0, v13, vcc
 ; GFX6-NEXT:    v_cndmask_b32_e32 v12, 0, v14, vcc
 ; GFX6-NEXT:    v_cndmask_b32_e32 v0, v0, v9, vcc
@@ -5850,7 +5850,7 @@ define i128 @v_fshl_i128(i128 %lhs, i128 %rhs, i128 %amt) {
 ; GFX6-NEXT:    v_lshr_b64 v[2:3], v[2:3], v15
 ; GFX6-NEXT:    v_or_b32_e32 v4, v4, v6
 ; GFX6-NEXT:    v_or_b32_e32 v5, v5, v7
-; GFX6-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v14
+; GFX6-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v14
 ; GFX6-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
 ; GFX6-NEXT:    v_cndmask_b32_e32 v3, v3, v5, vcc
 ; GFX6-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v14
@@ -5876,7 +5876,7 @@ define i128 @v_fshl_i128(i128 %lhs, i128 %rhs, i128 %amt) {
 ; GFX8-NEXT:    v_lshlrev_b64 v[0:1], v17, v[0:1]
 ; GFX8-NEXT:    v_or_b32_e32 v9, v9, v11
 ; GFX8-NEXT:    v_or_b32_e32 v10, v10, v12
-; GFX8-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v15
+; GFX8-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v15
 ; GFX8-NEXT:    v_cndmask_b32_e32 v11, 0, v13, vcc
 ; GFX8-NEXT:    v_cndmask_b32_e32 v12, 0, v14, vcc
 ; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v9, vcc
@@ -5899,7 +5899,7 @@ define i128 @v_fshl_i128(i128 %lhs, i128 %rhs, i128 %amt) {
 ; GFX8-NEXT:    v_lshrrev_b64 v[2:3], v15, v[2:3]
 ; GFX8-NEXT:    v_or_b32_e32 v4, v4, v6
 ; GFX8-NEXT:    v_or_b32_e32 v5, v5, v7
-; GFX8-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v14
+; GFX8-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v14
 ; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
 ; GFX8-NEXT:    v_cndmask_b32_e32 v3, v3, v5, vcc
 ; GFX8-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v14
@@ -5925,7 +5925,7 @@ define i128 @v_fshl_i128(i128 %lhs, i128 %rhs, i128 %amt) {
 ; GFX9-NEXT:    v_lshlrev_b64 v[0:1], v16, v[0:1]
 ; GFX9-NEXT:    v_or_b32_e32 v9, v9, v11
 ; GFX9-NEXT:    v_or_b32_e32 v10, v10, v12
-; GFX9-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v15
+; GFX9-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v15
 ; GFX9-NEXT:    v_cndmask_b32_e32 v11, 0, v13, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v12, 0, v14, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v9, vcc
@@ -5946,7 +5946,7 @@ define i128 @v_fshl_i128(i128 %lhs, i128 %rhs, i128 %amt) {
 ; GFX9-NEXT:    v_lshrrev_b64 v[2:3], v15, v[2:3]
 ; GFX9-NEXT:    v_or_b32_e32 v4, v4, v6
 ; GFX9-NEXT:    v_or_b32_e32 v5, v5, v7
-; GFX9-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v14
+; GFX9-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v14
 ; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v3, v3, v5, vcc
 ; GFX9-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v14
@@ -5976,7 +5976,7 @@ define i128 @v_fshl_i128(i128 %lhs, i128 %rhs, i128 %amt) {
 ; GFX10-NEXT:    v_sub_nc_u32_e32 v16, 64, v19
 ; GFX10-NEXT:    v_lshlrev_b64 v[12:13], v18, v[0:1]
 ; GFX10-NEXT:    v_lshlrev_b64 v[0:1], v14, v[0:1]
-; GFX10-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 64, v18
+; GFX10-NEXT:    v_cmp_ge_u32_e32 vcc_lo, 64, v18
 ; GFX10-NEXT:    v_or_b32_e32 v8, v10, v8
 ; GFX10-NEXT:    v_add_nc_u32_e32 v10, 0xffffffc0, v19
 ; GFX10-NEXT:    v_lshrrev_b64 v[14:15], v19, v[4:5]
@@ -5984,7 +5984,7 @@ define i128 @v_fshl_i128(i128 %lhs, i128 %rhs, i128 %amt) {
 ; GFX10-NEXT:    v_or_b32_e32 v11, v11, v9
 ; GFX10-NEXT:    v_cndmask_b32_e32 v20, v0, v8, vcc_lo
 ; GFX10-NEXT:    v_lshrrev_b64 v[8:9], v10, v[6:7]
-; GFX10-NEXT:    v_cmp_gt_u32_e64 s5, 64, v19
+; GFX10-NEXT:    v_cmp_ge_u32_e64 s5, 64, v19
 ; GFX10-NEXT:    v_cmp_eq_u32_e64 s4, 0, v18
 ; GFX10-NEXT:    v_or_b32_e32 v0, v14, v16
 ; GFX10-NEXT:    v_or_b32_e32 v10, v15, v17
@@ -6018,14 +6018,14 @@ define i128 @v_fshl_i128(i128 %lhs, i128 %rhs, i128 %amt) {
 ; GFX11-NEXT:    v_and_b32_e32 v19, 0x7f, v12
 ; GFX11-NEXT:    v_lshlrev_b64 v[8:9], v18, v[2:3]
 ; GFX11-NEXT:    v_lshlrev_b64 v[12:13], v18, v[0:1]
-; GFX11-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 64, v18
+; GFX11-NEXT:    v_cmp_ge_u32_e32 vcc_lo, 64, v18
 ; GFX11-NEXT:    v_add_nc_u32_e32 v14, 0xffffffc0, v18
 ; GFX11-NEXT:    v_lshrrev_b64 v[10:11], v10, v[0:1]
 ; GFX11-NEXT:    v_lshl_or_b32 v5, v6, 31, v5
 ; GFX11-NEXT:    v_lshrrev_b64 v[6:7], 1, v[6:7]
 ; GFX11-NEXT:    v_sub_nc_u32_e32 v16, 64, v19
 ; GFX11-NEXT:    v_lshlrev_b64 v[0:1], v14, v[0:1]
-; GFX11-NEXT:    v_cmp_gt_u32_e64 s1, 64, v19
+; GFX11-NEXT:    v_cmp_ge_u32_e64 s1, 64, v19
 ; GFX11-NEXT:    v_or_b32_e32 v8, v10, v8
 ; GFX11-NEXT:    v_add_nc_u32_e32 v10, 0xffffffc0, v19
 ; GFX11-NEXT:    v_lshrrev_b64 v[14:15], v19, v[4:5]
@@ -6073,7 +6073,7 @@ define amdgpu_ps <4 x float> @v_fshl_i128_ssv(i128 inreg %lhs, i128 inreg %rhs,
 ; GFX6-NEXT:    v_or_b32_e32 v3, v1, v3
 ; GFX6-NEXT:    v_or_b32_e32 v4, v2, v4
 ; GFX6-NEXT:    v_lshl_b64 v[1:2], s[0:1], v9
-; GFX6-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v7
+; GFX6-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v7
 ; GFX6-NEXT:    v_not_b32_e32 v0, v0
 ; GFX6-NEXT:    s_mov_b32 s8, 0
 ; GFX6-NEXT:    v_cndmask_b32_e32 v9, 0, v5, vcc
@@ -6099,7 +6099,7 @@ define amdgpu_ps <4 x float> @v_fshl_i128_ssv(i128 inreg %lhs, i128 inreg %rhs,
 ; GFX6-NEXT:    v_or_b32_e32 v3, v1, v3
 ; GFX6-NEXT:    v_lshr_b64 v[0:1], s[2:3], v8
 ; GFX6-NEXT:    v_lshr_b64 v[4:5], s[2:3], v11
-; GFX6-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v11
+; GFX6-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v11
 ; GFX6-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; GFX6-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
 ; GFX6-NEXT:    v_mov_b32_e32 v2, s0
@@ -6126,7 +6126,7 @@ define amdgpu_ps <4 x float> @v_fshl_i128_ssv(i128 inreg %lhs, i128 inreg %rhs,
 ; GFX8-NEXT:    v_or_b32_e32 v3, v1, v3
 ; GFX8-NEXT:    v_or_b32_e32 v4, v2, v4
 ; GFX8-NEXT:    v_lshlrev_b64 v[1:2], v9, s[0:1]
-; GFX8-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v7
+; GFX8-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v7
 ; GFX8-NEXT:    v_not_b32_e32 v0, v0
 ; GFX8-NEXT:    s_mov_b32 s8, 0
 ; GFX8-NEXT:    v_cndmask_b32_e32 v9, 0, v5, vcc
@@ -6152,7 +6152,7 @@ define amdgpu_ps <4 x float> @v_fshl_i128_ssv(i128 inreg %lhs, i128 inreg %rhs,
 ; GFX8-NEXT:    v_or_b32_e32 v3, v1, v3
 ; GFX8-NEXT:    v_lshrrev_b64 v[0:1], v8, s[2:3]
 ; GFX8-NEXT:    v_lshrrev_b64 v[4:5], v11, s[2:3]
-; GFX8-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v11
+; GFX8-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v11
 ; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
 ; GFX8-NEXT:    v_mov_b32_e32 v2, s0
@@ -6179,7 +6179,7 @@ define amdgpu_ps <4 x float> @v_fshl_i128_ssv(i128 inreg %lhs, i128 inreg %rhs,
 ; GFX9-NEXT:    v_or_b32_e32 v3, v1, v3
 ; GFX9-NEXT:    v_or_b32_e32 v4, v2, v4
 ; GFX9-NEXT:    v_lshlrev_b64 v[1:2], v8, s[0:1]
-; GFX9-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v7
+; GFX9-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v7
 ; GFX9-NEXT:    v_not_b32_e32 v0, v0
 ; GFX9-NEXT:    s_mov_b32 s8, 0
 ; GFX9-NEXT:    v_cndmask_b32_e32 v8, 0, v5, vcc
@@ -6204,7 +6204,7 @@ define amdgpu_ps <4 x float> @v_fshl_i128_ssv(i128 inreg %lhs, i128 inreg %rhs,
 ; GFX9-NEXT:    v_or_b32_e32 v3, v1, v3
 ; GFX9-NEXT:    v_lshrrev_b64 v[0:1], v11, s[2:3]
 ; GFX9-NEXT:    v_lshrrev_b64 v[4:5], v10, s[2:3]
-; GFX9-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v10
+; GFX9-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v10
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
 ; GFX9-NEXT:    v_mov_b32_e32 v2, s0
@@ -6234,14 +6234,14 @@ define amdgpu_ps <4 x float> @v_fshl_i128_ssv(i128 inreg %lhs, i128 inreg %rhs,
 ; GFX10-NEXT:    s_or_b64 s[8:9], s[4:5], s[8:9]
 ; GFX10-NEXT:    v_lshrrev_b64 v[2:3], v2, s[0:1]
 ; GFX10-NEXT:    s_lshr_b64 s[6:7], s[6:7], 1
-; GFX10-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 64, v12
+; GFX10-NEXT:    v_cmp_ge_u32_e32 vcc_lo, 64, v12
 ; GFX10-NEXT:    v_lshlrev_b64 v[6:7], v7, s[0:1]
 ; GFX10-NEXT:    v_add_nc_u32_e32 v14, 0xffffffc0, v13
 ; GFX10-NEXT:    v_lshrrev_b64 v[8:9], v13, s[8:9]
 ; GFX10-NEXT:    v_or_b32_e32 v0, v2, v0
 ; GFX10-NEXT:    v_sub_nc_u32_e32 v2, 64, v13
 ; GFX10-NEXT:    v_lshlrev_b64 v[4:5], v12, s[0:1]
-; GFX10-NEXT:    v_cmp_gt_u32_e64 s1, 64, v13
+; GFX10-NEXT:    v_cmp_ge_u32_e64 s1, 64, v13
 ; GFX10-NEXT:    v_cmp_eq_u32_e64 s0, 0, v12
 ; GFX10-NEXT:    v_cndmask_b32_e32 v6, v6, v0, vcc_lo
 ; GFX10-NEXT:    v_lshlrev_b64 v[10:11], v2, s[6:7]
@@ -6277,7 +6277,7 @@ define amdgpu_ps <4 x float> @v_fshl_i128_ssv(i128 inreg %lhs, i128 inreg %rhs,
 ; GFX11-NEXT:    s_mov_b32 s8, 0
 ; GFX11-NEXT:    v_sub_nc_u32_e32 v2, 64, v12
 ; GFX11-NEXT:    v_lshlrev_b64 v[0:1], v12, s[2:3]
-; GFX11-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 64, v12
+; GFX11-NEXT:    v_cmp_ge_u32_e32 vcc_lo, 64, v12
 ; GFX11-NEXT:    v_and_b32_e32 v13, 0x7f, v6
 ; GFX11-NEXT:    v_add_nc_u32_e32 v7, 0xffffffc0, v12
 ; GFX11-NEXT:    v_lshrrev_b64 v[2:3], v2, s[0:1]
@@ -6298,7 +6298,7 @@ define amdgpu_ps <4 x float> @v_fshl_i128_ssv(i128 inreg %lhs, i128 inreg %rhs,
 ; GFX11-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc_lo
 ; GFX11-NEXT:    v_add_nc_u32_e32 v14, 0xffffffc0, v13
 ; GFX11-NEXT:    v_lshrrev_b64 v[8:9], v13, s[8:9]
-; GFX11-NEXT:    v_cmp_gt_u32_e64 s1, 64, v13
+; GFX11-NEXT:    v_cmp_ge_u32_e64 s1, 64, v13
 ; GFX11-NEXT:    v_cmp_eq_u32_e64 s4, 0, v13
 ; GFX11-NEXT:    v_cndmask_b32_e32 v5, 0, v5, vcc_lo
 ; GFX11-NEXT:    v_lshrrev_b64 v[0:1], v14, s[6:7]
@@ -7466,7 +7466,7 @@ define <2 x i128> @v_fshl_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
 ; GFX6-NEXT:    v_or_b32_e32 v19, v19, v17
 ; GFX6-NEXT:    v_lshl_b64 v[16:17], v[0:1], v16
 ; GFX6-NEXT:    v_lshl_b64 v[0:1], v[0:1], v23
-; GFX6-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v23
+; GFX6-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v23
 ; GFX6-NEXT:    v_cndmask_b32_e32 v26, 0, v0, vcc
 ; GFX6-NEXT:    v_cndmask_b32_e32 v0, v16, v21, vcc
 ; GFX6-NEXT:    v_cndmask_b32_e32 v16, v17, v22, vcc
@@ -7475,7 +7475,7 @@ define <2 x i128> @v_fshl_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
 ; GFX6-NEXT:    v_cndmask_b32_e64 v16, v16, v3, s[4:5]
 ; GFX6-NEXT:    v_add_i32_e64 v0, s[4:5], v24, v25
 ; GFX6-NEXT:    v_lshr_b64 v[2:3], v[10:11], v0
-; GFX6-NEXT:    v_cmp_gt_u32_e64 s[4:5], 64, v24
+; GFX6-NEXT:    v_cmp_ge_u32_e64 s[4:5], 64, v24
 ; GFX6-NEXT:    v_cndmask_b32_e64 v2, v2, v18, s[4:5]
 ; GFX6-NEXT:    v_cndmask_b32_e32 v18, 0, v1, vcc
 ; GFX6-NEXT:    v_lshr_b64 v[0:1], v[10:11], v24
@@ -7498,7 +7498,7 @@ define <2 x i128> @v_fshl_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
 ; GFX6-NEXT:    v_or_b32_e32 v11, v9, v11
 ; GFX6-NEXT:    v_lshl_b64 v[8:9], v[4:5], v17
 ; GFX6-NEXT:    v_lshl_b64 v[4:5], v[4:5], v16
-; GFX6-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v17
+; GFX6-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v17
 ; GFX6-NEXT:    v_cndmask_b32_e32 v16, 0, v8, vcc
 ; GFX6-NEXT:    v_cndmask_b32_e32 v18, 0, v9, vcc
 ; GFX6-NEXT:    v_cndmask_b32_e32 v4, v4, v10, vcc
@@ -7520,7 +7520,7 @@ define <2 x i128> @v_fshl_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
 ; GFX6-NEXT:    v_or_b32_e32 v11, v9, v11
 ; GFX6-NEXT:    v_lshr_b64 v[8:9], v[6:7], v12
 ; GFX6-NEXT:    v_lshr_b64 v[6:7], v[6:7], v13
-; GFX6-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v12
+; GFX6-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v12
 ; GFX6-NEXT:    v_cndmask_b32_e32 v6, v6, v10, vcc
 ; GFX6-NEXT:    v_cndmask_b32_e32 v7, v7, v11, vcc
 ; GFX6-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v12
@@ -7558,7 +7558,7 @@ define <2 x i128> @v_fshl_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
 ; GFX8-NEXT:    v_or_b32_e32 v19, v19, v17
 ; GFX8-NEXT:    v_lshlrev_b64 v[16:17], v16, v[0:1]
 ; GFX8-NEXT:    v_lshlrev_b64 v[0:1], v23, v[0:1]
-; GFX8-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v23
+; GFX8-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v23
 ; GFX8-NEXT:    v_cndmask_b32_e32 v26, 0, v0, vcc
 ; GFX8-NEXT:    v_cndmask_b32_e32 v0, v16, v21, vcc
 ; GFX8-NEXT:    v_cndmask_b32_e32 v16, v17, v22, vcc
@@ -7567,7 +7567,7 @@ define <2 x i128> @v_fshl_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
 ; GFX8-NEXT:    v_cndmask_b32_e64 v16, v16, v3, s[4:5]
 ; GFX8-NEXT:    v_add_u32_e64 v0, s[4:5], v24, v25
 ; GFX8-NEXT:    v_lshrrev_b64 v[2:3], v0, v[10:11]
-; GFX8-NEXT:    v_cmp_gt_u32_e64 s[4:5], 64, v24
+; GFX8-NEXT:    v_cmp_ge_u32_e64 s[4:5], 64, v24
 ; GFX8-NEXT:    v_cndmask_b32_e64 v2, v2, v18, s[4:5]
 ; GFX8-NEXT:    v_cndmask_b32_e32 v18, 0, v1, vcc
 ; GFX8-NEXT:    v_lshrrev_b64 v[0:1], v24, v[10:11]
@@ -7590,7 +7590,7 @@ define <2 x i128> @v_fshl_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
 ; GFX8-NEXT:    v_or_b32_e32 v11, v9, v11
 ; GFX8-NEXT:    v_lshlrev_b64 v[8:9], v17, v[4:5]
 ; GFX8-NEXT:    v_lshlrev_b64 v[4:5], v16, v[4:5]
-; GFX8-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v17
+; GFX8-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v17
 ; GFX8-NEXT:    v_cndmask_b32_e32 v16, 0, v8, vcc
 ; GFX8-NEXT:    v_cndmask_b32_e32 v18, 0, v9, vcc
 ; GFX8-NEXT:    v_cndmask_b32_e32 v4, v4, v10, vcc
@@ -7612,7 +7612,7 @@ define <2 x i128> @v_fshl_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
 ; GFX8-NEXT:    v_or_b32_e32 v11, v9, v11
 ; GFX8-NEXT:    v_lshrrev_b64 v[8:9], v12, v[6:7]
 ; GFX8-NEXT:    v_lshrrev_b64 v[6:7], v13, v[6:7]
-; GFX8-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v12
+; GFX8-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v12
 ; GFX8-NEXT:    v_cndmask_b32_e32 v6, v6, v10, vcc
 ; GFX8-NEXT:    v_cndmask_b32_e32 v7, v7, v11, vcc
 ; GFX8-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v12
@@ -7643,7 +7643,7 @@ define <2 x i128> @v_fshl_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
 ; GFX9-NEXT:    v_or_b32_e32 v22, v18, v22
 ; GFX9-NEXT:    v_lshlrev_b64 v[16:17], v16, v[10:11]
 ; GFX9-NEXT:    v_lshrrev_b64 v[18:19], v24, v[8:9]
-; GFX9-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v23
+; GFX9-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v23
 ; GFX9-NEXT:    v_or_b32_e32 v18, v18, v16
 ; GFX9-NEXT:    v_add_u32_e32 v16, 0xffffffc0, v23
 ; GFX9-NEXT:    v_or_b32_e32 v19, v19, v17
@@ -7657,7 +7657,7 @@ define <2 x i128> @v_fshl_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
 ; GFX9-NEXT:    v_add_u32_e32 v0, 0xffffffc0, v24
 ; GFX9-NEXT:    v_cndmask_b32_e64 v16, v16, v3, s[4:5]
 ; GFX9-NEXT:    v_lshrrev_b64 v[2:3], v0, v[10:11]
-; GFX9-NEXT:    v_cmp_gt_u32_e64 s[4:5], 64, v24
+; GFX9-NEXT:    v_cmp_ge_u32_e64 s[4:5], 64, v24
 ; GFX9-NEXT:    v_cndmask_b32_e64 v2, v2, v18, s[4:5]
 ; GFX9-NEXT:    v_cndmask_b32_e32 v18, 0, v1, vcc
 ; GFX9-NEXT:    v_lshrrev_b64 v[0:1], v24, v[10:11]
@@ -7680,7 +7680,7 @@ define <2 x i128> @v_fshl_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
 ; GFX9-NEXT:    v_or_b32_e32 v11, v9, v11
 ; GFX9-NEXT:    v_lshlrev_b64 v[8:9], v16, v[4:5]
 ; GFX9-NEXT:    v_lshlrev_b64 v[4:5], v17, v[4:5]
-; GFX9-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v16
+; GFX9-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v16
 ; GFX9-NEXT:    v_cndmask_b32_e32 v17, 0, v8, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v18, 0, v9, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v4, v4, v10, vcc
@@ -7701,7 +7701,7 @@ define <2 x i128> @v_fshl_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
 ; GFX9-NEXT:    v_or_b32_e32 v11, v9, v11
 ; GFX9-NEXT:    v_lshrrev_b64 v[8:9], v13, v[6:7]
 ; GFX9-NEXT:    v_lshrrev_b64 v[6:7], v14, v[6:7]
-; GFX9-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v13
+; GFX9-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v13
 ; GFX9-NEXT:    v_cndmask_b32_e32 v6, v6, v10, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v7, v7, v11, vcc
 ; GFX9-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v13
@@ -7731,12 +7731,12 @@ define <2 x i128> @v_fshl_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
 ; GFX10-NEXT:    v_sub_nc_u32_e32 v25, 64, v28
 ; GFX10-NEXT:    v_lshlrev_b64 v[16:17], v27, v[0:1]
 ; GFX10-NEXT:    v_lshlrev_b64 v[0:1], v23, v[0:1]
-; GFX10-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 64, v27
+; GFX10-NEXT:    v_cmp_ge_u32_e32 vcc_lo, 64, v27
 ; GFX10-NEXT:    v_or_b32_e32 v18, v18, v21
 ; GFX10-NEXT:    v_add_nc_u32_e32 v21, 0xffffffc0, v28
 ; GFX10-NEXT:    v_lshrrev_b64 v[23:24], v28, v[8:9]
 ; GFX10-NEXT:    v_lshlrev_b64 v[25:26], v25, v[10:11]
-; GFX10-NEXT:    v_cmp_gt_u32_e64 s5, 64, v28
+; GFX10-NEXT:    v_cmp_ge_u32_e64 s5, 64, v28
 ; GFX10-NEXT:    v_cndmask_b32_e32 v29, v0, v18, vcc_lo
 ; GFX10-NEXT:    v_or_b32_e32 v0, v19, v22
 ; GFX10-NEXT:    v_lshrrev_b64 v[18:19], v21, v[10:11]
@@ -7771,7 +7771,7 @@ define <2 x i128> @v_fshl_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
 ; GFX10-NEXT:    v_sub_nc_u32_e32 v18, 64, v20
 ; GFX10-NEXT:    v_lshlrev_b64 v[12:13], v23, v[4:5]
 ; GFX10-NEXT:    v_lshlrev_b64 v[4:5], v17, v[4:5]
-; GFX10-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 64, v23
+; GFX10-NEXT:    v_cmp_ge_u32_e32 vcc_lo, 64, v23
 ; GFX10-NEXT:    v_or_b32_e32 v10, v2, v10
 ; GFX10-NEXT:    v_add_nc_u32_e32 v26, 0xffffffc0, v20
 ; GFX10-NEXT:    v_lshrrev_b64 v[16:17], v20, v[8:9]
@@ -7780,7 +7780,7 @@ define <2 x i128> @v_fshl_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
 ; GFX10-NEXT:    v_or_b32_e32 v11, v3, v11
 ; GFX10-NEXT:    v_cndmask_b32_e32 v21, v4, v10, vcc_lo
 ; GFX10-NEXT:    v_lshrrev_b64 v[3:4], v26, v[14:15]
-; GFX10-NEXT:    v_cmp_gt_u32_e64 s5, 64, v20
+; GFX10-NEXT:    v_cmp_ge_u32_e64 s5, 64, v20
 ; GFX10-NEXT:    v_or_b32_e32 v10, v16, v18
 ; GFX10-NEXT:    v_or_b32_e32 v16, v17, v19
 ; GFX10-NEXT:    v_cndmask_b32_e32 v5, v5, v11, vcc_lo
@@ -7813,7 +7813,7 @@ define <2 x i128> @v_fshl_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3)
 ; GFX11-NEXT:    v_lshlrev_b64 v[16:17], v27, v[0:1]
 ; GFX11-NEXT:    v_sub_nc_u32_e32 v18, 64, v27
-; GFX11-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 64, v27
+; GFX11-NEXT:    v_cmp_ge_u32_e32 vcc_lo, 64, v27
 ; GFX11-NEXT:    v_add_nc_u32_e32 v23, 0xffffffc0, v27
 ; GFX11-NEXT:    v_lshl_or_b32 v9, v10, 31, v9
 ; GFX11-NEXT:    v_lshrrev_b64 v[10:11], 1, v[10:11]
@@ -7830,7 +7830,7 @@ define <2 x i128> @v_fshl_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
 ; GFX11-NEXT:    v_add_nc_u32_e32 v21, 0xffffffc0, v28
 ; GFX11-NEXT:    v_lshrrev_b64 v[23:24], v28, v[8:9]
 ; GFX11-NEXT:    v_or_b32_e32 v0, v19, v22
-; GFX11-NEXT:    v_cmp_gt_u32_e64 s1, 64, v28
+; GFX11-NEXT:    v_cmp_ge_u32_e64 s1, 64, v28
 ; GFX11-NEXT:    v_lshlrev_b64 v[25:26], v25, v[10:11]
 ; GFX11-NEXT:    v_lshrrev_b64 v[18:19], v21, v[10:11]
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_4)
@@ -7861,7 +7861,7 @@ define <2 x i128> @v_fshl_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
 ; GFX11-NEXT:    v_or_b32_e32 v1, v17, v3
 ; GFX11-NEXT:    v_lshlrev_b64 v[10:11], v23, v[6:7]
 ; GFX11-NEXT:    v_lshrrev_b64 v[2:3], v2, v[4:5]
-; GFX11-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 64, v23
+; GFX11-NEXT:    v_cmp_ge_u32_e32 vcc_lo, 64, v23
 ; GFX11-NEXT:    v_add_nc_u32_e32 v17, 0xffffffc0, v23
 ; GFX11-NEXT:    v_lshl_or_b32 v9, v14, 31, v9
 ; GFX11-NEXT:    v_lshrrev_b64 v[14:15], 1, v[14:15]
@@ -7876,7 +7876,7 @@ define <2 x i128> @v_fshl_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
 ; GFX11-NEXT:    v_or_b32_e32 v11, v3, v11
 ; GFX11-NEXT:    v_cndmask_b32_e32 v21, v4, v10, vcc_lo
 ; GFX11-NEXT:    v_lshrrev_b64 v[3:4], v26, v[14:15]
-; GFX11-NEXT:    v_cmp_gt_u32_e64 s1, 64, v20
+; GFX11-NEXT:    v_cmp_ge_u32_e64 s1, 64, v20
 ; GFX11-NEXT:    v_or_b32_e32 v10, v16, v18
 ; GFX11-NEXT:    v_or_b32_e32 v16, v17, v19
 ; GFX11-NEXT:    v_cndmask_b32_e32 v5, v5, v11, vcc_lo
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll
index 238cc06fc7f7c..bc3222228e6dc 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll
@@ -22,10 +22,10 @@ define amdgpu_ps i7 @s_fshr_i7(i7 inreg %lhs, i7 inreg %rhs, i7 inreg %amt) {
 ; GFX6-NEXT:    v_mul_lo_u32 v0, v0, 7
 ; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, s2, v0
 ; GFX6-NEXT:    v_add_i32_e32 v1, vcc, -7, v0
-; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 7, v0
+; GFX6-NEXT:    v_cmp_lt_u32_e32 vcc, 7, v0
 ; GFX6-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
 ; GFX6-NEXT:    v_add_i32_e32 v1, vcc, -7, v0
-; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 7, v0
+; GFX6-NEXT:    v_cmp_lt_u32_e32 vcc, 7, v0
 ; GFX6-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
 ; GFX6-NEXT:    v_sub_i32_e32 v1, vcc, 6, v0
 ; GFX6-NEXT:    v_and_b32_e32 v0, 0x7f, v0
@@ -52,10 +52,10 @@ define amdgpu_ps i7 @s_fshr_i7(i7 inreg %lhs, i7 inreg %rhs, i7 inreg %amt) {
 ; GFX8-NEXT:    v_mul_lo_u32 v0, v0, 7
 ; GFX8-NEXT:    v_sub_u32_e32 v0, vcc, s2, v0
 ; GFX8-NEXT:    v_add_u32_e32 v1, vcc, -7, v0
-; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 7, v0
+; GFX8-NEXT:    v_cmp_lt_u32_e32 vcc, 7, v0
 ; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
 ; GFX8-NEXT:    v_add_u32_e32 v1, vcc, -7, v0
-; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 7, v0
+; GFX8-NEXT:    v_cmp_lt_u32_e32 vcc, 7, v0
 ; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
 ; GFX8-NEXT:    v_sub_u16_e32 v1, 6, v0
 ; GFX8-NEXT:    v_and_b32_e32 v0, 0x7f, v0
@@ -82,10 +82,10 @@ define amdgpu_ps i7 @s_fshr_i7(i7 inreg %lhs, i7 inreg %rhs, i7 inreg %amt) {
 ; GFX9-NEXT:    v_mul_lo_u32 v0, v0, 7
 ; GFX9-NEXT:    v_sub_u32_e32 v0, s2, v0
 ; GFX9-NEXT:    v_add_u32_e32 v1, -7, v0
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 7, v0
+; GFX9-NEXT:    v_cmp_lt_u32_e32 vcc, 7, v0
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
 ; GFX9-NEXT:    v_add_u32_e32 v1, -7, v0
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 7, v0
+; GFX9-NEXT:    v_cmp_lt_u32_e32 vcc, 7, v0
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
 ; GFX9-NEXT:    v_sub_u16_e32 v1, 6, v0
 ; GFX9-NEXT:    v_and_b32_e32 v0, 0x7f, v0
@@ -112,10 +112,10 @@ define amdgpu_ps i7 @s_fshr_i7(i7 inreg %lhs, i7 inreg %rhs, i7 inreg %amt) {
 ; GFX10-NEXT:    v_mul_lo_u32 v0, v0, 7
 ; GFX10-NEXT:    v_sub_nc_u32_e32 v0, s2, v0
 ; GFX10-NEXT:    v_add_nc_u32_e32 v1, -7, v0
-; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 7, v0
+; GFX10-NEXT:    v_cmp_lt_u32_e32 vcc_lo, 7, v0
 ; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
 ; GFX10-NEXT:    v_add_nc_u32_e32 v1, -7, v0
-; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 7, v0
+; GFX10-NEXT:    v_cmp_lt_u32_e32 vcc_lo, 7, v0
 ; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
 ; GFX10-NEXT:    v_sub_nc_u16 v1, 6, v0
 ; GFX10-NEXT:    v_and_b32_e32 v0, 0x7f, v0
@@ -148,11 +148,11 @@ define amdgpu_ps i7 @s_fshr_i7(i7 inreg %lhs, i7 inreg %rhs, i7 inreg %amt) {
 ; GFX11-NEXT:    v_sub_nc_u32_e32 v0, s2, v0
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
 ; GFX11-NEXT:    v_add_nc_u32_e32 v1, -7, v0
-; GFX11-NEXT:    v_cmp_le_u32_e32 vcc_lo, 7, v0
+; GFX11-NEXT:    v_cmp_lt_u32_e32 vcc_lo, 7, v0
 ; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
 ; GFX11-NEXT:    v_add_nc_u32_e32 v1, -7, v0
-; GFX11-NEXT:    v_cmp_le_u32_e32 vcc_lo, 7, v0
+; GFX11-NEXT:    v_cmp_lt_u32_e32 vcc_lo, 7, v0
 ; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
 ; GFX11-NEXT:    v_sub_nc_u16 v1, 6, v0
@@ -187,10 +187,10 @@ define i7 @v_fshr_i7(i7 %lhs, i7 %rhs, i7 %amt) {
 ; GFX6-NEXT:    v_mul_lo_u32 v3, v3, 7
 ; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, v2, v3
 ; GFX6-NEXT:    v_add_i32_e32 v3, vcc, -7, v2
-; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 7, v2
+; GFX6-NEXT:    v_cmp_lt_u32_e32 vcc, 7, v2
 ; GFX6-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
 ; GFX6-NEXT:    v_add_i32_e32 v3, vcc, -7, v2
-; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 7, v2
+; GFX6-NEXT:    v_cmp_lt_u32_e32 vcc, 7, v2
 ; GFX6-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
 ; GFX6-NEXT:    v_sub_i32_e32 v3, vcc, 6, v2
 ; GFX6-NEXT:    v_and_b32_e32 v2, 0x7f, v2
@@ -217,10 +217,10 @@ define i7 @v_fshr_i7(i7 %lhs, i7 %rhs, i7 %amt) {
 ; GFX8-NEXT:    v_mul_lo_u32 v3, v3, 7
 ; GFX8-NEXT:    v_sub_u32_e32 v2, vcc, v2, v3
 ; GFX8-NEXT:    v_add_u32_e32 v3, vcc, -7, v2
-; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 7, v2
+; GFX8-NEXT:    v_cmp_lt_u32_e32 vcc, 7, v2
 ; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
 ; GFX8-NEXT:    v_add_u32_e32 v3, vcc, -7, v2
-; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 7, v2
+; GFX8-NEXT:    v_cmp_lt_u32_e32 vcc, 7, v2
 ; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
 ; GFX8-NEXT:    v_sub_u16_e32 v3, 6, v2
 ; GFX8-NEXT:    v_and_b32_e32 v2, 0x7f, v2
@@ -247,10 +247,10 @@ define i7 @v_fshr_i7(i7 %lhs, i7 %rhs, i7 %amt) {
 ; GFX9-NEXT:    v_mul_lo_u32 v3, v3, 7
 ; GFX9-NEXT:    v_sub_u32_e32 v2, v2, v3
 ; GFX9-NEXT:    v_add_u32_e32 v3, -7, v2
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 7, v2
+; GFX9-NEXT:    v_cmp_lt_u32_e32 vcc, 7, v2
 ; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
 ; GFX9-NEXT:    v_add_u32_e32 v3, -7, v2
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 7, v2
+; GFX9-NEXT:    v_cmp_lt_u32_e32 vcc, 7, v2
 ; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
 ; GFX9-NEXT:    v_sub_u16_e32 v3, 6, v2
 ; GFX9-NEXT:    v_and_b32_e32 v2, 0x7f, v2
@@ -277,10 +277,10 @@ define i7 @v_fshr_i7(i7 %lhs, i7 %rhs, i7 %amt) {
 ; GFX10-NEXT:    v_mul_lo_u32 v3, v3, 7
 ; GFX10-NEXT:    v_sub_nc_u32_e32 v2, v2, v3
 ; GFX10-NEXT:    v_add_nc_u32_e32 v3, -7, v2
-; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 7, v2
+; GFX10-NEXT:    v_cmp_lt_u32_e32 vcc_lo, 7, v2
 ; GFX10-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc_lo
 ; GFX10-NEXT:    v_add_nc_u32_e32 v3, -7, v2
-; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 7, v2
+; GFX10-NEXT:    v_cmp_lt_u32_e32 vcc_lo, 7, v2
 ; GFX10-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc_lo
 ; GFX10-NEXT:    v_sub_nc_u16 v3, 6, v2
 ; GFX10-NEXT:    v_and_b32_e32 v2, 0x7f, v2
@@ -313,11 +313,11 @@ define i7 @v_fshr_i7(i7 %lhs, i7 %rhs, i7 %amt) {
 ; GFX11-NEXT:    v_sub_nc_u32_e32 v2, v2, v3
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
 ; GFX11-NEXT:    v_add_nc_u32_e32 v3, -7, v2
-; GFX11-NEXT:    v_cmp_le_u32_e32 vcc_lo, 7, v2
+; GFX11-NEXT:    v_cmp_lt_u32_e32 vcc_lo, 7, v2
 ; GFX11-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc_lo
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
 ; GFX11-NEXT:    v_add_nc_u32_e32 v3, -7, v2
-; GFX11-NEXT:    v_cmp_le_u32_e32 vcc_lo, 7, v2
+; GFX11-NEXT:    v_cmp_lt_u32_e32 vcc_lo, 7, v2
 ; GFX11-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc_lo
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
 ; GFX11-NEXT:    v_sub_nc_u16 v3, 6, v2
@@ -1471,10 +1471,10 @@ define amdgpu_ps i24 @s_fshr_i24(i24 inreg %lhs, i24 inreg %rhs, i24 inreg %amt)
 ; GFX6-NEXT:    v_mul_lo_u32 v0, v0, 24
 ; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, s2, v0
 ; GFX6-NEXT:    v_add_i32_e32 v2, vcc, v0, v1
-; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 24, v0
+; GFX6-NEXT:    v_cmp_lt_u32_e32 vcc, 24, v0
 ; GFX6-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; GFX6-NEXT:    v_add_i32_e32 v1, vcc, v0, v1
-; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 24, v0
+; GFX6-NEXT:    v_cmp_lt_u32_e32 vcc, 24, v0
 ; GFX6-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
 ; GFX6-NEXT:    v_sub_i32_e32 v1, vcc, 23, v0
 ; GFX6-NEXT:    v_and_b32_e32 v0, 0xffffff, v0
@@ -1502,10 +1502,10 @@ define amdgpu_ps i24 @s_fshr_i24(i24 inreg %lhs, i24 inreg %rhs, i24 inreg %amt)
 ; GFX8-NEXT:    v_mul_lo_u32 v0, v0, 24
 ; GFX8-NEXT:    v_sub_u32_e32 v0, vcc, s2, v0
 ; GFX8-NEXT:    v_add_u32_e32 v2, vcc, v0, v1
-; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 24, v0
+; GFX8-NEXT:    v_cmp_lt_u32_e32 vcc, 24, v0
 ; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; GFX8-NEXT:    v_add_u32_e32 v1, vcc, v0, v1
-; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 24, v0
+; GFX8-NEXT:    v_cmp_lt_u32_e32 vcc, 24, v0
 ; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
 ; GFX8-NEXT:    v_sub_u32_e32 v1, vcc, 23, v0
 ; GFX8-NEXT:    v_and_b32_e32 v0, 0xffffff, v0
@@ -1533,10 +1533,10 @@ define amdgpu_ps i24 @s_fshr_i24(i24 inreg %lhs, i24 inreg %rhs, i24 inreg %amt)
 ; GFX9-NEXT:    v_mul_lo_u32 v0, v0, 24
 ; GFX9-NEXT:    v_sub_u32_e32 v0, s2, v0
 ; GFX9-NEXT:    v_add_u32_e32 v1, 0xffffffe8, v0
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 24, v0
+; GFX9-NEXT:    v_cmp_lt_u32_e32 vcc, 24, v0
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
 ; GFX9-NEXT:    v_add_u32_e32 v1, 0xffffffe8, v0
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 24, v0
+; GFX9-NEXT:    v_cmp_lt_u32_e32 vcc, 24, v0
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
 ; GFX9-NEXT:    v_sub_u32_e32 v1, 23, v0
 ; GFX9-NEXT:    v_and_b32_e32 v0, 0xffffff, v0
@@ -1562,10 +1562,10 @@ define amdgpu_ps i24 @s_fshr_i24(i24 inreg %lhs, i24 inreg %rhs, i24 inreg %amt)
 ; GFX10-NEXT:    v_mul_lo_u32 v0, v0, 24
 ; GFX10-NEXT:    v_sub_nc_u32_e32 v0, s2, v0
 ; GFX10-NEXT:    v_add_nc_u32_e32 v1, 0xffffffe8, v0
-; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v0
+; GFX10-NEXT:    v_cmp_lt_u32_e32 vcc_lo, 24, v0
 ; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
 ; GFX10-NEXT:    v_add_nc_u32_e32 v1, 0xffffffe8, v0
-; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v0
+; GFX10-NEXT:    v_cmp_lt_u32_e32 vcc_lo, 24, v0
 ; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
 ; GFX10-NEXT:    v_sub_nc_u32_e32 v1, 23, v0
 ; GFX10-NEXT:    v_and_b32_e32 v0, 0xffffff, v0
@@ -1597,11 +1597,11 @@ define amdgpu_ps i24 @s_fshr_i24(i24 inreg %lhs, i24 inreg %rhs, i24 inreg %amt)
 ; GFX11-NEXT:    v_sub_nc_u32_e32 v0, s2, v0
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
 ; GFX11-NEXT:    v_add_nc_u32_e32 v1, 0xffffffe8, v0
-; GFX11-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v0
+; GFX11-NEXT:    v_cmp_lt_u32_e32 vcc_lo, 24, v0
 ; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
 ; GFX11-NEXT:    v_add_nc_u32_e32 v1, 0xffffffe8, v0
-; GFX11-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v0
+; GFX11-NEXT:    v_cmp_lt_u32_e32 vcc_lo, 24, v0
 ; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
 ; GFX11-NEXT:    v_sub_nc_u32_e32 v1, 23, v0
@@ -1636,10 +1636,10 @@ define i24 @v_fshr_i24(i24 %lhs, i24 %rhs, i24 %amt) {
 ; GFX6-NEXT:    v_mul_lo_u32 v3, v3, 24
 ; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, v2, v3
 ; GFX6-NEXT:    v_add_i32_e32 v3, vcc, v2, v4
-; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 24, v2
+; GFX6-NEXT:    v_cmp_lt_u32_e32 vcc, 24, v2
 ; GFX6-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
 ; GFX6-NEXT:    v_add_i32_e32 v3, vcc, 0xffffffe8, v2
-; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 24, v2
+; GFX6-NEXT:    v_cmp_lt_u32_e32 vcc, 24, v2
 ; GFX6-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
 ; GFX6-NEXT:    v_sub_i32_e32 v3, vcc, 23, v2
 ; GFX6-NEXT:    v_and_b32_e32 v2, 0xffffff, v2
@@ -1667,10 +1667,10 @@ define i24 @v_fshr_i24(i24 %lhs, i24 %rhs, i24 %amt) {
 ; GFX8-NEXT:    v_mul_lo_u32 v3, v3, 24
 ; GFX8-NEXT:    v_sub_u32_e32 v2, vcc, v2, v3
 ; GFX8-NEXT:    v_add_u32_e32 v3, vcc, v2, v4
-; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 24, v2
+; GFX8-NEXT:    v_cmp_lt_u32_e32 vcc, 24, v2
 ; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
 ; GFX8-NEXT:    v_add_u32_e32 v3, vcc, 0xffffffe8, v2
-; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 24, v2
+; GFX8-NEXT:    v_cmp_lt_u32_e32 vcc, 24, v2
 ; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
 ; GFX8-NEXT:    v_sub_u32_e32 v3, vcc, 23, v2
 ; GFX8-NEXT:    v_and_b32_e32 v2, 0xffffff, v2
@@ -1698,10 +1698,10 @@ define i24 @v_fshr_i24(i24 %lhs, i24 %rhs, i24 %amt) {
 ; GFX9-NEXT:    v_mul_lo_u32 v3, v3, 24
 ; GFX9-NEXT:    v_sub_u32_e32 v2, v2, v3
 ; GFX9-NEXT:    v_add_u32_e32 v3, 0xffffffe8, v2
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 24, v2
+; GFX9-NEXT:    v_cmp_lt_u32_e32 vcc, 24, v2
 ; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
 ; GFX9-NEXT:    v_add_u32_e32 v3, 0xffffffe8, v2
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 24, v2
+; GFX9-NEXT:    v_cmp_lt_u32_e32 vcc, 24, v2
 ; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
 ; GFX9-NEXT:    v_sub_u32_e32 v3, 23, v2
 ; GFX9-NEXT:    v_and_b32_e32 v2, 0xffffff, v2
@@ -1727,10 +1727,10 @@ define i24 @v_fshr_i24(i24 %lhs, i24 %rhs, i24 %amt) {
 ; GFX10-NEXT:    v_mul_lo_u32 v3, v3, 24
 ; GFX10-NEXT:    v_sub_nc_u32_e32 v2, v2, v3
 ; GFX10-NEXT:    v_add_nc_u32_e32 v3, 0xffffffe8, v2
-; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v2
+; GFX10-NEXT:    v_cmp_lt_u32_e32 vcc_lo, 24, v2
 ; GFX10-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc_lo
 ; GFX10-NEXT:    v_add_nc_u32_e32 v3, 0xffffffe8, v2
-; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v2
+; GFX10-NEXT:    v_cmp_lt_u32_e32 vcc_lo, 24, v2
 ; GFX10-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc_lo
 ; GFX10-NEXT:    v_sub_nc_u32_e32 v3, 23, v2
 ; GFX10-NEXT:    v_and_b32_e32 v2, 0xffffff, v2
@@ -1762,11 +1762,11 @@ define i24 @v_fshr_i24(i24 %lhs, i24 %rhs, i24 %amt) {
 ; GFX11-NEXT:    v_sub_nc_u32_e32 v2, v2, v3
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
 ; GFX11-NEXT:    v_add_nc_u32_e32 v3, 0xffffffe8, v2
-; GFX11-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v2
+; GFX11-NEXT:    v_cmp_lt_u32_e32 vcc_lo, 24, v2
 ; GFX11-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc_lo
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
 ; GFX11-NEXT:    v_add_nc_u32_e32 v3, 0xffffffe8, v2
-; GFX11-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v2
+; GFX11-NEXT:    v_cmp_lt_u32_e32 vcc_lo, 24, v2
 ; GFX11-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc_lo
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
 ; GFX11-NEXT:    v_sub_nc_u32_e32 v3, 23, v2
@@ -1840,23 +1840,23 @@ define amdgpu_ps i48 @s_fshr_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 i
 ; GFX6-NEXT:    v_mul_hi_u32 v2, v5, v2
 ; GFX6-NEXT:    v_sub_i32_e32 v4, vcc, s2, v4
 ; GFX6-NEXT:    v_add_i32_e32 v6, vcc, v4, v3
-; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 24, v4
+; GFX6-NEXT:    v_cmp_lt_u32_e32 vcc, 24, v4
 ; GFX6-NEXT:    v_mul_lo_u32 v2, v2, 24
 ; GFX6-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc
 ; GFX6-NEXT:    v_add_i32_e32 v6, vcc, v4, v3
-; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 24, v4
+; GFX6-NEXT:    v_cmp_lt_u32_e32 vcc, 24, v4
 ; GFX6-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc
 ; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, v5, v2
 ; GFX6-NEXT:    v_sub_i32_e32 v6, vcc, 23, v4
 ; GFX6-NEXT:    v_add_i32_e32 v5, vcc, v2, v3
-; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 24, v2
+; GFX6-NEXT:    v_cmp_lt_u32_e32 vcc, 24, v2
 ; GFX6-NEXT:    v_cndmask_b32_e32 v2, v2, v5, vcc
 ; GFX6-NEXT:    s_and_b32 s6, s6, 0xff
 ; GFX6-NEXT:    s_and_b32 s8, 0xffff, s8
 ; GFX6-NEXT:    v_add_i32_e32 v3, vcc, v2, v3
 ; GFX6-NEXT:    s_lshl_b32 s2, s6, 17
 ; GFX6-NEXT:    s_lshl_b32 s3, s8, 1
-; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 24, v2
+; GFX6-NEXT:    v_cmp_lt_u32_e32 vcc, 24, v2
 ; GFX6-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX6-NEXT:    s_or_b32 s2, s2, s3
 ; GFX6-NEXT:    v_and_b32_e32 v6, 0xffffff, v6
@@ -1954,10 +1954,10 @@ define amdgpu_ps i48 @s_fshr_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 i
 ; GFX8-NEXT:    v_sub_u32_e32 v2, vcc, s4, v2
 ; GFX8-NEXT:    v_add_u32_e32 v3, vcc, v2, v1
 ; GFX8-NEXT:    v_mul_hi_u32 v0, s5, v0
-; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 24, v2
+; GFX8-NEXT:    v_cmp_lt_u32_e32 vcc, 24, v2
 ; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
 ; GFX8-NEXT:    v_add_u32_e32 v3, vcc, v2, v1
-; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 24, v2
+; GFX8-NEXT:    v_cmp_lt_u32_e32 vcc, 24, v2
 ; GFX8-NEXT:    v_mul_lo_u32 v0, v0, 24
 ; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
 ; GFX8-NEXT:    v_sub_u32_e32 v3, vcc, 23, v2
@@ -1971,10 +1971,10 @@ define amdgpu_ps i48 @s_fshr_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 i
 ; GFX8-NEXT:    v_sub_u32_e32 v0, vcc, s5, v0
 ; GFX8-NEXT:    v_or_b32_e32 v2, v3, v2
 ; GFX8-NEXT:    v_add_u32_e32 v3, vcc, v0, v1
-; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 24, v0
+; GFX8-NEXT:    v_cmp_lt_u32_e32 vcc, 24, v0
 ; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
 ; GFX8-NEXT:    v_add_u32_e32 v1, vcc, v0, v1
-; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 24, v0
+; GFX8-NEXT:    v_cmp_lt_u32_e32 vcc, 24, v0
 ; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
 ; GFX8-NEXT:    v_sub_u32_e32 v1, vcc, 23, v0
 ; GFX8-NEXT:    s_lshl_b32 s0, s7, 17
@@ -2062,11 +2062,11 @@ define amdgpu_ps i48 @s_fshr_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 i
 ; GFX9-NEXT:    v_mul_hi_u32 v0, s5, v0
 ; GFX9-NEXT:    v_sub_u32_e32 v1, s4, v1
 ; GFX9-NEXT:    v_add_u32_e32 v2, 0xffffffe8, v1
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 24, v1
+; GFX9-NEXT:    v_cmp_lt_u32_e32 vcc, 24, v1
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
 ; GFX9-NEXT:    v_mul_lo_u32 v0, v0, 24
 ; GFX9-NEXT:    v_add_u32_e32 v2, 0xffffffe8, v1
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 24, v1
+; GFX9-NEXT:    v_cmp_lt_u32_e32 vcc, 24, v1
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
 ; GFX9-NEXT:    v_sub_u32_e32 v2, 23, v1
 ; GFX9-NEXT:    s_lshl_b32 s4, s6, 17
@@ -2078,10 +2078,10 @@ define amdgpu_ps i48 @s_fshr_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 i
 ; GFX9-NEXT:    v_sub_u32_e32 v0, s5, v0
 ; GFX9-NEXT:    v_lshl_or_b32 v1, s0, v2, v1
 ; GFX9-NEXT:    v_add_u32_e32 v2, 0xffffffe8, v0
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 24, v0
+; GFX9-NEXT:    v_cmp_lt_u32_e32 vcc, 24, v0
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; GFX9-NEXT:    v_add_u32_e32 v2, 0xffffffe8, v0
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 24, v0
+; GFX9-NEXT:    v_cmp_lt_u32_e32 vcc, 24, v0
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; GFX9-NEXT:    v_sub_u32_e32 v2, 23, v0
 ; GFX9-NEXT:    s_lshl_b32 s0, s7, 17
@@ -2176,17 +2176,17 @@ define amdgpu_ps i48 @s_fshr_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 i
 ; GFX10-NEXT:    s_lshl_b32 s5, s11, 16
 ; GFX10-NEXT:    s_or_b32 s2, s2, s4
 ; GFX10-NEXT:    v_add_nc_u32_e32 v3, 0xffffffe8, v2
-; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v2
+; GFX10-NEXT:    v_cmp_lt_u32_e32 vcc_lo, 24, v2
 ; GFX10-NEXT:    v_add_nc_u32_e32 v4, 0xffffffe8, v0
 ; GFX10-NEXT:    s_or_b32 s2, s2, s5
 ; GFX10-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc_lo
-; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v0
+; GFX10-NEXT:    v_cmp_lt_u32_e32 vcc_lo, 24, v0
 ; GFX10-NEXT:    v_add_nc_u32_e32 v3, 0xffffffe8, v2
 ; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc_lo
-; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v2
+; GFX10-NEXT:    v_cmp_lt_u32_e32 vcc_lo, 24, v2
 ; GFX10-NEXT:    v_add_nc_u32_e32 v4, 0xffffffe8, v0
 ; GFX10-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc_lo
-; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v0
+; GFX10-NEXT:    v_cmp_lt_u32_e32 vcc_lo, 24, v0
 ; GFX10-NEXT:    v_sub_nc_u32_e32 v3, 23, v2
 ; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc_lo
 ; GFX10-NEXT:    v_and_b32_e32 v2, 0xffffff, v2
@@ -2281,21 +2281,21 @@ define amdgpu_ps i48 @s_fshr_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 i
 ; GFX11-NEXT:    s_lshl_b32 s5, s11, 16
 ; GFX11-NEXT:    s_or_b32 s2, s2, s4
 ; GFX11-NEXT:    v_add_nc_u32_e32 v2, 0xffffffe8, v1
-; GFX11-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v1
+; GFX11-NEXT:    v_cmp_lt_u32_e32 vcc_lo, 24, v1
 ; GFX11-NEXT:    v_add_nc_u32_e32 v3, 0xffffffe8, v0
 ; GFX11-NEXT:    s_or_b32 s2, s2, s5
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2)
 ; GFX11-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc_lo
-; GFX11-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v0
+; GFX11-NEXT:    v_cmp_lt_u32_e32 vcc_lo, 24, v0
 ; GFX11-NEXT:    v_add_nc_u32_e32 v2, 0xffffffe8, v1
 ; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc_lo
-; GFX11-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v1
+; GFX11-NEXT:    v_cmp_lt_u32_e32 vcc_lo, 24, v1
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX11-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc_lo
 ; GFX11-NEXT:    v_sub_nc_u32_e32 v2, 23, v1
 ; GFX11-NEXT:    v_and_b32_e32 v1, 0xffffff, v1
 ; GFX11-NEXT:    v_add_nc_u32_e32 v3, 0xffffffe8, v0
-; GFX11-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v0
+; GFX11-NEXT:    v_cmp_lt_u32_e32 vcc_lo, 24, v0
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
 ; GFX11-NEXT:    v_and_b32_e32 v2, 0xffffff, v2
 ; GFX11-NEXT:    v_lshrrev_b32_e64 v1, v1, s2
@@ -2361,10 +2361,10 @@ define <2 x i24> @v_fshr_v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt) {
 ; GFX6-NEXT:    v_mul_lo_u32 v6, v6, 24
 ; GFX6-NEXT:    v_sub_i32_e32 v4, vcc, v4, v8
 ; GFX6-NEXT:    v_add_i32_e32 v8, vcc, v4, v7
-; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 24, v4
+; GFX6-NEXT:    v_cmp_lt_u32_e32 vcc, 24, v4
 ; GFX6-NEXT:    v_cndmask_b32_e32 v4, v4, v8, vcc
 ; GFX6-NEXT:    v_add_i32_e32 v8, vcc, v4, v7
-; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 24, v4
+; GFX6-NEXT:    v_cmp_lt_u32_e32 vcc, 24, v4
 ; GFX6-NEXT:    v_cndmask_b32_e32 v4, v4, v8, vcc
 ; GFX6-NEXT:    v_sub_i32_e32 v8, vcc, 23, v4
 ; GFX6-NEXT:    v_and_b32_e32 v8, 0xffffff, v8
@@ -2374,10 +2374,10 @@ define <2 x i24> @v_fshr_v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt) {
 ; GFX6-NEXT:    v_or_b32_e32 v0, v0, v2
 ; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, v5, v6
 ; GFX6-NEXT:    v_add_i32_e32 v4, vcc, v2, v7
-; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 24, v2
+; GFX6-NEXT:    v_cmp_lt_u32_e32 vcc, 24, v2
 ; GFX6-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
 ; GFX6-NEXT:    v_add_i32_e32 v4, vcc, 0xffffffe8, v2
-; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 24, v2
+; GFX6-NEXT:    v_cmp_lt_u32_e32 vcc, 24, v2
 ; GFX6-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
 ; GFX6-NEXT:    v_sub_i32_e32 v4, vcc, 23, v2
 ; GFX6-NEXT:    v_and_b32_e32 v4, 0xffffff, v4
@@ -2410,10 +2410,10 @@ define <2 x i24> @v_fshr_v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt) {
 ; GFX8-NEXT:    v_mul_lo_u32 v6, v6, 24
 ; GFX8-NEXT:    v_sub_u32_e32 v4, vcc, v4, v8
 ; GFX8-NEXT:    v_add_u32_e32 v8, vcc, v4, v7
-; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 24, v4
+; GFX8-NEXT:    v_cmp_lt_u32_e32 vcc, 24, v4
 ; GFX8-NEXT:    v_cndmask_b32_e32 v4, v4, v8, vcc
 ; GFX8-NEXT:    v_add_u32_e32 v8, vcc, v4, v7
-; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 24, v4
+; GFX8-NEXT:    v_cmp_lt_u32_e32 vcc, 24, v4
 ; GFX8-NEXT:    v_cndmask_b32_e32 v4, v4, v8, vcc
 ; GFX8-NEXT:    v_sub_u32_e32 v8, vcc, 23, v4
 ; GFX8-NEXT:    v_and_b32_e32 v8, 0xffffff, v8
@@ -2423,10 +2423,10 @@ define <2 x i24> @v_fshr_v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt) {
 ; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
 ; GFX8-NEXT:    v_sub_u32_e32 v2, vcc, v5, v6
 ; GFX8-NEXT:    v_add_u32_e32 v4, vcc, v2, v7
-; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 24, v2
+; GFX8-NEXT:    v_cmp_lt_u32_e32 vcc, 24, v2
 ; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
 ; GFX8-NEXT:    v_add_u32_e32 v4, vcc, 0xffffffe8, v2
-; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 24, v2
+; GFX8-NEXT:    v_cmp_lt_u32_e32 vcc, 24, v2
 ; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
 ; GFX8-NEXT:    v_sub_u32_e32 v4, vcc, 23, v2
 ; GFX8-NEXT:    v_and_b32_e32 v4, 0xffffff, v4
@@ -2460,10 +2460,10 @@ define <2 x i24> @v_fshr_v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt) {
 ; GFX9-NEXT:    v_sub_u32_e32 v4, v4, v7
 ; GFX9-NEXT:    v_sub_u32_e32 v5, v5, v6
 ; GFX9-NEXT:    v_add_u32_e32 v6, 0xffffffe8, v4
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 24, v4
+; GFX9-NEXT:    v_cmp_lt_u32_e32 vcc, 24, v4
 ; GFX9-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc
 ; GFX9-NEXT:    v_add_u32_e32 v6, 0xffffffe8, v4
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 24, v4
+; GFX9-NEXT:    v_cmp_lt_u32_e32 vcc, 24, v4
 ; GFX9-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc
 ; GFX9-NEXT:    v_sub_u32_e32 v6, 23, v4
 ; GFX9-NEXT:    v_and_b32_e32 v4, 0xffffff, v4
@@ -2471,10 +2471,10 @@ define <2 x i24> @v_fshr_v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt) {
 ; GFX9-NEXT:    v_lshrrev_b32_e32 v2, v4, v2
 ; GFX9-NEXT:    v_lshl_or_b32 v0, v0, v6, v2
 ; GFX9-NEXT:    v_add_u32_e32 v2, 0xffffffe8, v5
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 24, v5
+; GFX9-NEXT:    v_cmp_lt_u32_e32 vcc, 24, v5
 ; GFX9-NEXT:    v_cndmask_b32_e32 v2, v5, v2, vcc
 ; GFX9-NEXT:    v_add_u32_e32 v4, 0xffffffe8, v2
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 24, v2
+; GFX9-NEXT:    v_cmp_lt_u32_e32 vcc, 24, v2
 ; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
 ; GFX9-NEXT:    v_sub_u32_e32 v4, 23, v2
 ; GFX9-NEXT:    v_and_b32_e32 v2, 0xffffff, v2
@@ -2506,16 +2506,16 @@ define <2 x i24> @v_fshr_v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt) {
 ; GFX10-NEXT:    v_sub_nc_u32_e32 v4, v4, v7
 ; GFX10-NEXT:    v_sub_nc_u32_e32 v5, v5, v6
 ; GFX10-NEXT:    v_add_nc_u32_e32 v6, 0xffffffe8, v4
-; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v4
+; GFX10-NEXT:    v_cmp_lt_u32_e32 vcc_lo, 24, v4
 ; GFX10-NEXT:    v_add_nc_u32_e32 v7, 0xffffffe8, v5
 ; GFX10-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc_lo
-; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v5
+; GFX10-NEXT:    v_cmp_lt_u32_e32 vcc_lo, 24, v5
 ; GFX10-NEXT:    v_add_nc_u32_e32 v6, 0xffffffe8, v4
 ; GFX10-NEXT:    v_cndmask_b32_e32 v5, v5, v7, vcc_lo
-; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v4
+; GFX10-NEXT:    v_cmp_lt_u32_e32 vcc_lo, 24, v4
 ; GFX10-NEXT:    v_add_nc_u32_e32 v7, 0xffffffe8, v5
 ; GFX10-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc_lo
-; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v5
+; GFX10-NEXT:    v_cmp_lt_u32_e32 vcc_lo, 24, v5
 ; GFX10-NEXT:    v_sub_nc_u32_e32 v6, 23, v4
 ; GFX10-NEXT:    v_cndmask_b32_e32 v5, v5, v7, vcc_lo
 ; GFX10-NEXT:    v_and_b32_e32 v4, 0xffffff, v4
@@ -2551,7 +2551,7 @@ define <2 x i24> @v_fshr_v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt) {
 ; GFX11-NEXT:    v_mul_lo_u32 v7, v7, 24
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX11-NEXT:    v_sub_nc_u32_e32 v4, v4, v7
-; GFX11-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v4
+; GFX11-NEXT:    v_cmp_lt_u32_e32 vcc_lo, 24, v4
 ; GFX11-NEXT:    v_and_b32_e32 v5, 0xffffff, v5
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX11-NEXT:    v_mul_hi_u32 v6, v5, v6
@@ -2561,14 +2561,14 @@ define <2 x i24> @v_fshr_v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt) {
 ; GFX11-NEXT:    v_add_nc_u32_e32 v6, 0xffffffe8, v4
 ; GFX11-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc_lo
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v5
+; GFX11-NEXT:    v_cmp_lt_u32_e32 vcc_lo, 24, v5
 ; GFX11-NEXT:    v_add_nc_u32_e32 v6, 0xffffffe8, v4
 ; GFX11-NEXT:    v_add_nc_u32_e32 v7, 0xffffffe8, v5
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
 ; GFX11-NEXT:    v_cndmask_b32_e32 v5, v5, v7, vcc_lo
-; GFX11-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v4
+; GFX11-NEXT:    v_cmp_lt_u32_e32 vcc_lo, 24, v4
 ; GFX11-NEXT:    v_dual_cndmask_b32 v4, v4, v6 :: v_dual_add_nc_u32 v7, 0xffffffe8, v5
-; GFX11-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v5
+; GFX11-NEXT:    v_cmp_lt_u32_e32 vcc_lo, 24, v5
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
 ; GFX11-NEXT:    v_sub_nc_u32_e32 v6, 23, v4
 ; GFX11-NEXT:    v_dual_cndmask_b32 v5, v5, v7 :: v_dual_and_b32 v4, 0xffffff, v4
@@ -5859,7 +5859,7 @@ define i128 @v_fshr_i128(i128 %lhs, i128 %rhs, i128 %amt) {
 ; GFX6-NEXT:    v_or_b32_e32 v11, v0, v11
 ; GFX6-NEXT:    v_or_b32_e32 v12, v1, v12
 ; GFX6-NEXT:    v_lshl_b64 v[0:1], v[9:10], v17
-; GFX6-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v15
+; GFX6-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v15
 ; GFX6-NEXT:    v_cndmask_b32_e32 v10, 0, v13, vcc
 ; GFX6-NEXT:    v_cndmask_b32_e32 v13, 0, v14, vcc
 ; GFX6-NEXT:    v_cndmask_b32_e32 v0, v0, v11, vcc
@@ -5876,7 +5876,7 @@ define i128 @v_fshr_i128(i128 %lhs, i128 %rhs, i128 %amt) {
 ; GFX6-NEXT:    v_or_b32_e32 v3, v1, v3
 ; GFX6-NEXT:    v_lshr_b64 v[0:1], v[6:7], v15
 ; GFX6-NEXT:    v_lshr_b64 v[8:9], v[6:7], v14
-; GFX6-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v14
+; GFX6-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v14
 ; GFX6-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; GFX6-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
 ; GFX6-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v14
@@ -5908,7 +5908,7 @@ define i128 @v_fshr_i128(i128 %lhs, i128 %rhs, i128 %amt) {
 ; GFX8-NEXT:    v_or_b32_e32 v11, v0, v11
 ; GFX8-NEXT:    v_or_b32_e32 v12, v1, v12
 ; GFX8-NEXT:    v_lshlrev_b64 v[0:1], v17, v[9:10]
-; GFX8-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v15
+; GFX8-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v15
 ; GFX8-NEXT:    v_cndmask_b32_e32 v10, 0, v13, vcc
 ; GFX8-NEXT:    v_cndmask_b32_e32 v13, 0, v14, vcc
 ; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v11, vcc
@@ -5925,7 +5925,7 @@ define i128 @v_fshr_i128(i128 %lhs, i128 %rhs, i128 %amt) {
 ; GFX8-NEXT:    v_or_b32_e32 v3, v1, v3
 ; GFX8-NEXT:    v_lshrrev_b64 v[0:1], v15, v[6:7]
 ; GFX8-NEXT:    v_lshrrev_b64 v[8:9], v14, v[6:7]
-; GFX8-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v14
+; GFX8-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v14
 ; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
 ; GFX8-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v14
@@ -5956,7 +5956,7 @@ define i128 @v_fshr_i128(i128 %lhs, i128 %rhs, i128 %amt) {
 ; GFX9-NEXT:    v_or_b32_e32 v11, v0, v11
 ; GFX9-NEXT:    v_or_b32_e32 v12, v1, v12
 ; GFX9-NEXT:    v_lshlrev_b64 v[0:1], v16, v[9:10]
-; GFX9-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v15
+; GFX9-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v15
 ; GFX9-NEXT:    v_cndmask_b32_e32 v10, 0, v13, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v13, 0, v14, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v11, vcc
@@ -5973,7 +5973,7 @@ define i128 @v_fshr_i128(i128 %lhs, i128 %rhs, i128 %amt) {
 ; GFX9-NEXT:    v_or_b32_e32 v3, v1, v3
 ; GFX9-NEXT:    v_lshrrev_b64 v[0:1], v15, v[6:7]
 ; GFX9-NEXT:    v_lshrrev_b64 v[8:9], v14, v[6:7]
-; GFX9-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v14
+; GFX9-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v14
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
 ; GFX9-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v14
@@ -6007,11 +6007,11 @@ define i128 @v_fshr_i128(i128 %lhs, i128 %rhs, i128 %amt) {
 ; GFX10-NEXT:    v_lshlrev_b64 v[0:1], v14, v[0:1]
 ; GFX10-NEXT:    v_lshrrev_b64 v[14:15], v21, v[4:5]
 ; GFX10-NEXT:    v_lshlrev_b64 v[16:17], v16, v[6:7]
-; GFX10-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 64, v20
+; GFX10-NEXT:    v_cmp_ge_u32_e32 vcc_lo, 64, v20
 ; GFX10-NEXT:    v_lshrrev_b64 v[18:19], v18, v[6:7]
 ; GFX10-NEXT:    v_or_b32_e32 v10, v12, v10
 ; GFX10-NEXT:    v_or_b32_e32 v11, v13, v11
-; GFX10-NEXT:    v_cmp_gt_u32_e64 s5, 64, v21
+; GFX10-NEXT:    v_cmp_ge_u32_e64 s5, 64, v21
 ; GFX10-NEXT:    v_or_b32_e32 v12, v15, v17
 ; GFX10-NEXT:    v_cmp_eq_u32_e64 s4, 0, v20
 ; GFX10-NEXT:    v_cndmask_b32_e32 v10, v0, v10, vcc_lo
@@ -6047,7 +6047,7 @@ define i128 @v_fshr_i128(i128 %lhs, i128 %rhs, i128 %amt) {
 ; GFX11-NEXT:    v_or_b32_e32 v2, v2, v10
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
 ; GFX11-NEXT:    v_sub_nc_u32_e32 v12, 64, v20
-; GFX11-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 64, v20
+; GFX11-NEXT:    v_cmp_ge_u32_e32 vcc_lo, 64, v20
 ; GFX11-NEXT:    v_lshlrev_b64 v[10:11], v20, v[2:3]
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX11-NEXT:    v_lshrrev_b64 v[12:13], v12, v[0:1]
@@ -6064,7 +6064,7 @@ define i128 @v_fshr_i128(i128 %lhs, i128 %rhs, i128 %amt) {
 ; GFX11-NEXT:    v_sub_nc_u32_e32 v16, 64, v21
 ; GFX11-NEXT:    v_add_nc_u32_e32 v18, 0xffffffc0, v21
 ; GFX11-NEXT:    v_lshrrev_b64 v[14:15], v21, v[4:5]
-; GFX11-NEXT:    v_cmp_gt_u32_e64 s1, 64, v21
+; GFX11-NEXT:    v_cmp_ge_u32_e64 s1, 64, v21
 ; GFX11-NEXT:    v_cndmask_b32_e32 v11, v1, v11, vcc_lo
 ; GFX11-NEXT:    v_lshlrev_b64 v[16:17], v16, v[6:7]
 ; GFX11-NEXT:    v_lshrrev_b64 v[18:19], v18, v[6:7]
@@ -6115,7 +6115,7 @@ define amdgpu_ps <4 x float> @v_fshr_i128_ssv(i128 inreg %lhs, i128 inreg %rhs,
 ; GFX6-NEXT:    v_or_b32_e32 v3, v1, v3
 ; GFX6-NEXT:    v_or_b32_e32 v4, v2, v4
 ; GFX6-NEXT:    v_lshl_b64 v[1:2], s[8:9], v9
-; GFX6-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v7
+; GFX6-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v7
 ; GFX6-NEXT:    v_cndmask_b32_e32 v9, 0, v5, vcc
 ; GFX6-NEXT:    v_cndmask_b32_e32 v6, 0, v6, vcc
 ; GFX6-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
@@ -6134,7 +6134,7 @@ define amdgpu_ps <4 x float> @v_fshr_i128_ssv(i128 inreg %lhs, i128 inreg %rhs,
 ; GFX6-NEXT:    v_or_b32_e32 v3, v1, v3
 ; GFX6-NEXT:    v_lshr_b64 v[0:1], s[6:7], v8
 ; GFX6-NEXT:    v_lshr_b64 v[4:5], s[6:7], v11
-; GFX6-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v11
+; GFX6-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v11
 ; GFX6-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; GFX6-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
 ; GFX6-NEXT:    v_mov_b32_e32 v2, s4
@@ -6168,7 +6168,7 @@ define amdgpu_ps <4 x float> @v_fshr_i128_ssv(i128 inreg %lhs, i128 inreg %rhs,
 ; GFX8-NEXT:    v_or_b32_e32 v3, v1, v3
 ; GFX8-NEXT:    v_or_b32_e32 v4, v2, v4
 ; GFX8-NEXT:    v_lshlrev_b64 v[1:2], v9, s[8:9]
-; GFX8-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v7
+; GFX8-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v7
 ; GFX8-NEXT:    v_cndmask_b32_e32 v9, 0, v5, vcc
 ; GFX8-NEXT:    v_cndmask_b32_e32 v6, 0, v6, vcc
 ; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
@@ -6187,7 +6187,7 @@ define amdgpu_ps <4 x float> @v_fshr_i128_ssv(i128 inreg %lhs, i128 inreg %rhs,
 ; GFX8-NEXT:    v_or_b32_e32 v3, v1, v3
 ; GFX8-NEXT:    v_lshrrev_b64 v[0:1], v8, s[6:7]
 ; GFX8-NEXT:    v_lshrrev_b64 v[4:5], v11, s[6:7]
-; GFX8-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v11
+; GFX8-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v11
 ; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
 ; GFX8-NEXT:    v_mov_b32_e32 v2, s4
@@ -6220,7 +6220,7 @@ define amdgpu_ps <4 x float> @v_fshr_i128_ssv(i128 inreg %lhs, i128 inreg %rhs,
 ; GFX9-NEXT:    v_or_b32_e32 v3, v1, v3
 ; GFX9-NEXT:    v_or_b32_e32 v4, v2, v4
 ; GFX9-NEXT:    v_lshlrev_b64 v[1:2], v8, s[8:9]
-; GFX9-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v7
+; GFX9-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v7
 ; GFX9-NEXT:    v_cndmask_b32_e32 v8, 0, v5, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v6, 0, v6, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
@@ -6239,7 +6239,7 @@ define amdgpu_ps <4 x float> @v_fshr_i128_ssv(i128 inreg %lhs, i128 inreg %rhs,
 ; GFX9-NEXT:    v_or_b32_e32 v3, v1, v3
 ; GFX9-NEXT:    v_lshrrev_b64 v[0:1], v11, s[6:7]
 ; GFX9-NEXT:    v_lshrrev_b64 v[4:5], v10, s[6:7]
-; GFX9-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v10
+; GFX9-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v10
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
 ; GFX9-NEXT:    v_mov_b32_e32 v2, s4
@@ -6270,12 +6270,12 @@ define amdgpu_ps <4 x float> @v_fshr_i128_ssv(i128 inreg %lhs, i128 inreg %rhs,
 ; GFX10-NEXT:    v_add_nc_u32_e32 v6, 0xffffffc0, v12
 ; GFX10-NEXT:    v_lshlrev_b64 v[0:1], v12, s[8:9]
 ; GFX10-NEXT:    v_add_nc_u32_e32 v14, 0xffffffc0, v13
-; GFX10-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 64, v12
+; GFX10-NEXT:    v_cmp_ge_u32_e32 vcc_lo, 64, v12
 ; GFX10-NEXT:    v_lshrrev_b64 v[2:3], v2, s[10:11]
 ; GFX10-NEXT:    v_lshlrev_b64 v[6:7], v6, s[10:11]
 ; GFX10-NEXT:    v_lshrrev_b64 v[8:9], v13, s[4:5]
 ; GFX10-NEXT:    v_lshlrev_b64 v[10:11], v10, s[6:7]
-; GFX10-NEXT:    v_cmp_gt_u32_e64 s1, 64, v13
+; GFX10-NEXT:    v_cmp_ge_u32_e64 s1, 64, v13
 ; GFX10-NEXT:    v_lshlrev_b64 v[4:5], v12, s[10:11]
 ; GFX10-NEXT:    v_or_b32_e32 v0, v2, v0
 ; GFX10-NEXT:    v_or_b32_e32 v2, v3, v1
@@ -6317,14 +6317,14 @@ define amdgpu_ps <4 x float> @v_fshr_i128_ssv(i128 inreg %lhs, i128 inreg %rhs,
 ; GFX11-NEXT:    v_sub_nc_u32_e32 v10, 64, v13
 ; GFX11-NEXT:    v_sub_nc_u32_e32 v2, 64, v12
 ; GFX11-NEXT:    v_lshlrev_b64 v[0:1], v12, s[8:9]
-; GFX11-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 64, v12
+; GFX11-NEXT:    v_cmp_ge_u32_e32 vcc_lo, 64, v12
 ; GFX11-NEXT:    v_add_nc_u32_e32 v6, 0xffffffc0, v12
 ; GFX11-NEXT:    v_add_nc_u32_e32 v14, 0xffffffc0, v13
 ; GFX11-NEXT:    v_lshrrev_b64 v[2:3], v2, s[10:11]
 ; GFX11-NEXT:    v_lshrrev_b64 v[8:9], v13, s[4:5]
 ; GFX11-NEXT:    v_lshlrev_b64 v[10:11], v10, s[6:7]
 ; GFX11-NEXT:    v_lshlrev_b64 v[6:7], v6, s[10:11]
-; GFX11-NEXT:    v_cmp_gt_u32_e64 s1, 64, v13
+; GFX11-NEXT:    v_cmp_ge_u32_e64 s1, 64, v13
 ; GFX11-NEXT:    v_cmp_eq_u32_e64 s2, 0, v13
 ; GFX11-NEXT:    v_or_b32_e32 v0, v2, v0
 ; GFX11-NEXT:    v_or_b32_e32 v2, v3, v1
@@ -7505,7 +7505,7 @@ define <2 x i128> @v_fshr_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
 ; GFX6-NEXT:    v_add_i32_e32 v0, vcc, v19, v26
 ; GFX6-NEXT:    v_or_b32_e32 v22, v22, v1
 ; GFX6-NEXT:    v_lshl_b64 v[0:1], v[17:18], v0
-; GFX6-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v19
+; GFX6-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v19
 ; GFX6-NEXT:    v_cndmask_b32_e32 v0, v0, v23, vcc
 ; GFX6-NEXT:    v_cndmask_b32_e32 v1, v1, v24, vcc
 ; GFX6-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v19
@@ -7514,7 +7514,7 @@ define <2 x i128> @v_fshr_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
 ; GFX6-NEXT:    v_add_i32_e64 v0, s[4:5], v25, v26
 ; GFX6-NEXT:    v_lshl_b64 v[16:17], v[17:18], v19
 ; GFX6-NEXT:    v_lshr_b64 v[0:1], v[10:11], v0
-; GFX6-NEXT:    v_cmp_gt_u32_e64 s[4:5], 64, v25
+; GFX6-NEXT:    v_cmp_ge_u32_e64 s[4:5], 64, v25
 ; GFX6-NEXT:    v_cndmask_b32_e32 v16, 0, v16, vcc
 ; GFX6-NEXT:    v_cndmask_b32_e64 v18, v0, v21, s[4:5]
 ; GFX6-NEXT:    v_cndmask_b32_e64 v19, v1, v22, s[4:5]
@@ -7542,7 +7542,7 @@ define <2 x i128> @v_fshr_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
 ; GFX6-NEXT:    v_or_b32_e32 v11, v5, v11
 ; GFX6-NEXT:    v_lshl_b64 v[4:5], v[8:9], v16
 ; GFX6-NEXT:    v_lshl_b64 v[8:9], v[8:9], v17
-; GFX6-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v16
+; GFX6-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v16
 ; GFX6-NEXT:    v_or_b32_e32 v3, v3, v18
 ; GFX6-NEXT:    v_cndmask_b32_e32 v17, 0, v4, vcc
 ; GFX6-NEXT:    v_cndmask_b32_e32 v18, 0, v5, vcc
@@ -7560,7 +7560,7 @@ define <2 x i128> @v_fshr_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
 ; GFX6-NEXT:    v_or_b32_e32 v19, v5, v7
 ; GFX6-NEXT:    v_lshr_b64 v[6:7], v[14:15], v11
 ; GFX6-NEXT:    v_lshr_b64 v[4:5], v[14:15], v10
-; GFX6-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v10
+; GFX6-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v10
 ; GFX6-NEXT:    v_cndmask_b32_e32 v6, v6, v16, vcc
 ; GFX6-NEXT:    v_cndmask_b32_e32 v7, v7, v19, vcc
 ; GFX6-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v10
@@ -7597,7 +7597,7 @@ define <2 x i128> @v_fshr_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
 ; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v19, v26
 ; GFX8-NEXT:    v_or_b32_e32 v22, v22, v1
 ; GFX8-NEXT:    v_lshlrev_b64 v[0:1], v0, v[17:18]
-; GFX8-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v19
+; GFX8-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v19
 ; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v23, vcc
 ; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v24, vcc
 ; GFX8-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v19
@@ -7606,7 +7606,7 @@ define <2 x i128> @v_fshr_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
 ; GFX8-NEXT:    v_add_u32_e64 v0, s[4:5], v25, v26
 ; GFX8-NEXT:    v_lshlrev_b64 v[16:17], v19, v[17:18]
 ; GFX8-NEXT:    v_lshrrev_b64 v[0:1], v0, v[10:11]
-; GFX8-NEXT:    v_cmp_gt_u32_e64 s[4:5], 64, v25
+; GFX8-NEXT:    v_cmp_ge_u32_e64 s[4:5], 64, v25
 ; GFX8-NEXT:    v_cndmask_b32_e32 v16, 0, v16, vcc
 ; GFX8-NEXT:    v_cndmask_b32_e64 v18, v0, v21, s[4:5]
 ; GFX8-NEXT:    v_cndmask_b32_e64 v19, v1, v22, s[4:5]
@@ -7634,7 +7634,7 @@ define <2 x i128> @v_fshr_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
 ; GFX8-NEXT:    v_or_b32_e32 v11, v5, v11
 ; GFX8-NEXT:    v_lshlrev_b64 v[4:5], v16, v[8:9]
 ; GFX8-NEXT:    v_lshlrev_b64 v[8:9], v17, v[8:9]
-; GFX8-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v16
+; GFX8-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v16
 ; GFX8-NEXT:    v_or_b32_e32 v3, v3, v18
 ; GFX8-NEXT:    v_cndmask_b32_e32 v17, 0, v4, vcc
 ; GFX8-NEXT:    v_cndmask_b32_e32 v18, 0, v5, vcc
@@ -7652,7 +7652,7 @@ define <2 x i128> @v_fshr_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
 ; GFX8-NEXT:    v_or_b32_e32 v19, v5, v7
 ; GFX8-NEXT:    v_lshrrev_b64 v[6:7], v11, v[14:15]
 ; GFX8-NEXT:    v_lshrrev_b64 v[4:5], v10, v[14:15]
-; GFX8-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v10
+; GFX8-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v10
 ; GFX8-NEXT:    v_cndmask_b32_e32 v6, v6, v16, vcc
 ; GFX8-NEXT:    v_cndmask_b32_e32 v7, v7, v19, vcc
 ; GFX8-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v10
@@ -7684,7 +7684,7 @@ define <2 x i128> @v_fshr_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
 ; GFX9-NEXT:    v_or_b32_e32 v24, v1, v22
 ; GFX9-NEXT:    v_lshlrev_b64 v[0:1], v0, v[10:11]
 ; GFX9-NEXT:    v_lshrrev_b64 v[21:22], v25, v[8:9]
-; GFX9-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v19
+; GFX9-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v19
 ; GFX9-NEXT:    v_or_b32_e32 v21, v21, v0
 ; GFX9-NEXT:    v_add_u32_e32 v0, 0xffffffc0, v19
 ; GFX9-NEXT:    v_or_b32_e32 v22, v22, v1
@@ -7697,7 +7697,7 @@ define <2 x i128> @v_fshr_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
 ; GFX9-NEXT:    v_lshlrev_b64 v[16:17], v19, v[17:18]
 ; GFX9-NEXT:    v_cndmask_b32_e64 v3, v1, v3, s[4:5]
 ; GFX9-NEXT:    v_lshrrev_b64 v[0:1], v0, v[10:11]
-; GFX9-NEXT:    v_cmp_gt_u32_e64 s[4:5], 64, v25
+; GFX9-NEXT:    v_cmp_ge_u32_e64 s[4:5], 64, v25
 ; GFX9-NEXT:    v_cndmask_b32_e32 v16, 0, v16, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e64 v18, v0, v21, s[4:5]
 ; GFX9-NEXT:    v_cndmask_b32_e64 v19, v1, v22, s[4:5]
@@ -7726,7 +7726,7 @@ define <2 x i128> @v_fshr_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
 ; GFX9-NEXT:    v_or_b32_e32 v11, v5, v11
 ; GFX9-NEXT:    v_lshlrev_b64 v[4:5], v16, v[8:9]
 ; GFX9-NEXT:    v_lshlrev_b64 v[8:9], v17, v[8:9]
-; GFX9-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v16
+; GFX9-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v16
 ; GFX9-NEXT:    v_cndmask_b32_e32 v17, 0, v4, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v18, 0, v5, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v4, v8, v10, vcc
@@ -7743,7 +7743,7 @@ define <2 x i128> @v_fshr_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
 ; GFX9-NEXT:    v_or_b32_e32 v19, v5, v7
 ; GFX9-NEXT:    v_lshrrev_b64 v[6:7], v11, v[14:15]
 ; GFX9-NEXT:    v_lshrrev_b64 v[4:5], v10, v[14:15]
-; GFX9-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v10
+; GFX9-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v10
 ; GFX9-NEXT:    v_cndmask_b32_e32 v6, v6, v16, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v7, v7, v19, vcc
 ; GFX9-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v10
@@ -7772,7 +7772,7 @@ define <2 x i128> @v_fshr_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
 ; GFX10-NEXT:    v_or_b32_e32 v2, v2, v17
 ; GFX10-NEXT:    v_add_nc_u32_e32 v19, 0xffffffc0, v25
 ; GFX10-NEXT:    v_lshlrev_b64 v[23:24], v25, v[0:1]
-; GFX10-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 64, v25
+; GFX10-NEXT:    v_cmp_ge_u32_e32 vcc_lo, 64, v25
 ; GFX10-NEXT:    v_lshrrev_b64 v[17:18], v18, v[0:1]
 ; GFX10-NEXT:    v_lshlrev_b64 v[21:22], v25, v[2:3]
 ; GFX10-NEXT:    v_lshlrev_b64 v[0:1], v19, v[0:1]
@@ -7787,7 +7787,7 @@ define <2 x i128> @v_fshr_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
 ; GFX10-NEXT:    v_lshlrev_b64 v[18:19], v18, v[10:11]
 ; GFX10-NEXT:    v_cndmask_b32_e32 v21, v0, v21, vcc_lo
 ; GFX10-NEXT:    v_lshrrev_b64 v[0:1], v27, v[10:11]
-; GFX10-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 64, v26
+; GFX10-NEXT:    v_cmp_ge_u32_e32 vcc_lo, 64, v26
 ; GFX10-NEXT:    v_cndmask_b32_e64 v22, v22, v3, s4
 ; GFX10-NEXT:    v_or_b32_e32 v16, v16, v18
 ; GFX10-NEXT:    v_cndmask_b32_e64 v21, v21, v2, s4
@@ -7814,14 +7814,14 @@ define <2 x i128> @v_fshr_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
 ; GFX10-NEXT:    v_lshlrev_b64 v[8:9], v25, v[6:7]
 ; GFX10-NEXT:    v_lshlrev_b64 v[4:5], v16, v[4:5]
 ; GFX10-NEXT:    v_or_b32_e32 v0, v23, v0
-; GFX10-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 64, v25
+; GFX10-NEXT:    v_cmp_ge_u32_e32 vcc_lo, 64, v25
 ; GFX10-NEXT:    v_add_nc_u32_e32 v23, 0xffffffc0, v20
 ; GFX10-NEXT:    v_lshrrev_b64 v[16:17], v20, v[12:13]
 ; GFX10-NEXT:    v_or_b32_e32 v8, v2, v8
 ; GFX10-NEXT:    v_lshlrev_b64 v[18:19], v18, v[14:15]
 ; GFX10-NEXT:    v_or_b32_e32 v2, v21, v26
 ; GFX10-NEXT:    v_or_b32_e32 v9, v3, v9
-; GFX10-NEXT:    v_cmp_gt_u32_e64 s5, 64, v20
+; GFX10-NEXT:    v_cmp_ge_u32_e64 s5, 64, v20
 ; GFX10-NEXT:    v_cndmask_b32_e32 v21, v4, v8, vcc_lo
 ; GFX10-NEXT:    v_lshrrev_b64 v[3:4], v23, v[14:15]
 ; GFX10-NEXT:    v_or_b32_e32 v8, v16, v18
@@ -7861,7 +7861,7 @@ define <2 x i128> @v_fshr_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
 ; GFX11-NEXT:    v_sub_nc_u32_e32 v18, 64, v25
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_4)
 ; GFX11-NEXT:    v_or_b32_e32 v2, v2, v17
-; GFX11-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 64, v25
+; GFX11-NEXT:    v_cmp_ge_u32_e32 vcc_lo, 64, v25
 ; GFX11-NEXT:    v_lshlrev_b64 v[23:24], v25, v[0:1]
 ; GFX11-NEXT:    v_and_b32_e32 v26, 0x7f, v16
 ; GFX11-NEXT:    v_lshrrev_b64 v[17:18], v18, v[0:1]
@@ -7877,7 +7877,7 @@ define <2 x i128> @v_fshr_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
 ; GFX11-NEXT:    v_lshlrev_b64 v[18:19], v18, v[10:11]
 ; GFX11-NEXT:    v_dual_cndmask_b32 v21, v0, v21 :: v_dual_cndmask_b32 v22, v1, v22
-; GFX11-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 64, v26
+; GFX11-NEXT:    v_cmp_ge_u32_e32 vcc_lo, 64, v26
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_3) | instid1(VALU_DEP_3)
 ; GFX11-NEXT:    v_or_b32_e32 v16, v16, v18
 ; GFX11-NEXT:    v_add_nc_u32_e32 v27, 0xffffffc0, v26
@@ -7902,11 +7902,11 @@ define <2 x i128> @v_fshr_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
 ; GFX11-NEXT:    v_cndmask_b32_e64 v1, v1, v9, s0
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4)
 ; GFX11-NEXT:    v_lshlrev_b64 v[10:11], v25, v[4:5]
-; GFX11-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 64, v25
+; GFX11-NEXT:    v_cmp_ge_u32_e32 vcc_lo, 64, v25
 ; GFX11-NEXT:    v_sub_nc_u32_e32 v18, 64, v20
 ; GFX11-NEXT:    v_or_b32_e32 v0, v23, v0
 ; GFX11-NEXT:    v_add_nc_u32_e32 v23, 0xffffffc0, v20
-; GFX11-NEXT:    v_cmp_gt_u32_e64 s1, 64, v20
+; GFX11-NEXT:    v_cmp_ge_u32_e64 s1, 64, v20
 ; GFX11-NEXT:    v_cndmask_b32_e32 v10, 0, v10, vcc_lo
 ; GFX11-NEXT:    v_sub_nc_u32_e32 v8, 64, v25
 ; GFX11-NEXT:    v_add_nc_u32_e32 v16, 0xffffffc0, v25
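
The fshr hunks above all come from one lowering shape: a 128-bit (or 2x128-bit)
funnel shift is split into 64-bit halves, and the shift amount is compared
against 64 (and against 0) to select between the partial results, which is
where each updated v_cmp_* mnemonic appears. As a rough illustration only --
the function name below is invented, and the checked-in tests use their own
definitions -- the pattern reduces to:

  ; Illustrative reduction, not part of the patch.
  define i128 @sample_fshr_i128(i128 %lhs, i128 %rhs, i128 %amt) {
    %r = call i128 @llvm.fshr.i128(i128 %lhs, i128 %rhs, i128 %amt)
    ret i128 %r
  }
  declare i128 @llvm.fshr.i128(i128, i128, i128)
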
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/is-safe-to-sink-bug.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/is-safe-to-sink-bug.ll
index d3bc661f5940b..c12ae21a99e5b 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/is-safe-to-sink-bug.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/is-safe-to-sink-bug.ll
@@ -45,7 +45,7 @@ define amdgpu_ps void @_amdgpu_ps_main(i1 %arg) {
 ; CHECK-NEXT:    s_and_b32 s2, 1, s2
 ; CHECK-NEXT:    v_or_b32_e32 v1, 1, v0
 ; CHECK-NEXT:    v_cmp_ne_u32_e64 s2, 0, s2
-; CHECK-NEXT:    v_cmp_gt_i32_e32 vcc_lo, 0, v0
+; CHECK-NEXT:    v_cmp_ge_i32_e32 vcc_lo, 0, v0
 ; CHECK-NEXT:    v_mov_b32_e32 v0, v1
 ; CHECK-NEXT:    s_and_b32 s4, s2, s1
 ; CHECK-NEXT:    s_andn2_b32 s1, s1, exec_lo
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ballot.i32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ballot.i32.ll
index 618dd45ee0bb2..ccf101cbb901e 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ballot.i32.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ballot.i32.ll
@@ -56,7 +56,7 @@ define amdgpu_cs i32 @compare_ints(i32 %x, i32 %y) {
 define amdgpu_cs i32 @compare_int_with_constant(i32 %x) {
 ; CHECK-LABEL: compare_int_with_constant:
 ; CHECK:       ; %bb.0:
-; CHECK-NEXT:    v_cmp_le_i32_e64 s0, 0x63, v0
+; CHECK-NEXT:    v_cmp_lt_i32_e64 s0, 0x63, v0
 ; CHECK-NEXT:    ; return to shader part epilog
   %cmp = icmp sge i32 %x, 99
   %ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %cmp)
@@ -186,7 +186,7 @@ false:
 define amdgpu_cs i32 @branch_divergent_ballot_ne_zero_compare(i32 %v) {
 ; CHECK-LABEL: branch_divergent_ballot_ne_zero_compare:
 ; CHECK:       ; %bb.0:
-; CHECK-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 12, v0
+; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc_lo, 12, v0
 ; CHECK-NEXT:    s_cmp_eq_u32 vcc_lo, 0
 ; CHECK-NEXT:    s_cbranch_scc1 .LBB11_2
 ; CHECK-NEXT:  ; %bb.1: ; %true
@@ -235,7 +235,7 @@ false:
 define amdgpu_cs i32 @branch_divergent_ballot_eq_zero_compare(i32 %v) {
 ; CHECK-LABEL: branch_divergent_ballot_eq_zero_compare:
 ; CHECK:       ; %bb.0:
-; CHECK-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 12, v0
+; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc_lo, 12, v0
 ; CHECK-NEXT:    s_cmp_lg_u32 vcc_lo, 0
 ; CHECK-NEXT:    s_cbranch_scc0 .LBB13_2
 ; CHECK-NEXT:  ; %bb.1: ; %false
@@ -284,8 +284,8 @@ false:
 define amdgpu_cs i32 @branch_divergent_ballot_ne_zero_and(i32 %v1, i32 %v2) {
 ; CHECK-LABEL: branch_divergent_ballot_ne_zero_and:
 ; CHECK:       ; %bb.0:
-; CHECK-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 12, v0
-; CHECK-NEXT:    v_cmp_lt_u32_e64 s0, 34, v1
+; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc_lo, 12, v0
+; CHECK-NEXT:    v_cmp_le_u32_e64 s0, 34, v1
 ; CHECK-NEXT:    s_and_b32 s0, vcc_lo, s0
 ; CHECK-NEXT:    s_cmp_eq_u32 s0, 0
 ; CHECK-NEXT:    s_cbranch_scc1 .LBB15_2
@@ -342,8 +342,8 @@ false:
 define amdgpu_cs i32 @branch_divergent_ballot_eq_zero_and(i32 %v1, i32 %v2) {
 ; CHECK-LABEL: branch_divergent_ballot_eq_zero_and:
 ; CHECK:       ; %bb.0:
-; CHECK-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 12, v0
-; CHECK-NEXT:    v_cmp_lt_u32_e64 s0, 34, v1
+; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc_lo, 12, v0
+; CHECK-NEXT:    v_cmp_le_u32_e64 s0, 34, v1
 ; CHECK-NEXT:    s_and_b32 s0, vcc_lo, s0
 ; CHECK-NEXT:    s_cmp_lg_u32 s0, 0
 ; CHECK-NEXT:    s_cbranch_scc0 .LBB17_2
@@ -434,7 +434,7 @@ define amdgpu_ps void @non_cst_non_compare_input(ptr addrspace(1) %out, i32 %tid
 ; GFX10-NEXT:    s_and_saveexec_b32 s1, vcc_lo
 ; GFX10-NEXT:    s_xor_b32 s1, exec_lo, s1
 ; GFX10-NEXT:  ; %bb.1: ; %B
-; GFX10-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 2, v2
+; GFX10-NEXT:    v_cmp_ge_u32_e32 vcc_lo, 2, v2
 ; GFX10-NEXT:    s_andn2_b32 s0, s0, exec_lo
 ; GFX10-NEXT:    ; implicit-def: $vgpr2
 ; GFX10-NEXT:    s_and_b32 s2, exec_lo, vcc_lo
@@ -442,7 +442,7 @@ define amdgpu_ps void @non_cst_non_compare_input(ptr addrspace(1) %out, i32 %tid
 ; GFX10-NEXT:  ; %bb.2: ; %Flow
 ; GFX10-NEXT:    s_andn2_saveexec_b32 s1, s1
 ; GFX10-NEXT:  ; %bb.3: ; %A
-; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 1, v2
+; GFX10-NEXT:    v_cmp_lt_u32_e32 vcc_lo, 1, v2
 ; GFX10-NEXT:    s_andn2_b32 s0, s0, exec_lo
 ; GFX10-NEXT:    s_and_b32 s2, exec_lo, vcc_lo
 ; GFX10-NEXT:    s_or_b32 s0, s0, s2
@@ -461,7 +461,7 @@ define amdgpu_ps void @non_cst_non_compare_input(ptr addrspace(1) %out, i32 %tid
 ; GFX11-NEXT:    v_cmpx_ne_u32_e32 0, v3
 ; GFX11-NEXT:    s_xor_b32 s1, exec_lo, s1
 ; GFX11-NEXT:  ; %bb.1: ; %B
-; GFX11-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 2, v2
+; GFX11-NEXT:    v_cmp_ge_u32_e32 vcc_lo, 2, v2
 ; GFX11-NEXT:    s_and_not1_b32 s0, s0, exec_lo
 ; GFX11-NEXT:    ; implicit-def: $vgpr2
 ; GFX11-NEXT:    s_and_b32 s2, exec_lo, vcc_lo
@@ -469,7 +469,7 @@ define amdgpu_ps void @non_cst_non_compare_input(ptr addrspace(1) %out, i32 %tid
 ; GFX11-NEXT:  ; %bb.2: ; %Flow
 ; GFX11-NEXT:    s_and_not1_saveexec_b32 s1, s1
 ; GFX11-NEXT:  ; %bb.3: ; %A
-; GFX11-NEXT:    v_cmp_le_u32_e32 vcc_lo, 1, v2
+; GFX11-NEXT:    v_cmp_lt_u32_e32 vcc_lo, 1, v2
 ; GFX11-NEXT:    s_and_not1_b32 s0, s0, exec_lo
 ; GFX11-NEXT:    s_and_b32 s2, exec_lo, vcc_lo
 ; GFX11-NEXT:    s_or_b32 s0, s0, s2
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ballot.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ballot.i64.ll
index 0bbb40b8db43a..7642414c54bb7 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ballot.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ballot.i64.ll
@@ -189,7 +189,7 @@ false:
 define amdgpu_cs i32 @branch_divergent_ballot_ne_zero_compare(i32 %v) {
 ; CHECK-LABEL: branch_divergent_ballot_ne_zero_compare:
 ; CHECK:       ; %bb.0:
-; CHECK-NEXT:    v_cmp_gt_u32_e32 vcc, 12, v0
+; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, 12, v0
 ; CHECK-NEXT:    s_cmp_eq_u64 vcc, 0
 ; CHECK-NEXT:    s_cbranch_scc1 .LBB11_2
 ; CHECK-NEXT:  ; %bb.1: ; %true
@@ -238,7 +238,7 @@ false:
 define amdgpu_cs i32 @branch_divergent_ballot_eq_zero_compare(i32 %v) {
 ; CHECK-LABEL: branch_divergent_ballot_eq_zero_compare:
 ; CHECK:       ; %bb.0:
-; CHECK-NEXT:    v_cmp_gt_u32_e32 vcc, 12, v0
+; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, 12, v0
 ; CHECK-NEXT:    s_cmp_lg_u64 vcc, 0
 ; CHECK-NEXT:    s_cbranch_scc0 .LBB13_2
 ; CHECK-NEXT:  ; %bb.1: ; %false
@@ -287,8 +287,8 @@ false:
 define amdgpu_cs i32 @branch_divergent_ballot_ne_zero_and(i32 %v1, i32 %v2) {
 ; CHECK-LABEL: branch_divergent_ballot_ne_zero_and:
 ; CHECK:       ; %bb.0:
-; CHECK-NEXT:    v_cmp_gt_u32_e32 vcc, 12, v0
-; CHECK-NEXT:    v_cmp_lt_u32_e64 s[0:1], 34, v1
+; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, 12, v0
+; CHECK-NEXT:    v_cmp_le_u32_e64 s[0:1], 34, v1
 ; CHECK-NEXT:    s_and_b64 s[0:1], vcc, s[0:1]
 ; CHECK-NEXT:    s_cmp_eq_u64 s[0:1], 0
 ; CHECK-NEXT:    s_cbranch_scc1 .LBB15_2
@@ -345,8 +345,8 @@ false:
 define amdgpu_cs i32 @branch_divergent_ballot_eq_zero_and(i32 %v1, i32 %v2) {
 ; CHECK-LABEL: branch_divergent_ballot_eq_zero_and:
 ; CHECK:       ; %bb.0:
-; CHECK-NEXT:    v_cmp_gt_u32_e32 vcc, 12, v0
-; CHECK-NEXT:    v_cmp_lt_u32_e64 s[0:1], 34, v1
+; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, 12, v0
+; CHECK-NEXT:    v_cmp_le_u32_e64 s[0:1], 34, v1
 ; CHECK-NEXT:    s_and_b64 s[0:1], vcc, s[0:1]
 ; CHECK-NEXT:    s_cmp_lg_u64 s[0:1], 0
 ; CHECK-NEXT:    s_cbranch_scc0 .LBB17_2
@@ -437,7 +437,7 @@ define amdgpu_ps void @non_cst_non_compare_input(ptr addrspace(1) %out, i32 %tid
 ; CHECK-NEXT:    s_and_saveexec_b64 s[2:3], vcc
 ; CHECK-NEXT:    s_xor_b64 s[2:3], exec, s[2:3]
 ; CHECK-NEXT:  ; %bb.1: ; %B
-; CHECK-NEXT:    v_cmp_gt_u32_e32 vcc, 2, v2
+; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, 2, v2
 ; CHECK-NEXT:    s_andn2_b64 s[0:1], s[0:1], exec
 ; CHECK-NEXT:    s_and_b64 s[4:5], exec, vcc
 ; CHECK-NEXT:    s_or_b64 s[0:1], s[0:1], s[4:5]
@@ -445,7 +445,7 @@ define amdgpu_ps void @non_cst_non_compare_input(ptr addrspace(1) %out, i32 %tid
 ; CHECK-NEXT:  ; %bb.2: ; %Flow
 ; CHECK-NEXT:    s_andn2_saveexec_b64 s[2:3], s[2:3]
 ; CHECK-NEXT:  ; %bb.3: ; %A
-; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, 1, v2
+; CHECK-NEXT:    v_cmp_lt_u32_e32 vcc, 1, v2
 ; CHECK-NEXT:    s_andn2_b64 s[0:1], s[0:1], exec
 ; CHECK-NEXT:    s_and_b64 s[4:5], exec, vcc
 ; CHECK-NEXT:    s_or_b64 s[0:1], s[0:1], s[4:5]
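
In both ballot files (i32 and i64 wave sizes) the affected compares feed
llvm.amdgcn.ballot, so the predicate is materialized in vcc/vcc_lo before the
scalar s_cmp on the ballot result. A minimal sketch of the shape being tested,
with a hypothetical name and an assumed predicate (the exact icmp predicates
in the checked-in tests may differ):

  ; Illustrative reduction, not part of the patch.
  define amdgpu_cs i32 @sample_ballot(i32 %v) {
    %cmp = icmp ult i32 %v, 12
    %ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %cmp)
    ret i32 %ballot
  }
  declare i32 @llvm.amdgcn.ballot.i32(i1)
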
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.memcpy.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.memcpy.ll
index e8de761540b7a..5d39093b5dedb 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.memcpy.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.memcpy.ll
@@ -66,7 +66,7 @@ define amdgpu_cs void @memcpy_p1i8(ptr addrspace(1) %dst, ptr addrspace(1) %src)
 ; LOOP-NEXT:    v_addc_u32_e32 v7, vcc, v1, v5, vcc
 ; LOOP-NEXT:    v_add_i32_e32 v4, vcc, 32, v4
 ; LOOP-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
-; LOOP-NEXT:    v_cmp_gt_u32_e32 vcc, 32, v4
+; LOOP-NEXT:    v_cmp_ge_u32_e32 vcc, 32, v4
 ; LOOP-NEXT:    v_lshlrev_b32_e32 v11, 8, v11
 ; LOOP-NEXT:    v_lshlrev_b32_e32 v13, 24, v13
 ; LOOP-NEXT:    v_lshlrev_b32_e32 v12, 16, v12
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.memset.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.memset.ll
index 7cd3babc70909..74d598091523f 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.memset.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.memset.ll
@@ -18,7 +18,7 @@ define amdgpu_cs void @memset_p1i8(ptr addrspace(1) %dst, i8 %val) {
 ; LOOP-NEXT:    v_addc_u32_e32 v6, vcc, v1, v4, vcc
 ; LOOP-NEXT:    v_add_i32_e32 v3, vcc, 1, v3
 ; LOOP-NEXT:    v_addc_u32_e32 v4, vcc, 0, v4, vcc
-; LOOP-NEXT:    v_cmp_gt_u32_e32 vcc, 4, v3
+; LOOP-NEXT:    v_cmp_ge_u32_e32 vcc, 4, v3
 ; LOOP-NEXT:    buffer_store_byte v2, v[5:6], s[0:3], 0 addr64
 ; LOOP-NEXT:    s_cbranch_vccnz .LBB0_1
 ; LOOP-NEXT:  ; %bb.2: ; %split
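
The memcpy/memset hunks touch the exit condition of GlobalISel's byte
copy/set loops: each iteration compares the induction variable against the
trip count (the v_cmp feeding s_cbranch_vccnz above). A sketch of the memset
case, assuming the usual intrinsic signature; the function name is invented
for illustration:

  ; Illustrative reduction, not part of the patch.
  define amdgpu_cs void @sample_memset(ptr addrspace(1) %dst, i8 %val) {
    call void @llvm.memset.p1.i32(ptr addrspace(1) %dst, i8 %val, i32 4, i1 false)
    ret void
  }
  declare void @llvm.memset.p1.i32(ptr addrspace(1), i8, i32, i1)
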
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/lshr.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/lshr.ll
index 6baa10bb48621..60a99551338ac 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/lshr.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/lshr.ll
@@ -1637,7 +1637,7 @@ define i65 @v_lshr_i65(i65 %value, i65 %amount) {
 ; GFX6-NEXT:    v_lshr_b64 v[4:5], v[4:5], v2
 ; GFX6-NEXT:    v_or_b32_e32 v6, v6, v8
 ; GFX6-NEXT:    v_or_b32_e32 v7, v7, v9
-; GFX6-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v3
+; GFX6-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v3
 ; GFX6-NEXT:    v_cndmask_b32_e32 v2, v4, v6, vcc
 ; GFX6-NEXT:    v_cndmask_b32_e32 v4, v5, v7, vcc
 ; GFX6-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v3
@@ -1660,7 +1660,7 @@ define i65 @v_lshr_i65(i65 %value, i65 %amount) {
 ; GFX8-NEXT:    v_lshrrev_b64 v[4:5], v2, v[4:5]
 ; GFX8-NEXT:    v_or_b32_e32 v6, v6, v8
 ; GFX8-NEXT:    v_or_b32_e32 v7, v7, v9
-; GFX8-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v3
+; GFX8-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v3
 ; GFX8-NEXT:    v_cndmask_b32_e32 v2, v4, v6, vcc
 ; GFX8-NEXT:    v_cndmask_b32_e32 v4, v5, v7, vcc
 ; GFX8-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v3
@@ -1683,7 +1683,7 @@ define i65 @v_lshr_i65(i65 %value, i65 %amount) {
 ; GFX9-NEXT:    v_lshrrev_b64 v[4:5], v2, v[4:5]
 ; GFX9-NEXT:    v_or_b32_e32 v6, v6, v8
 ; GFX9-NEXT:    v_or_b32_e32 v7, v7, v9
-; GFX9-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v3
+; GFX9-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v3
 ; GFX9-NEXT:    v_cndmask_b32_e32 v2, v4, v6, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v4, v5, v7, vcc
 ; GFX9-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v3
@@ -1701,7 +1701,7 @@ define i65 @v_lshr_i65(i65 %value, i65 %amount) {
 ; GFX10-NEXT:    v_sub_nc_u32_e32 v2, 64, v3
 ; GFX10-NEXT:    v_add_nc_u32_e32 v10, 0xffffffc0, v3
 ; GFX10-NEXT:    v_lshrrev_b64 v[6:7], v3, v[0:1]
-; GFX10-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 64, v3
+; GFX10-NEXT:    v_cmp_ge_u32_e32 vcc_lo, 64, v3
 ; GFX10-NEXT:    v_cmp_eq_u32_e64 s4, 0, v3
 ; GFX10-NEXT:    v_lshlrev_b64 v[8:9], v2, v[4:5]
 ; GFX10-NEXT:    v_lshrrev_b64 v[10:11], v10, v[4:5]
@@ -1718,7 +1718,7 @@ define i65 @v_lshr_i65(i65 %value, i65 %amount) {
 ; GFX11-LABEL: v_lshr_i65:
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 64, v3
+; GFX11-NEXT:    v_cmp_ge_u32_e32 vcc_lo, 64, v3
 ; GFX11-NEXT:    v_mov_b32_e32 v4, 1
 ; GFX11-NEXT:    v_dual_mov_b32 v5, 0 :: v_dual_and_b32 v4, 1, v2
 ; GFX11-NEXT:    v_sub_nc_u32_e32 v2, 64, v3
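
v_lshr_i65 follows the same split-shift scheme as the fshr cases: the odd
65-bit type is legalized into 64-bit pieces and the amount is tested against
64 to pick the result, so every run line carries one of the updated compares.
The whole test reduces to a plain IR shift (name invented for illustration):

  ; Illustrative reduction, not part of the patch.
  define i65 @sample_lshr_i65(i65 %value, i65 %amount) {
    %r = lshr i65 %value, %amount
    ret i65 %r
  }
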
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/mul-known-bits.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/mul-known-bits.i64.ll
index c87c334217b77..abac57d1ae6a1 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/mul-known-bits.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/mul-known-bits.i64.ll
@@ -503,7 +503,7 @@ define amdgpu_kernel void @v_mul64_masked_before_and_in_branch(ptr addrspace(1)
 ; GFX10-NEXT:    global_load_dwordx2 v[4:5], v0, s[6:7]
 ; GFX10-NEXT:    ; implicit-def: $vgpr0_vgpr1
 ; GFX10-NEXT:    s_waitcnt vmcnt(1)
-; GFX10-NEXT:    v_cmp_ge_u64_e32 vcc_lo, 0, v[2:3]
+; GFX10-NEXT:    v_cmp_gt_u64_e32 vcc_lo, 0, v[2:3]
 ; GFX10-NEXT:    s_and_saveexec_b32 s2, vcc_lo
 ; GFX10-NEXT:    s_xor_b32 s2, exec_lo, s2
 ; GFX10-NEXT:    s_cbranch_execz .LBB10_2
@@ -541,7 +541,7 @@ define amdgpu_kernel void @v_mul64_masked_before_and_in_branch(ptr addrspace(1)
 ; GFX11-NEXT:    s_mov_b32 s2, exec_lo
 ; GFX11-NEXT:    ; implicit-def: $vgpr0_vgpr1
 ; GFX11-NEXT:    s_waitcnt vmcnt(1)
-; GFX11-NEXT:    v_cmpx_ge_u64_e32 0, v[2:3]
+; GFX11-NEXT:    v_cmpx_gt_u64_e32 0, v[2:3]
 ; GFX11-NEXT:    s_xor_b32 s2, exec_lo, s2
 ; GFX11-NEXT:    s_cbranch_execz .LBB10_2
 ; GFX11-NEXT:  ; %bb.1: ; %else
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui.ll
index 5240bf4f3a1d7..5cf026ee6976d 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui.ll
@@ -324,8 +324,8 @@ define amdgpu_ps void @sext(i32 inreg %a, ptr addrspace(1) %ptr) {
 define amdgpu_ps void @and_i1_vcc(i32 %a, i32 %b, ptr addrspace(1) %ptr) {
 ; OLD_RBS-LABEL: and_i1_vcc:
 ; OLD_RBS:       ; %bb.0:
-; OLD_RBS-NEXT:    v_cmp_le_u32_e32 vcc_lo, 10, v0
-; OLD_RBS-NEXT:    v_cmp_le_u32_e64 s0, 20, v1
+; OLD_RBS-NEXT:    v_cmp_lt_u32_e32 vcc_lo, 10, v0
+; OLD_RBS-NEXT:    v_cmp_lt_u32_e64 s0, 20, v1
 ; OLD_RBS-NEXT:    s_and_b32 vcc_lo, vcc_lo, s0
 ; OLD_RBS-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc_lo
 ; OLD_RBS-NEXT:    global_store_dword v[2:3], v0, off
@@ -333,8 +333,8 @@ define amdgpu_ps void @and_i1_vcc(i32 %a, i32 %b, ptr addrspace(1) %ptr) {
 ;
 ; NEW_RBS-LABEL: and_i1_vcc:
 ; NEW_RBS:       ; %bb.0:
-; NEW_RBS-NEXT:    v_cmp_le_u32_e32 vcc_lo, 10, v0
-; NEW_RBS-NEXT:    v_cmp_le_u32_e64 s0, 20, v1
+; NEW_RBS-NEXT:    v_cmp_lt_u32_e32 vcc_lo, 10, v0
+; NEW_RBS-NEXT:    v_cmp_lt_u32_e64 s0, 20, v1
 ; NEW_RBS-NEXT:    s_and_b32 vcc_lo, vcc_lo, s0
 ; NEW_RBS-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc_lo
 ; NEW_RBS-NEXT:    global_store_dword v[2:3], v0, off
@@ -532,7 +532,7 @@ define amdgpu_cs void @loop_with_2breaks(ptr addrspace(1) %x, ptr addrspace(1) %
 ; OLD_RBS-NEXT:    v_add_co_u32 v7, vcc_lo, v0, v7
 ; OLD_RBS-NEXT:    v_add_co_ci_u32_e32 v8, vcc_lo, v1, v8, vcc_lo
 ; OLD_RBS-NEXT:    v_add_nc_u32_e32 v10, 1, v6
-; OLD_RBS-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 0x64, v6
+; OLD_RBS-NEXT:    v_cmp_ge_u32_e32 vcc_lo, 0x64, v6
 ; OLD_RBS-NEXT:    s_andn2_b32 s4, -1, exec_lo
 ; OLD_RBS-NEXT:    global_load_dword v9, v[7:8], off
 ; OLD_RBS-NEXT:    v_mov_b32_e32 v6, v10
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll
index 832f066adaa84..71c50ea72432c 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll
@@ -1017,7 +1017,7 @@ define i24 @v_saddsat_i24(i24 %lhs, i24 %rhs) {
 ; GFX8-NEXT:    v_bfe_i32 v0, v0, 0, 24
 ; GFX8-NEXT:    v_cmp_lt_i32_e64 s[4:5], v3, v0
 ; GFX8-NEXT:    v_bfe_i32 v0, v1, 0, 24
-; GFX8-NEXT:    v_cmp_gt_i32_e64 s[6:7], 0, v0
+; GFX8-NEXT:    v_cmp_ge_i32_e64 s[6:7], 0, v0
 ; GFX8-NEXT:    v_ashrrev_i32_e32 v0, 23, v3
 ; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 0xff800000, v0
 ; GFX8-NEXT:    s_xor_b64 vcc, s[6:7], s[4:5]
@@ -4090,7 +4090,7 @@ define i48 @v_saddsat_i48(i48 %lhs, i48 %rhs) {
 ; GFX6-NEXT:    v_bfe_i32 v1, v0, 0, 16
 ; GFX6-NEXT:    v_bfe_i32 v3, v2, 0, 16
 ; GFX6-NEXT:    v_cmp_lt_i64_e64 s[4:5], v[4:5], v[0:1]
-; GFX6-NEXT:    v_cmp_gt_i64_e64 s[6:7], 0, v[2:3]
+; GFX6-NEXT:    v_cmp_ge_i64_e64 s[6:7], 0, v[2:3]
 ; GFX6-NEXT:    v_ashrrev_i32_e32 v0, 31, v5
 ; GFX6-NEXT:    v_add_i32_e32 v2, vcc, 0xffff8000, v0
 ; GFX6-NEXT:    v_ashrrev_i32_e32 v1, 15, v5
@@ -4108,7 +4108,7 @@ define i48 @v_saddsat_i48(i48 %lhs, i48 %rhs) {
 ; GFX8-NEXT:    v_bfe_i32 v1, v0, 0, 16
 ; GFX8-NEXT:    v_bfe_i32 v3, v2, 0, 16
 ; GFX8-NEXT:    v_cmp_lt_i64_e64 s[4:5], v[4:5], v[0:1]
-; GFX8-NEXT:    v_cmp_gt_i64_e64 s[6:7], 0, v[2:3]
+; GFX8-NEXT:    v_cmp_ge_i64_e64 s[6:7], 0, v[2:3]
 ; GFX8-NEXT:    v_ashrrev_i32_e32 v0, 31, v5
 ; GFX8-NEXT:    v_add_u32_e32 v2, vcc, 0xffff8000, v0
 ; GFX8-NEXT:    v_ashrrev_i32_e32 v1, 15, v5
@@ -4125,7 +4125,7 @@ define i48 @v_saddsat_i48(i48 %lhs, i48 %rhs) {
 ; GFX9-NEXT:    v_add_co_u32_e32 v4, vcc, v0, v2
 ; GFX9-NEXT:    v_addc_co_u32_e32 v5, vcc, v1, v3, vcc
 ; GFX9-NEXT:    v_cmp_lt_i64_e32 vcc, v[4:5], v[0:1]
-; GFX9-NEXT:    v_cmp_gt_i64_e64 s[4:5], 0, v[2:3]
+; GFX9-NEXT:    v_cmp_ge_i64_e64 s[4:5], 0, v[2:3]
 ; GFX9-NEXT:    v_ashrrev_i32_e32 v0, 31, v5
 ; GFX9-NEXT:    v_add_u32_e32 v1, 0x80000000, v0
 ; GFX9-NEXT:    s_xor_b64 vcc, s[4:5], vcc
@@ -4141,7 +4141,7 @@ define i48 @v_saddsat_i48(i48 %lhs, i48 %rhs) {
 ; GFX10-NEXT:    v_lshlrev_b64 v[2:3], 16, v[2:3]
 ; GFX10-NEXT:    v_add_co_u32 v4, vcc_lo, v0, v2
 ; GFX10-NEXT:    v_add_co_ci_u32_e32 v5, vcc_lo, v1, v3, vcc_lo
-; GFX10-NEXT:    v_cmp_gt_i64_e32 vcc_lo, 0, v[2:3]
+; GFX10-NEXT:    v_cmp_ge_i64_e32 vcc_lo, 0, v[2:3]
 ; GFX10-NEXT:    v_ashrrev_i32_e32 v6, 31, v5
 ; GFX10-NEXT:    v_cmp_lt_i64_e64 s4, v[4:5], v[0:1]
 ; GFX10-NEXT:    v_add_nc_u32_e32 v1, 0x80000000, v6
@@ -4158,7 +4158,7 @@ define i48 @v_saddsat_i48(i48 %lhs, i48 %rhs) {
 ; GFX11-NEXT:    v_lshlrev_b64 v[2:3], 16, v[2:3]
 ; GFX11-NEXT:    v_add_co_u32 v4, vcc_lo, v0, v2
 ; GFX11-NEXT:    v_add_co_ci_u32_e64 v5, null, v1, v3, vcc_lo
-; GFX11-NEXT:    v_cmp_gt_i64_e32 vcc_lo, 0, v[2:3]
+; GFX11-NEXT:    v_cmp_ge_i64_e32 vcc_lo, 0, v[2:3]
 ; GFX11-NEXT:    v_ashrrev_i32_e32 v6, 31, v5
 ; GFX11-NEXT:    v_cmp_lt_i64_e64 s0, v[4:5], v[0:1]
 ; GFX11-NEXT:    v_add_nc_u32_e32 v1, 0x80000000, v6
@@ -4296,8 +4296,8 @@ define amdgpu_ps <2 x float> @saddsat_i48_sv(i48 inreg %lhs, i48 %rhs) {
 ; GFX6-NEXT:    v_bfe_i32 v3, v2, 0, 16
 ; GFX6-NEXT:    s_bfe_i64 s[0:1], s[0:1], 0x300000
 ; GFX6-NEXT:    v_bfe_i32 v1, v0, 0, 16
-; GFX6-NEXT:    v_cmp_gt_i64_e64 s[0:1], s[0:1], v[2:3]
-; GFX6-NEXT:    v_cmp_gt_i64_e64 s[2:3], 0, v[0:1]
+; GFX6-NEXT:    v_cmp_ge_i64_e64 s[0:1], s[0:1], v[2:3]
+; GFX6-NEXT:    v_cmp_ge_i64_e64 s[2:3], 0, v[0:1]
 ; GFX6-NEXT:    v_ashrrev_i32_e32 v0, 31, v3
 ; GFX6-NEXT:    v_ashrrev_i32_e32 v1, 15, v3
 ; GFX6-NEXT:    v_add_i32_e32 v3, vcc, 0xffff8000, v0
@@ -4315,8 +4315,8 @@ define amdgpu_ps <2 x float> @saddsat_i48_sv(i48 inreg %lhs, i48 %rhs) {
 ; GFX8-NEXT:    v_bfe_i32 v3, v2, 0, 16
 ; GFX8-NEXT:    s_bfe_i64 s[0:1], s[0:1], 0x300000
 ; GFX8-NEXT:    v_bfe_i32 v1, v0, 0, 16
-; GFX8-NEXT:    v_cmp_gt_i64_e64 s[0:1], s[0:1], v[2:3]
-; GFX8-NEXT:    v_cmp_gt_i64_e64 s[2:3], 0, v[0:1]
+; GFX8-NEXT:    v_cmp_ge_i64_e64 s[0:1], s[0:1], v[2:3]
+; GFX8-NEXT:    v_cmp_ge_i64_e64 s[2:3], 0, v[0:1]
 ; GFX8-NEXT:    v_ashrrev_i32_e32 v0, 31, v3
 ; GFX8-NEXT:    v_ashrrev_i32_e32 v1, 15, v3
 ; GFX8-NEXT:    v_add_u32_e32 v3, vcc, 0xffff8000, v0
@@ -4333,8 +4333,8 @@ define amdgpu_ps <2 x float> @saddsat_i48_sv(i48 inreg %lhs, i48 %rhs) {
 ; GFX9-NEXT:    v_mov_b32_e32 v3, s1
 ; GFX9-NEXT:    v_add_co_u32_e32 v2, vcc, s0, v0
 ; GFX9-NEXT:    v_addc_co_u32_e32 v3, vcc, v3, v1, vcc
-; GFX9-NEXT:    v_cmp_gt_i64_e32 vcc, s[0:1], v[2:3]
-; GFX9-NEXT:    v_cmp_gt_i64_e64 s[0:1], 0, v[0:1]
+; GFX9-NEXT:    v_cmp_ge_i64_e32 vcc, s[0:1], v[2:3]
+; GFX9-NEXT:    v_cmp_ge_i64_e64 s[0:1], 0, v[0:1]
 ; GFX9-NEXT:    v_ashrrev_i32_e32 v0, 31, v3
 ; GFX9-NEXT:    v_add_u32_e32 v1, 0x80000000, v0
 ; GFX9-NEXT:    s_xor_b64 vcc, s[0:1], vcc
@@ -4351,8 +4351,8 @@ define amdgpu_ps <2 x float> @saddsat_i48_sv(i48 inreg %lhs, i48 %rhs) {
 ; GFX10-NEXT:    v_add_co_u32 v2, vcc_lo, s0, v0
 ; GFX10-NEXT:    v_add_co_ci_u32_e32 v3, vcc_lo, s1, v1, vcc_lo
 ; GFX10-NEXT:    v_ashrrev_i32_e32 v4, 31, v3
-; GFX10-NEXT:    v_cmp_gt_i64_e32 vcc_lo, s[0:1], v[2:3]
-; GFX10-NEXT:    v_cmp_gt_i64_e64 s0, 0, v[0:1]
+; GFX10-NEXT:    v_cmp_ge_i64_e32 vcc_lo, s[0:1], v[2:3]
+; GFX10-NEXT:    v_cmp_ge_i64_e64 s0, 0, v[0:1]
 ; GFX10-NEXT:    v_add_nc_u32_e32 v1, 0x80000000, v4
 ; GFX10-NEXT:    s_xor_b32 vcc_lo, s0, vcc_lo
 ; GFX10-NEXT:    v_cndmask_b32_e32 v0, v2, v4, vcc_lo
@@ -4368,8 +4368,8 @@ define amdgpu_ps <2 x float> @saddsat_i48_sv(i48 inreg %lhs, i48 %rhs) {
 ; GFX11-NEXT:    v_add_co_u32 v2, vcc_lo, s0, v0
 ; GFX11-NEXT:    v_add_co_ci_u32_e64 v3, null, s1, v1, vcc_lo
 ; GFX11-NEXT:    v_ashrrev_i32_e32 v4, 31, v3
-; GFX11-NEXT:    v_cmp_gt_i64_e32 vcc_lo, s[0:1], v[2:3]
-; GFX11-NEXT:    v_cmp_gt_i64_e64 s0, 0, v[0:1]
+; GFX11-NEXT:    v_cmp_ge_i64_e32 vcc_lo, s[0:1], v[2:3]
+; GFX11-NEXT:    v_cmp_ge_i64_e64 s0, 0, v[0:1]
 ; GFX11-NEXT:    v_add_nc_u32_e32 v1, 0x80000000, v4
 ; GFX11-NEXT:    s_xor_b32 vcc_lo, s0, vcc_lo
 ; GFX11-NEXT:    v_dual_cndmask_b32 v0, v2, v4 :: v_dual_cndmask_b32 v1, v3, v1
@@ -4484,7 +4484,7 @@ define i64 @v_saddsat_i64(i64 %lhs, i64 %rhs) {
 ; GFX6-NEXT:    v_add_i32_e32 v4, vcc, v0, v2
 ; GFX6-NEXT:    v_addc_u32_e32 v5, vcc, v1, v3, vcc
 ; GFX6-NEXT:    v_cmp_lt_i64_e64 s[4:5], v[4:5], v[0:1]
-; GFX6-NEXT:    v_cmp_gt_i64_e64 s[6:7], 0, v[2:3]
+; GFX6-NEXT:    v_cmp_ge_i64_e64 s[6:7], 0, v[2:3]
 ; GFX6-NEXT:    v_ashrrev_i32_e32 v0, 31, v5
 ; GFX6-NEXT:    v_add_i32_e32 v1, vcc, 0x80000000, v0
 ; GFX6-NEXT:    s_xor_b64 vcc, s[6:7], s[4:5]
@@ -4498,7 +4498,7 @@ define i64 @v_saddsat_i64(i64 %lhs, i64 %rhs) {
 ; GFX8-NEXT:    v_add_u32_e32 v4, vcc, v0, v2
 ; GFX8-NEXT:    v_addc_u32_e32 v5, vcc, v1, v3, vcc
 ; GFX8-NEXT:    v_cmp_lt_i64_e64 s[4:5], v[4:5], v[0:1]
-; GFX8-NEXT:    v_cmp_gt_i64_e64 s[6:7], 0, v[2:3]
+; GFX8-NEXT:    v_cmp_ge_i64_e64 s[6:7], 0, v[2:3]
 ; GFX8-NEXT:    v_ashrrev_i32_e32 v0, 31, v5
 ; GFX8-NEXT:    v_add_u32_e32 v1, vcc, 0x80000000, v0
 ; GFX8-NEXT:    s_xor_b64 vcc, s[6:7], s[4:5]
@@ -4512,7 +4512,7 @@ define i64 @v_saddsat_i64(i64 %lhs, i64 %rhs) {
 ; GFX9-NEXT:    v_add_co_u32_e32 v4, vcc, v0, v2
 ; GFX9-NEXT:    v_addc_co_u32_e32 v5, vcc, v1, v3, vcc
 ; GFX9-NEXT:    v_cmp_lt_i64_e32 vcc, v[4:5], v[0:1]
-; GFX9-NEXT:    v_cmp_gt_i64_e64 s[4:5], 0, v[2:3]
+; GFX9-NEXT:    v_cmp_ge_i64_e64 s[4:5], 0, v[2:3]
 ; GFX9-NEXT:    v_ashrrev_i32_e32 v0, 31, v5
 ; GFX9-NEXT:    v_add_u32_e32 v1, 0x80000000, v0
 ; GFX9-NEXT:    s_xor_b64 vcc, s[4:5], vcc
@@ -4525,7 +4525,7 @@ define i64 @v_saddsat_i64(i64 %lhs, i64 %rhs) {
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    v_add_co_u32 v4, vcc_lo, v0, v2
 ; GFX10-NEXT:    v_add_co_ci_u32_e32 v5, vcc_lo, v1, v3, vcc_lo
-; GFX10-NEXT:    v_cmp_gt_i64_e64 s4, 0, v[2:3]
+; GFX10-NEXT:    v_cmp_ge_i64_e64 s4, 0, v[2:3]
 ; GFX10-NEXT:    v_ashrrev_i32_e32 v6, 31, v5
 ; GFX10-NEXT:    v_cmp_lt_i64_e32 vcc_lo, v[4:5], v[0:1]
 ; GFX10-NEXT:    v_add_nc_u32_e32 v1, 0x80000000, v6
@@ -4539,7 +4539,7 @@ define i64 @v_saddsat_i64(i64 %lhs, i64 %rhs) {
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NEXT:    v_add_co_u32 v4, vcc_lo, v0, v2
 ; GFX11-NEXT:    v_add_co_ci_u32_e64 v5, null, v1, v3, vcc_lo
-; GFX11-NEXT:    v_cmp_gt_i64_e64 s0, 0, v[2:3]
+; GFX11-NEXT:    v_cmp_ge_i64_e64 s0, 0, v[2:3]
 ; GFX11-NEXT:    v_ashrrev_i32_e32 v6, 31, v5
 ; GFX11-NEXT:    v_cmp_lt_i64_e32 vcc_lo, v[4:5], v[0:1]
 ; GFX11-NEXT:    v_add_nc_u32_e32 v1, 0x80000000, v6
@@ -4656,8 +4656,8 @@ define amdgpu_ps <2 x float> @saddsat_i64_sv(i64 inreg %lhs, i64 %rhs) {
 ; GFX6-NEXT:    v_mov_b32_e32 v3, s1
 ; GFX6-NEXT:    v_add_i32_e32 v2, vcc, s0, v0
 ; GFX6-NEXT:    v_addc_u32_e32 v3, vcc, v3, v1, vcc
-; GFX6-NEXT:    v_cmp_gt_i64_e64 s[0:1], s[0:1], v[2:3]
-; GFX6-NEXT:    v_cmp_gt_i64_e64 s[2:3], 0, v[0:1]
+; GFX6-NEXT:    v_cmp_ge_i64_e64 s[0:1], s[0:1], v[2:3]
+; GFX6-NEXT:    v_cmp_ge_i64_e64 s[2:3], 0, v[0:1]
 ; GFX6-NEXT:    v_ashrrev_i32_e32 v0, 31, v3
 ; GFX6-NEXT:    v_add_i32_e32 v1, vcc, 0x80000000, v0
 ; GFX6-NEXT:    s_xor_b64 vcc, s[2:3], s[0:1]
@@ -4670,8 +4670,8 @@ define amdgpu_ps <2 x float> @saddsat_i64_sv(i64 inreg %lhs, i64 %rhs) {
 ; GFX8-NEXT:    v_mov_b32_e32 v3, s1
 ; GFX8-NEXT:    v_add_u32_e32 v2, vcc, s0, v0
 ; GFX8-NEXT:    v_addc_u32_e32 v3, vcc, v3, v1, vcc
-; GFX8-NEXT:    v_cmp_gt_i64_e64 s[0:1], s[0:1], v[2:3]
-; GFX8-NEXT:    v_cmp_gt_i64_e64 s[2:3], 0, v[0:1]
+; GFX8-NEXT:    v_cmp_ge_i64_e64 s[0:1], s[0:1], v[2:3]
+; GFX8-NEXT:    v_cmp_ge_i64_e64 s[2:3], 0, v[0:1]
 ; GFX8-NEXT:    v_ashrrev_i32_e32 v0, 31, v3
 ; GFX8-NEXT:    v_add_u32_e32 v1, vcc, 0x80000000, v0
 ; GFX8-NEXT:    s_xor_b64 vcc, s[2:3], s[0:1]
@@ -4684,8 +4684,8 @@ define amdgpu_ps <2 x float> @saddsat_i64_sv(i64 inreg %lhs, i64 %rhs) {
 ; GFX9-NEXT:    v_mov_b32_e32 v3, s1
 ; GFX9-NEXT:    v_add_co_u32_e32 v2, vcc, s0, v0
 ; GFX9-NEXT:    v_addc_co_u32_e32 v3, vcc, v3, v1, vcc
-; GFX9-NEXT:    v_cmp_gt_i64_e32 vcc, s[0:1], v[2:3]
-; GFX9-NEXT:    v_cmp_gt_i64_e64 s[0:1], 0, v[0:1]
+; GFX9-NEXT:    v_cmp_ge_i64_e32 vcc, s[0:1], v[2:3]
+; GFX9-NEXT:    v_cmp_ge_i64_e64 s[0:1], 0, v[0:1]
 ; GFX9-NEXT:    v_ashrrev_i32_e32 v0, 31, v3
 ; GFX9-NEXT:    v_add_u32_e32 v1, 0x80000000, v0
 ; GFX9-NEXT:    s_xor_b64 vcc, s[0:1], vcc
@@ -4698,8 +4698,8 @@ define amdgpu_ps <2 x float> @saddsat_i64_sv(i64 inreg %lhs, i64 %rhs) {
 ; GFX10-NEXT:    v_add_co_u32 v2, vcc_lo, s0, v0
 ; GFX10-NEXT:    v_add_co_ci_u32_e32 v3, vcc_lo, s1, v1, vcc_lo
 ; GFX10-NEXT:    v_ashrrev_i32_e32 v4, 31, v3
-; GFX10-NEXT:    v_cmp_gt_i64_e32 vcc_lo, s[0:1], v[2:3]
-; GFX10-NEXT:    v_cmp_gt_i64_e64 s0, 0, v[0:1]
+; GFX10-NEXT:    v_cmp_ge_i64_e32 vcc_lo, s[0:1], v[2:3]
+; GFX10-NEXT:    v_cmp_ge_i64_e64 s0, 0, v[0:1]
 ; GFX10-NEXT:    v_add_nc_u32_e32 v1, 0x80000000, v4
 ; GFX10-NEXT:    s_xor_b32 vcc_lo, s0, vcc_lo
 ; GFX10-NEXT:    v_cndmask_b32_e32 v0, v2, v4, vcc_lo
@@ -4711,8 +4711,8 @@ define amdgpu_ps <2 x float> @saddsat_i64_sv(i64 inreg %lhs, i64 %rhs) {
 ; GFX11-NEXT:    v_add_co_u32 v2, vcc_lo, s0, v0
 ; GFX11-NEXT:    v_add_co_ci_u32_e64 v3, null, s1, v1, vcc_lo
 ; GFX11-NEXT:    v_ashrrev_i32_e32 v4, 31, v3
-; GFX11-NEXT:    v_cmp_gt_i64_e32 vcc_lo, s[0:1], v[2:3]
-; GFX11-NEXT:    v_cmp_gt_i64_e64 s0, 0, v[0:1]
+; GFX11-NEXT:    v_cmp_ge_i64_e32 vcc_lo, s[0:1], v[2:3]
+; GFX11-NEXT:    v_cmp_ge_i64_e64 s0, 0, v[0:1]
 ; GFX11-NEXT:    v_add_nc_u32_e32 v1, 0x80000000, v4
 ; GFX11-NEXT:    s_xor_b32 vcc_lo, s0, vcc_lo
 ; GFX11-NEXT:    v_dual_cndmask_b32 v0, v2, v4 :: v_dual_cndmask_b32 v1, v3, v1
@@ -4801,7 +4801,7 @@ define <2 x i64> @v_saddsat_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) {
 ; GFX6-NEXT:    v_add_i32_e32 v8, vcc, v0, v4
 ; GFX6-NEXT:    v_addc_u32_e32 v9, vcc, v1, v5, vcc
 ; GFX6-NEXT:    v_cmp_lt_i64_e64 s[4:5], v[8:9], v[0:1]
-; GFX6-NEXT:    v_cmp_gt_i64_e64 s[6:7], 0, v[4:5]
+; GFX6-NEXT:    v_cmp_ge_i64_e64 s[6:7], 0, v[4:5]
 ; GFX6-NEXT:    v_ashrrev_i32_e32 v0, 31, v9
 ; GFX6-NEXT:    v_bfrev_b32_e32 v1, 1
 ; GFX6-NEXT:    v_add_i32_e32 v1, vcc, v0, v1
@@ -4811,7 +4811,7 @@ define <2 x i64> @v_saddsat_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) {
 ; GFX6-NEXT:    v_add_i32_e32 v4, vcc, v2, v6
 ; GFX6-NEXT:    v_addc_u32_e32 v5, vcc, v3, v7, vcc
 ; GFX6-NEXT:    v_cmp_lt_i64_e64 s[4:5], v[4:5], v[2:3]
-; GFX6-NEXT:    v_cmp_gt_i64_e64 s[6:7], 0, v[6:7]
+; GFX6-NEXT:    v_cmp_ge_i64_e64 s[6:7], 0, v[6:7]
 ; GFX6-NEXT:    v_ashrrev_i32_e32 v2, 31, v5
 ; GFX6-NEXT:    v_add_i32_e32 v3, vcc, 0x80000000, v2
 ; GFX6-NEXT:    s_xor_b64 vcc, s[6:7], s[4:5]
@@ -4825,7 +4825,7 @@ define <2 x i64> @v_saddsat_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) {
 ; GFX8-NEXT:    v_add_u32_e32 v8, vcc, v0, v4
 ; GFX8-NEXT:    v_addc_u32_e32 v9, vcc, v1, v5, vcc
 ; GFX8-NEXT:    v_cmp_lt_i64_e64 s[4:5], v[8:9], v[0:1]
-; GFX8-NEXT:    v_cmp_gt_i64_e64 s[6:7], 0, v[4:5]
+; GFX8-NEXT:    v_cmp_ge_i64_e64 s[6:7], 0, v[4:5]
 ; GFX8-NEXT:    v_ashrrev_i32_e32 v0, 31, v9
 ; GFX8-NEXT:    v_bfrev_b32_e32 v1, 1
 ; GFX8-NEXT:    v_add_u32_e32 v1, vcc, v0, v1
@@ -4835,7 +4835,7 @@ define <2 x i64> @v_saddsat_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) {
 ; GFX8-NEXT:    v_add_u32_e32 v4, vcc, v2, v6
 ; GFX8-NEXT:    v_addc_u32_e32 v5, vcc, v3, v7, vcc
 ; GFX8-NEXT:    v_cmp_lt_i64_e64 s[4:5], v[4:5], v[2:3]
-; GFX8-NEXT:    v_cmp_gt_i64_e64 s[6:7], 0, v[6:7]
+; GFX8-NEXT:    v_cmp_ge_i64_e64 s[6:7], 0, v[6:7]
 ; GFX8-NEXT:    v_ashrrev_i32_e32 v2, 31, v5
 ; GFX8-NEXT:    v_add_u32_e32 v3, vcc, 0x80000000, v2
 ; GFX8-NEXT:    s_xor_b64 vcc, s[6:7], s[4:5]
@@ -4849,7 +4849,7 @@ define <2 x i64> @v_saddsat_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) {
 ; GFX9-NEXT:    v_add_co_u32_e32 v8, vcc, v0, v4
 ; GFX9-NEXT:    v_addc_co_u32_e32 v9, vcc, v1, v5, vcc
 ; GFX9-NEXT:    v_cmp_lt_i64_e32 vcc, v[8:9], v[0:1]
-; GFX9-NEXT:    v_cmp_gt_i64_e64 s[4:5], 0, v[4:5]
+; GFX9-NEXT:    v_cmp_ge_i64_e64 s[4:5], 0, v[4:5]
 ; GFX9-NEXT:    v_ashrrev_i32_e32 v0, 31, v9
 ; GFX9-NEXT:    v_add_u32_e32 v1, 0x80000000, v0
 ; GFX9-NEXT:    s_xor_b64 vcc, s[4:5], vcc
@@ -4858,7 +4858,7 @@ define <2 x i64> @v_saddsat_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) {
 ; GFX9-NEXT:    v_add_co_u32_e32 v4, vcc, v2, v6
 ; GFX9-NEXT:    v_addc_co_u32_e32 v5, vcc, v3, v7, vcc
 ; GFX9-NEXT:    v_cmp_lt_i64_e32 vcc, v[4:5], v[2:3]
-; GFX9-NEXT:    v_cmp_gt_i64_e64 s[4:5], 0, v[6:7]
+; GFX9-NEXT:    v_cmp_ge_i64_e64 s[4:5], 0, v[6:7]
 ; GFX9-NEXT:    v_ashrrev_i32_e32 v2, 31, v5
 ; GFX9-NEXT:    v_add_u32_e32 v3, 0x80000000, v2
 ; GFX9-NEXT:    s_xor_b64 vcc, s[4:5], vcc
@@ -4875,10 +4875,10 @@ define <2 x i64> @v_saddsat_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) {
 ; GFX10-NEXT:    v_add_co_ci_u32_e32 v11, vcc_lo, v3, v7, vcc_lo
 ; GFX10-NEXT:    v_ashrrev_i32_e32 v12, 31, v9
 ; GFX10-NEXT:    v_cmp_lt_i64_e32 vcc_lo, v[8:9], v[0:1]
-; GFX10-NEXT:    v_cmp_gt_i64_e64 s4, 0, v[4:5]
+; GFX10-NEXT:    v_cmp_ge_i64_e64 s4, 0, v[4:5]
 ; GFX10-NEXT:    v_ashrrev_i32_e32 v4, 31, v11
 ; GFX10-NEXT:    v_cmp_lt_i64_e64 s5, v[10:11], v[2:3]
-; GFX10-NEXT:    v_cmp_gt_i64_e64 s6, 0, v[6:7]
+; GFX10-NEXT:    v_cmp_ge_i64_e64 s6, 0, v[6:7]
 ; GFX10-NEXT:    v_add_nc_u32_e32 v1, 0x80000000, v12
 ; GFX10-NEXT:    v_add_nc_u32_e32 v3, 0x80000000, v4
 ; GFX10-NEXT:    s_xor_b32 vcc_lo, s4, vcc_lo
@@ -4898,10 +4898,10 @@ define <2 x i64> @v_saddsat_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) {
 ; GFX11-NEXT:    v_add_co_ci_u32_e64 v11, null, v3, v7, vcc_lo
 ; GFX11-NEXT:    v_ashrrev_i32_e32 v12, 31, v9
 ; GFX11-NEXT:    v_cmp_lt_i64_e32 vcc_lo, v[8:9], v[0:1]
-; GFX11-NEXT:    v_cmp_gt_i64_e64 s0, 0, v[4:5]
+; GFX11-NEXT:    v_cmp_ge_i64_e64 s0, 0, v[4:5]
 ; GFX11-NEXT:    v_ashrrev_i32_e32 v4, 31, v11
 ; GFX11-NEXT:    v_cmp_lt_i64_e64 s1, v[10:11], v[2:3]
-; GFX11-NEXT:    v_cmp_gt_i64_e64 s2, 0, v[6:7]
+; GFX11-NEXT:    v_cmp_ge_i64_e64 s2, 0, v[6:7]
 ; GFX11-NEXT:    v_add_nc_u32_e32 v1, 0x80000000, v12
 ; GFX11-NEXT:    v_add_nc_u32_e32 v3, 0x80000000, v4
 ; GFX11-NEXT:    s_xor_b32 vcc_lo, s0, vcc_lo
@@ -5319,13 +5319,13 @@ define amdgpu_ps <4 x float> @saddsat_i128_sv(i128 inreg %lhs, i128 %rhs) {
 ; GFX6-NEXT:    v_mov_b32_e32 v5, s3
 ; GFX6-NEXT:    v_addc_u32_e32 v4, vcc, v4, v2, vcc
 ; GFX6-NEXT:    v_addc_u32_e32 v5, vcc, v5, v3, vcc
-; GFX6-NEXT:    v_cmp_gt_u64_e32 vcc, s[0:1], v[0:1]
+; GFX6-NEXT:    v_cmp_ge_u64_e32 vcc, s[0:1], v[0:1]
 ; GFX6-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; GFX6-NEXT:    v_cmp_gt_i64_e32 vcc, s[2:3], v[4:5]
+; GFX6-NEXT:    v_cmp_ge_i64_e32 vcc, s[2:3], v[4:5]
 ; GFX6-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
 ; GFX6-NEXT:    v_cmp_eq_u64_e32 vcc, s[2:3], v[4:5]
 ; GFX6-NEXT:    v_cndmask_b32_e32 v6, v7, v6, vcc
-; GFX6-NEXT:    v_cmp_gt_i64_e32 vcc, 0, v[2:3]
+; GFX6-NEXT:    v_cmp_ge_i64_e32 vcc, 0, v[2:3]
 ; GFX6-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
 ; GFX6-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[2:3]
 ; GFX6-NEXT:    v_ashrrev_i32_e32 v3, 31, v5
@@ -5350,13 +5350,13 @@ define amdgpu_ps <4 x float> @saddsat_i128_sv(i128 inreg %lhs, i128 %rhs) {
 ; GFX8-NEXT:    v_mov_b32_e32 v5, s3
 ; GFX8-NEXT:    v_addc_u32_e32 v4, vcc, v4, v2, vcc
 ; GFX8-NEXT:    v_addc_u32_e32 v5, vcc, v5, v3, vcc
-; GFX8-NEXT:    v_cmp_gt_u64_e32 vcc, s[0:1], v[0:1]
+; GFX8-NEXT:    v_cmp_ge_u64_e32 vcc, s[0:1], v[0:1]
 ; GFX8-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; GFX8-NEXT:    v_cmp_gt_i64_e32 vcc, s[2:3], v[4:5]
+; GFX8-NEXT:    v_cmp_ge_i64_e32 vcc, s[2:3], v[4:5]
 ; GFX8-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
 ; GFX8-NEXT:    v_cmp_eq_u64_e32 vcc, s[2:3], v[4:5]
 ; GFX8-NEXT:    v_cndmask_b32_e32 v6, v7, v6, vcc
-; GFX8-NEXT:    v_cmp_gt_i64_e32 vcc, 0, v[2:3]
+; GFX8-NEXT:    v_cmp_ge_i64_e32 vcc, 0, v[2:3]
 ; GFX8-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
 ; GFX8-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[2:3]
 ; GFX8-NEXT:    v_ashrrev_i32_e32 v3, 31, v5
@@ -5381,13 +5381,13 @@ define amdgpu_ps <4 x float> @saddsat_i128_sv(i128 inreg %lhs, i128 %rhs) {
 ; GFX9-NEXT:    v_mov_b32_e32 v5, s3
 ; GFX9-NEXT:    v_addc_co_u32_e32 v4, vcc, v4, v2, vcc
 ; GFX9-NEXT:    v_addc_co_u32_e32 v5, vcc, v5, v3, vcc
-; GFX9-NEXT:    v_cmp_gt_u64_e32 vcc, s[0:1], v[0:1]
+; GFX9-NEXT:    v_cmp_ge_u64_e32 vcc, s[0:1], v[0:1]
 ; GFX9-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; GFX9-NEXT:    v_cmp_gt_i64_e32 vcc, s[2:3], v[4:5]
+; GFX9-NEXT:    v_cmp_ge_i64_e32 vcc, s[2:3], v[4:5]
 ; GFX9-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
 ; GFX9-NEXT:    v_cmp_eq_u64_e32 vcc, s[2:3], v[4:5]
 ; GFX9-NEXT:    v_cndmask_b32_e32 v6, v7, v6, vcc
-; GFX9-NEXT:    v_cmp_gt_i64_e32 vcc, 0, v[2:3]
+; GFX9-NEXT:    v_cmp_ge_i64_e32 vcc, 0, v[2:3]
 ; GFX9-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
 ; GFX9-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[2:3]
 ; GFX9-NEXT:    v_ashrrev_i32_e32 v3, 31, v5
@@ -5408,11 +5408,11 @@ define amdgpu_ps <4 x float> @saddsat_i128_sv(i128 inreg %lhs, i128 %rhs) {
 ; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, s1, v1, vcc_lo
 ; GFX10-NEXT:    v_add_co_ci_u32_e32 v4, vcc_lo, s2, v2, vcc_lo
 ; GFX10-NEXT:    v_add_co_ci_u32_e32 v5, vcc_lo, s3, v3, vcc_lo
-; GFX10-NEXT:    v_cmp_gt_u64_e32 vcc_lo, s[0:1], v[0:1]
+; GFX10-NEXT:    v_cmp_ge_u64_e32 vcc_lo, s[0:1], v[0:1]
 ; GFX10-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc_lo
-; GFX10-NEXT:    v_cmp_gt_i64_e32 vcc_lo, s[2:3], v[4:5]
+; GFX10-NEXT:    v_cmp_ge_i64_e32 vcc_lo, s[2:3], v[4:5]
 ; GFX10-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc_lo
-; GFX10-NEXT:    v_cmp_gt_i64_e32 vcc_lo, 0, v[2:3]
+; GFX10-NEXT:    v_cmp_ge_i64_e32 vcc_lo, 0, v[2:3]
 ; GFX10-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc_lo
 ; GFX10-NEXT:    v_cmp_eq_u64_e32 vcc_lo, s[2:3], v[4:5]
 ; GFX10-NEXT:    v_cndmask_b32_e32 v6, v7, v6, vcc_lo
@@ -5435,11 +5435,11 @@ define amdgpu_ps <4 x float> @saddsat_i128_sv(i128 inreg %lhs, i128 %rhs) {
 ; GFX11-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, s1, v1, vcc_lo
 ; GFX11-NEXT:    v_add_co_ci_u32_e32 v4, vcc_lo, s2, v2, vcc_lo
 ; GFX11-NEXT:    v_add_co_ci_u32_e64 v5, null, s3, v3, vcc_lo
-; GFX11-NEXT:    v_cmp_gt_u64_e32 vcc_lo, s[0:1], v[0:1]
+; GFX11-NEXT:    v_cmp_ge_u64_e32 vcc_lo, s[0:1], v[0:1]
 ; GFX11-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc_lo
-; GFX11-NEXT:    v_cmp_gt_i64_e32 vcc_lo, s[2:3], v[4:5]
+; GFX11-NEXT:    v_cmp_ge_i64_e32 vcc_lo, s[2:3], v[4:5]
 ; GFX11-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc_lo
-; GFX11-NEXT:    v_cmp_gt_i64_e32 vcc_lo, 0, v[2:3]
+; GFX11-NEXT:    v_cmp_ge_i64_e32 vcc_lo, 0, v[2:3]
 ; GFX11-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc_lo
 ; GFX11-NEXT:    v_cmp_eq_u64_e32 vcc_lo, s[2:3], v[4:5]
 ; GFX11-NEXT:    v_cndmask_b32_e32 v6, v7, v6, vcc_lo
@@ -5635,7 +5635,7 @@ define <2 x i128> @v_saddsat_v2i128(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GFX6-NEXT:    v_cmp_eq_u64_e32 vcc, v[16:17], v[2:3]
 ; GFX6-NEXT:    v_ashrrev_i32_e32 v2, 31, v17
 ; GFX6-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
-; GFX6-NEXT:    v_cmp_gt_i64_e32 vcc, 0, v[10:11]
+; GFX6-NEXT:    v_cmp_ge_i64_e32 vcc, 0, v[10:11]
 ; GFX6-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
 ; GFX6-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[10:11]
 ; GFX6-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
@@ -5659,7 +5659,7 @@ define <2 x i128> @v_saddsat_v2i128(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GFX6-NEXT:    v_cmp_eq_u64_e32 vcc, v[10:11], v[6:7]
 ; GFX6-NEXT:    v_ashrrev_i32_e32 v6, 31, v11
 ; GFX6-NEXT:    v_cndmask_b32_e32 v4, v5, v4, vcc
-; GFX6-NEXT:    v_cmp_gt_i64_e32 vcc, 0, v[14:15]
+; GFX6-NEXT:    v_cmp_ge_i64_e32 vcc, 0, v[14:15]
 ; GFX6-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
 ; GFX6-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[14:15]
 ; GFX6-NEXT:    v_cndmask_b32_e64 v5, v5, 0, vcc
@@ -5687,7 +5687,7 @@ define <2 x i128> @v_saddsat_v2i128(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GFX8-NEXT:    v_cmp_eq_u64_e32 vcc, v[16:17], v[2:3]
 ; GFX8-NEXT:    v_ashrrev_i32_e32 v2, 31, v17
 ; GFX8-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
-; GFX8-NEXT:    v_cmp_gt_i64_e32 vcc, 0, v[10:11]
+; GFX8-NEXT:    v_cmp_ge_i64_e32 vcc, 0, v[10:11]
 ; GFX8-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
 ; GFX8-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[10:11]
 ; GFX8-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
@@ -5711,7 +5711,7 @@ define <2 x i128> @v_saddsat_v2i128(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GFX8-NEXT:    v_cmp_eq_u64_e32 vcc, v[10:11], v[6:7]
 ; GFX8-NEXT:    v_ashrrev_i32_e32 v6, 31, v11
 ; GFX8-NEXT:    v_cndmask_b32_e32 v4, v5, v4, vcc
-; GFX8-NEXT:    v_cmp_gt_i64_e32 vcc, 0, v[14:15]
+; GFX8-NEXT:    v_cmp_ge_i64_e32 vcc, 0, v[14:15]
 ; GFX8-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
 ; GFX8-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[14:15]
 ; GFX8-NEXT:    v_cndmask_b32_e64 v5, v5, 0, vcc
@@ -5739,7 +5739,7 @@ define <2 x i128> @v_saddsat_v2i128(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GFX9-NEXT:    v_cmp_eq_u64_e32 vcc, v[16:17], v[2:3]
 ; GFX9-NEXT:    v_ashrrev_i32_e32 v2, 31, v17
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
-; GFX9-NEXT:    v_cmp_gt_i64_e32 vcc, 0, v[10:11]
+; GFX9-NEXT:    v_cmp_ge_i64_e32 vcc, 0, v[10:11]
 ; GFX9-NEXT:    v_add_u32_e32 v3, 0x80000000, v2
 ; GFX9-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
 ; GFX9-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[10:11]
@@ -5762,7 +5762,7 @@ define <2 x i128> @v_saddsat_v2i128(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GFX9-NEXT:    v_cmp_eq_u64_e32 vcc, v[10:11], v[6:7]
 ; GFX9-NEXT:    v_ashrrev_i32_e32 v6, 31, v11
 ; GFX9-NEXT:    v_cndmask_b32_e32 v4, v5, v4, vcc
-; GFX9-NEXT:    v_cmp_gt_i64_e32 vcc, 0, v[14:15]
+; GFX9-NEXT:    v_cmp_ge_i64_e32 vcc, 0, v[14:15]
 ; GFX9-NEXT:    v_add_u32_e32 v7, 0x80000000, v6
 ; GFX9-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
 ; GFX9-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[14:15]
@@ -5789,7 +5789,7 @@ define <2 x i128> @v_saddsat_v2i128(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GFX10-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc_lo
 ; GFX10-NEXT:    v_cmp_eq_u64_e32 vcc_lo, v[16:17], v[2:3]
 ; GFX10-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc_lo
-; GFX10-NEXT:    v_cmp_gt_i64_e32 vcc_lo, 0, v[10:11]
+; GFX10-NEXT:    v_cmp_ge_i64_e32 vcc_lo, 0, v[10:11]
 ; GFX10-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc_lo
 ; GFX10-NEXT:    v_add_co_u32 v12, vcc_lo, v4, v12
 ; GFX10-NEXT:    v_add_co_ci_u32_e32 v13, vcc_lo, v5, v13, vcc_lo
@@ -5803,7 +5803,7 @@ define <2 x i128> @v_saddsat_v2i128(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GFX10-NEXT:    v_cmp_lt_i64_e32 vcc_lo, v[18:19], v[6:7]
 ; GFX10-NEXT:    v_and_b32_e32 v0, 1, v0
 ; GFX10-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc_lo
-; GFX10-NEXT:    v_cmp_gt_i64_e32 vcc_lo, 0, v[14:15]
+; GFX10-NEXT:    v_cmp_ge_i64_e32 vcc_lo, 0, v[14:15]
 ; GFX10-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc_lo
 ; GFX10-NEXT:    v_cmp_eq_u64_e32 vcc_lo, v[18:19], v[6:7]
 ; GFX10-NEXT:    v_ashrrev_i32_e32 v6, 31, v19
@@ -5840,7 +5840,7 @@ define <2 x i128> @v_saddsat_v2i128(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GFX11-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc_lo
 ; GFX11-NEXT:    v_cmp_eq_u64_e32 vcc_lo, v[16:17], v[2:3]
 ; GFX11-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc_lo
-; GFX11-NEXT:    v_cmp_gt_i64_e32 vcc_lo, 0, v[10:11]
+; GFX11-NEXT:    v_cmp_ge_i64_e32 vcc_lo, 0, v[10:11]
 ; GFX11-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc_lo
 ; GFX11-NEXT:    v_add_co_u32 v12, vcc_lo, v4, v12
 ; GFX11-NEXT:    v_add_co_ci_u32_e32 v13, vcc_lo, v5, v13, vcc_lo
@@ -5853,7 +5853,7 @@ define <2 x i128> @v_saddsat_v2i128(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GFX11-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc_lo
 ; GFX11-NEXT:    v_cmp_lt_i64_e32 vcc_lo, v[18:19], v[6:7]
 ; GFX11-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc_lo
-; GFX11-NEXT:    v_cmp_gt_i64_e32 vcc_lo, 0, v[14:15]
+; GFX11-NEXT:    v_cmp_ge_i64_e32 vcc_lo, 0, v[14:15]
 ; GFX11-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc_lo
 ; GFX11-NEXT:    v_cmp_eq_u64_e32 vcc_lo, v[18:19], v[6:7]
 ; GFX11-NEXT:    v_ashrrev_i32_e32 v6, 31, v19
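
Across the saddsat checks the pattern is overflow detection for wide
saturating adds: the sign test on the addend (the v_cmp_*_i64 against 0) is
xor-ed with the "sum < lhs" compare to decide whether to clamp toward
INT_MIN/INT_MAX. The underlying tests are just the saturating-add
intrinsics, e.g. (hypothetical name):

  ; Illustrative reduction, not part of the patch.
  define i64 @sample_saddsat_i64(i64 %lhs, i64 %rhs) {
    %r = call i64 @llvm.sadd.sat.i64(i64 %lhs, i64 %rhs)
    ret i64 %r
  }
  declare i64 @llvm.sadd.sat.i64(i64, i64)
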
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i32.ll
index 2fa5492c8a2b7..86d7d034970be 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i32.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i32.ll
@@ -98,12 +98,12 @@ define amdgpu_ps i32 @s_sdiv_i32(i32 inreg %num, i32 inreg %den) {
 ; GISEL-NEXT:    v_mul_lo_u32 v1, v0, s4
 ; GISEL-NEXT:    v_add_i32_e32 v2, vcc, 1, v0
 ; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, s0, v1
-; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s4, v1
+; GISEL-NEXT:    v_cmp_lt_u32_e32 vcc, s4, v1
 ; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; GISEL-NEXT:    v_subrev_i32_e64 v2, s[0:1], s4, v1
 ; GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
 ; GISEL-NEXT:    v_add_i32_e32 v2, vcc, 1, v0
-; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s4, v1
+; GISEL-NEXT:    v_cmp_lt_u32_e32 vcc, s4, v1
 ; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; GISEL-NEXT:    s_xor_b32 s0, s2, s3
 ; GISEL-NEXT:    v_xor_b32_e32 v0, s0, v0
@@ -132,12 +132,12 @@ define amdgpu_ps i32 @s_sdiv_i32(i32 inreg %num, i32 inreg %den) {
 ; CGP-NEXT:    v_mul_lo_u32 v1, v0, s2
 ; CGP-NEXT:    v_add_i32_e32 v2, vcc, 1, v0
 ; CGP-NEXT:    v_sub_i32_e32 v1, vcc, s0, v1
-; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s2, v1
+; CGP-NEXT:    v_cmp_lt_u32_e32 vcc, s2, v1
 ; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; CGP-NEXT:    v_subrev_i32_e64 v2, s[0:1], s2, v1
 ; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
 ; CGP-NEXT:    v_add_i32_e32 v2, vcc, 1, v0
-; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s2, v1
+; CGP-NEXT:    v_cmp_lt_u32_e32 vcc, s2, v1
 ; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; CGP-NEXT:    v_xor_b32_e32 v0, s4, v0
 ; CGP-NEXT:    v_subrev_i32_e32 v0, vcc, s4, v0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll
index 4031fe0be2823..dc0515417b85d 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll
@@ -306,19 +306,19 @@ define amdgpu_ps i64 @s_sdiv_i64(i64 inreg %num, i64 inreg %den) {
 ; CHECK-NEXT:    v_mov_b32_e32 v3, s11
 ; CHECK-NEXT:    v_subb_u32_e64 v2, s[0:1], v5, v1, vcc
 ; CHECK-NEXT:    v_sub_i32_e64 v1, s[0:1], s13, v1
-; CHECK-NEXT:    v_cmp_le_u32_e64 s[0:1], s11, v2
+; CHECK-NEXT:    v_cmp_lt_u32_e64 s[0:1], s11, v2
 ; CHECK-NEXT:    v_subb_u32_e32 v1, vcc, v1, v3, vcc
 ; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, -1, s[0:1]
-; CHECK-NEXT:    v_cmp_le_u32_e64 s[0:1], s10, v0
+; CHECK-NEXT:    v_cmp_lt_u32_e64 s[0:1], s10, v0
 ; CHECK-NEXT:    v_subrev_i32_e32 v0, vcc, s10, v0
 ; CHECK-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
 ; CHECK-NEXT:    v_add_i32_e32 v3, vcc, 1, v4
 ; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, -1, s[0:1]
 ; CHECK-NEXT:    v_cmp_eq_u32_e64 s[0:1], s11, v2
-; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s11, v1
+; CHECK-NEXT:    v_cmp_lt_u32_e32 vcc, s11, v1
 ; CHECK-NEXT:    v_cndmask_b32_e64 v2, v5, v6, s[0:1]
 ; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, -1, vcc
-; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s10, v0
+; CHECK-NEXT:    v_cmp_lt_u32_e32 vcc, s10, v0
 ; CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc
 ; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, s11, v1
 ; CHECK-NEXT:    v_cndmask_b32_e32 v0, v5, v0, vcc
@@ -350,12 +350,12 @@ define amdgpu_ps i64 @s_sdiv_i64(i64 inreg %num, i64 inreg %den) {
 ; CHECK-NEXT:    v_mul_lo_u32 v1, v0, s4
 ; CHECK-NEXT:    v_add_i32_e32 v2, vcc, 1, v0
 ; CHECK-NEXT:    v_sub_i32_e32 v1, vcc, s2, v1
-; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s4, v1
+; CHECK-NEXT:    v_cmp_lt_u32_e32 vcc, s4, v1
 ; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; CHECK-NEXT:    v_subrev_i32_e64 v2, s[0:1], s4, v1
 ; CHECK-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
 ; CHECK-NEXT:    v_add_i32_e32 v2, vcc, 1, v0
-; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s4, v1
+; CHECK-NEXT:    v_cmp_lt_u32_e32 vcc, s4, v1
 ; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; CHECK-NEXT:  .LBB1_5:
 ; CHECK-NEXT:    v_readfirstlane_b32 s0, v0
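
The sdiv.i32/sdiv.i64 changes sit in the quotient refinement steps of the
software division expansion, where the remainder is compared against the
divisor before each conditional "+1" fix-up of the quotient (the
v_cmp_*_u32 ... s4, v1 lines above). A sketch of the source pattern,
mirroring the scalar test's signature with an invented name:

  ; Illustrative reduction, not part of the patch.
  define amdgpu_ps i64 @sample_sdiv_i64(i64 inreg %num, i64 inreg %den) {
    %r = sdiv i64 %num, %den
    ret i64 %r
  }
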
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdivrem.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdivrem.ll
index 02f8d0bf3c3df..27a6d2a48c751 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdivrem.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdivrem.ll
@@ -34,12 +34,12 @@ define amdgpu_kernel void @sdivrem_i32(ptr addrspace(1) %out0, ptr addrspace(1)
 ; GFX8-NEXT:    v_mul_lo_u32 v3, v2, s5
 ; GFX8-NEXT:    v_add_u32_e32 v4, vcc, 1, v2
 ; GFX8-NEXT:    v_sub_u32_e32 v3, vcc, s4, v3
-; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, s5, v3
+; GFX8-NEXT:    v_cmp_lt_u32_e32 vcc, s5, v3
 ; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
 ; GFX8-NEXT:    v_subrev_u32_e64 v4, s[0:1], s5, v3
 ; GFX8-NEXT:    v_cndmask_b32_e32 v3, v3, v4, vcc
 ; GFX8-NEXT:    v_add_u32_e32 v4, vcc, 1, v2
-; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, s5, v3
+; GFX8-NEXT:    v_cmp_lt_u32_e32 vcc, s5, v3
 ; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
 ; GFX8-NEXT:    v_subrev_u32_e64 v4, s[0:1], s5, v3
 ; GFX8-NEXT:    v_xor_b32_e32 v2, s6, v2
@@ -78,12 +78,12 @@ define amdgpu_kernel void @sdivrem_i32(ptr addrspace(1) %out0, ptr addrspace(1)
 ; GFX9-NEXT:    v_mul_lo_u32 v1, v0, s5
 ; GFX9-NEXT:    v_add_u32_e32 v3, 1, v0
 ; GFX9-NEXT:    v_sub_u32_e32 v1, s7, v1
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s5, v1
+; GFX9-NEXT:    v_cmp_lt_u32_e32 vcc, s5, v1
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
 ; GFX9-NEXT:    v_subrev_u32_e32 v3, s5, v1
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
 ; GFX9-NEXT:    v_add_u32_e32 v3, 1, v0
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s5, v1
+; GFX9-NEXT:    v_cmp_lt_u32_e32 vcc, s5, v1
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
 ; GFX9-NEXT:    v_subrev_u32_e32 v3, s5, v1
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
@@ -121,11 +121,11 @@ define amdgpu_kernel void @sdivrem_i32(ptr addrspace(1) %out0, ptr addrspace(1)
 ; GFX10-NEXT:    v_sub_nc_u32_e32 v1, s0, v1
 ; GFX10-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
 ; GFX10-NEXT:    v_subrev_nc_u32_e32 v3, s5, v1
-; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, s5, v1
+; GFX10-NEXT:    v_cmp_lt_u32_e32 vcc_lo, s5, v1
 ; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
 ; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc_lo
 ; GFX10-NEXT:    v_add_nc_u32_e32 v2, 1, v0
-; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, s5, v1
+; GFX10-NEXT:    v_cmp_lt_u32_e32 vcc_lo, s5, v1
 ; GFX10-NEXT:    v_subrev_nc_u32_e32 v3, s5, v1
 ; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
 ; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc_lo
@@ -258,9 +258,9 @@ define amdgpu_kernel void @sdivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1)
 ; GFX8-NEXT:    v_mov_b32_e32 v5, s9
 ; GFX8-NEXT:    v_subb_u32_e64 v2, s[0:1], v6, v1, vcc
 ; GFX8-NEXT:    v_sub_u32_e64 v1, s[0:1], s11, v1
-; GFX8-NEXT:    v_cmp_le_u32_e64 s[0:1], s9, v2
+; GFX8-NEXT:    v_cmp_lt_u32_e64 s[0:1], s9, v2
 ; GFX8-NEXT:    v_cndmask_b32_e64 v6, 0, -1, s[0:1]
-; GFX8-NEXT:    v_cmp_le_u32_e64 s[0:1], s8, v0
+; GFX8-NEXT:    v_cmp_lt_u32_e64 s[0:1], s8, v0
 ; GFX8-NEXT:    v_cndmask_b32_e64 v7, 0, -1, s[0:1]
 ; GFX8-NEXT:    v_cmp_eq_u32_e64 s[0:1], s9, v2
 ; GFX8-NEXT:    v_subb_u32_e32 v1, vcc, v1, v5, vcc
@@ -269,9 +269,9 @@ define amdgpu_kernel void @sdivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1)
 ; GFX8-NEXT:    v_subbrev_u32_e64 v8, s[0:1], 0, v1, vcc
 ; GFX8-NEXT:    v_add_u32_e64 v9, s[0:1], 1, v4
 ; GFX8-NEXT:    v_addc_u32_e64 v10, s[0:1], 0, v3, s[0:1]
-; GFX8-NEXT:    v_cmp_le_u32_e64 s[0:1], s9, v8
+; GFX8-NEXT:    v_cmp_lt_u32_e64 s[0:1], s9, v8
 ; GFX8-NEXT:    v_cndmask_b32_e64 v11, 0, -1, s[0:1]
-; GFX8-NEXT:    v_cmp_le_u32_e64 s[0:1], s8, v7
+; GFX8-NEXT:    v_cmp_lt_u32_e64 s[0:1], s8, v7
 ; GFX8-NEXT:    v_subb_u32_e32 v1, vcc, v1, v5, vcc
 ; GFX8-NEXT:    v_cndmask_b32_e64 v12, 0, -1, s[0:1]
 ; GFX8-NEXT:    v_cmp_eq_u32_e64 s[0:1], s9, v8
@@ -415,10 +415,10 @@ define amdgpu_kernel void @sdivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1)
 ; GFX9-NEXT:    v_mad_u64_u32 v[1:2], s[0:1], s7, v5, v[1:2]
 ; GFX9-NEXT:    v_mov_b32_e32 v4, 0
 ; GFX9-NEXT:    v_subb_co_u32_e64 v2, s[0:1], v6, v1, vcc
-; GFX9-NEXT:    v_cmp_le_u32_e64 s[0:1], s7, v2
+; GFX9-NEXT:    v_cmp_lt_u32_e64 s[0:1], s7, v2
 ; GFX9-NEXT:    v_sub_u32_e32 v1, s9, v1
 ; GFX9-NEXT:    v_cndmask_b32_e64 v6, 0, -1, s[0:1]
-; GFX9-NEXT:    v_cmp_le_u32_e64 s[0:1], s6, v0
+; GFX9-NEXT:    v_cmp_lt_u32_e64 s[0:1], s6, v0
 ; GFX9-NEXT:    v_cndmask_b32_e64 v8, 0, -1, s[0:1]
 ; GFX9-NEXT:    v_cmp_eq_u32_e64 s[0:1], s7, v2
 ; GFX9-NEXT:    v_subb_co_u32_e32 v1, vcc, v1, v7, vcc
@@ -427,9 +427,9 @@ define amdgpu_kernel void @sdivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1)
 ; GFX9-NEXT:    v_subbrev_co_u32_e64 v9, s[0:1], 0, v1, vcc
 ; GFX9-NEXT:    v_add_co_u32_e64 v10, s[0:1], 1, v5
 ; GFX9-NEXT:    v_addc_co_u32_e64 v11, s[0:1], 0, v3, s[0:1]
-; GFX9-NEXT:    v_cmp_le_u32_e64 s[0:1], s7, v9
+; GFX9-NEXT:    v_cmp_lt_u32_e64 s[0:1], s7, v9
 ; GFX9-NEXT:    v_cndmask_b32_e64 v12, 0, -1, s[0:1]
-; GFX9-NEXT:    v_cmp_le_u32_e64 s[0:1], s6, v8
+; GFX9-NEXT:    v_cmp_lt_u32_e64 s[0:1], s6, v8
 ; GFX9-NEXT:    v_subb_co_u32_e32 v1, vcc, v1, v7, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e64 v13, 0, -1, s[0:1]
 ; GFX9-NEXT:    v_cmp_eq_u32_e64 s[0:1], s7, v9
@@ -571,16 +571,16 @@ define amdgpu_kernel void @sdivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1)
 ; GFX10-NEXT:    v_sub_nc_u32_e32 v6, s1, v1
 ; GFX10-NEXT:    v_sub_co_ci_u32_e64 v1, s0, s1, v1, vcc_lo
 ; GFX10-NEXT:    v_subrev_co_ci_u32_e32 v6, vcc_lo, s7, v6, vcc_lo
-; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, s6, v0
+; GFX10-NEXT:    v_cmp_lt_u32_e32 vcc_lo, s6, v0
 ; GFX10-NEXT:    v_cndmask_b32_e64 v7, 0, -1, vcc_lo
 ; GFX10-NEXT:    v_sub_co_u32 v8, vcc_lo, v0, s6
 ; GFX10-NEXT:    v_subrev_co_ci_u32_e64 v9, s0, 0, v6, vcc_lo
-; GFX10-NEXT:    v_cmp_le_u32_e64 s0, s7, v1
+; GFX10-NEXT:    v_cmp_lt_u32_e64 s0, s7, v1
 ; GFX10-NEXT:    v_subrev_co_ci_u32_e32 v6, vcc_lo, s7, v6, vcc_lo
 ; GFX10-NEXT:    v_cndmask_b32_e64 v10, 0, -1, s0
-; GFX10-NEXT:    v_cmp_le_u32_e64 s0, s6, v8
+; GFX10-NEXT:    v_cmp_lt_u32_e64 s0, s6, v8
 ; GFX10-NEXT:    v_cndmask_b32_e64 v11, 0, -1, s0
-; GFX10-NEXT:    v_cmp_le_u32_e64 s0, s7, v9
+; GFX10-NEXT:    v_cmp_lt_u32_e64 s0, s7, v9
 ; GFX10-NEXT:    v_cndmask_b32_e64 v12, 0, -1, s0
 ; GFX10-NEXT:    v_add_co_u32 v13, s0, v2, 1
 ; GFX10-NEXT:    v_add_co_ci_u32_e64 v14, s0, 0, v4, s0
@@ -655,12 +655,12 @@ define amdgpu_kernel void @sdivrem_v2i32(ptr addrspace(1) %out0, ptr addrspace(1
 ; GFX8-NEXT:    v_add_u32_e32 v4, vcc, 1, v0
 ; GFX8-NEXT:    v_mul_hi_u32 v2, v1, v2
 ; GFX8-NEXT:    v_sub_u32_e32 v3, vcc, s0, v3
-; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, s3, v3
+; GFX8-NEXT:    v_cmp_lt_u32_e32 vcc, s3, v3
 ; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
 ; GFX8-NEXT:    v_subrev_u32_e64 v4, s[0:1], s3, v3
 ; GFX8-NEXT:    v_cndmask_b32_e32 v3, v3, v4, vcc
 ; GFX8-NEXT:    v_add_u32_e32 v4, vcc, 1, v0
-; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, s3, v3
+; GFX8-NEXT:    v_cmp_lt_u32_e32 vcc, s3, v3
 ; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
 ; GFX8-NEXT:    v_subrev_u32_e64 v4, s[0:1], s3, v3
 ; GFX8-NEXT:    s_xor_b32 s0, s12, s2
@@ -677,12 +677,12 @@ define amdgpu_kernel void @sdivrem_v2i32(ptr addrspace(1) %out0, ptr addrspace(1
 ; GFX8-NEXT:    v_subrev_u32_e32 v2, vcc, s12, v2
 ; GFX8-NEXT:    v_add_u32_e32 v4, vcc, 1, v1
 ; GFX8-NEXT:    v_sub_u32_e32 v3, vcc, s1, v3
-; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, s11, v3
+; GFX8-NEXT:    v_cmp_lt_u32_e32 vcc, s11, v3
 ; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
 ; GFX8-NEXT:    v_subrev_u32_e64 v4, s[0:1], s11, v3
 ; GFX8-NEXT:    v_cndmask_b32_e32 v3, v3, v4, vcc
 ; GFX8-NEXT:    v_add_u32_e32 v4, vcc, 1, v1
-; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, s11, v3
+; GFX8-NEXT:    v_cmp_lt_u32_e32 vcc, s11, v3
 ; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
 ; GFX8-NEXT:    v_subrev_u32_e64 v4, s[0:1], s11, v3
 ; GFX8-NEXT:    s_xor_b32 s0, s2, s10
@@ -736,12 +736,12 @@ define amdgpu_kernel void @sdivrem_v2i32(ptr addrspace(1) %out0, ptr addrspace(1
 ; GFX9-NEXT:    v_add_u32_e32 v2, 1, v0
 ; GFX9-NEXT:    v_mul_hi_u32 v1, s5, v1
 ; GFX9-NEXT:    v_sub_u32_e32 v3, s4, v3
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s6, v3
+; GFX9-NEXT:    v_cmp_lt_u32_e32 vcc, s6, v3
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; GFX9-NEXT:    v_subrev_u32_e32 v2, s6, v3
 ; GFX9-NEXT:    v_cndmask_b32_e32 v2, v3, v2, vcc
 ; GFX9-NEXT:    v_add_u32_e32 v3, 1, v0
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s6, v2
+; GFX9-NEXT:    v_cmp_lt_u32_e32 vcc, s6, v2
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
 ; GFX9-NEXT:    v_subrev_u32_e32 v3, s6, v2
 ; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
@@ -750,12 +750,12 @@ define amdgpu_kernel void @sdivrem_v2i32(ptr addrspace(1) %out0, ptr addrspace(1
 ; GFX9-NEXT:    s_xor_b32 s4, s10, s8
 ; GFX9-NEXT:    v_xor_b32_e32 v0, s4, v0
 ; GFX9-NEXT:    v_sub_u32_e32 v3, s5, v3
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s7, v3
+; GFX9-NEXT:    v_cmp_lt_u32_e32 vcc, s7, v3
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
 ; GFX9-NEXT:    v_subrev_u32_e32 v4, s7, v3
 ; GFX9-NEXT:    v_cndmask_b32_e32 v3, v3, v4, vcc
 ; GFX9-NEXT:    v_add_u32_e32 v4, 1, v1
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s7, v3
+; GFX9-NEXT:    v_cmp_lt_u32_e32 vcc, s7, v3
 ; GFX9-NEXT:    v_subrev_u32_e32 v0, s4, v0
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
 ; GFX9-NEXT:    v_subrev_u32_e32 v4, s7, v3
@@ -813,18 +813,18 @@ define amdgpu_kernel void @sdivrem_v2i32(ptr addrspace(1) %out0, ptr addrspace(1
 ; GFX10-NEXT:    v_add_nc_u32_e32 v5, 1, v1
 ; GFX10-NEXT:    v_sub_nc_u32_e32 v2, s0, v2
 ; GFX10-NEXT:    v_sub_nc_u32_e32 v3, s7, v3
-; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, s4, v2
+; GFX10-NEXT:    v_cmp_lt_u32_e32 vcc_lo, s4, v2
 ; GFX10-NEXT:    v_subrev_nc_u32_e32 v6, s4, v2
-; GFX10-NEXT:    v_cmp_le_u32_e64 s0, s3, v3
+; GFX10-NEXT:    v_cmp_lt_u32_e64 s0, s3, v3
 ; GFX10-NEXT:    v_subrev_nc_u32_e32 v7, s3, v3
 ; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc_lo
 ; GFX10-NEXT:    v_cndmask_b32_e32 v2, v2, v6, vcc_lo
 ; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, v5, s0
 ; GFX10-NEXT:    v_cndmask_b32_e64 v3, v3, v7, s0
 ; GFX10-NEXT:    v_add_nc_u32_e32 v4, 1, v0
-; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, s4, v2
+; GFX10-NEXT:    v_cmp_lt_u32_e32 vcc_lo, s4, v2
 ; GFX10-NEXT:    v_add_nc_u32_e32 v5, 1, v1
-; GFX10-NEXT:    v_cmp_le_u32_e64 s0, s3, v3
+; GFX10-NEXT:    v_cmp_lt_u32_e64 s0, s3, v3
 ; GFX10-NEXT:    v_subrev_nc_u32_e32 v6, s4, v2
 ; GFX10-NEXT:    v_subrev_nc_u32_e32 v7, s3, v3
 ; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc_lo
@@ -886,12 +886,12 @@ define amdgpu_kernel void @sdivrem_v4i32(ptr addrspace(1) %out0, ptr addrspace(1
 ; GFX8-NEXT:    v_mul_lo_u32 v2, v0, s3
 ; GFX8-NEXT:    v_add_u32_e32 v3, vcc, 1, v0
 ; GFX8-NEXT:    v_sub_u32_e32 v2, vcc, s0, v2
-; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, s3, v2
+; GFX8-NEXT:    v_cmp_lt_u32_e32 vcc, s3, v2
 ; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
 ; GFX8-NEXT:    v_subrev_u32_e64 v3, s[0:1], s3, v2
 ; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
 ; GFX8-NEXT:    v_add_u32_e32 v3, vcc, 1, v0
-; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, s3, v2
+; GFX8-NEXT:    v_cmp_lt_u32_e32 vcc, s3, v2
 ; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
 ; GFX8-NEXT:    v_subrev_u32_e64 v3, s[0:1], s3, v2
 ; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
@@ -912,7 +912,7 @@ define amdgpu_kernel void @sdivrem_v4i32(ptr addrspace(1) %out0, ptr addrspace(1
 ; GFX8-NEXT:    s_add_i32 s0, s18, s3
 ; GFX8-NEXT:    v_sub_u32_e32 v2, vcc, s1, v3
 ; GFX8-NEXT:    v_add_u32_e32 v3, vcc, 1, v1
-; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, s10, v2
+; GFX8-NEXT:    v_cmp_lt_u32_e32 vcc, s10, v2
 ; GFX8-NEXT:    s_xor_b32 s8, s0, s3
 ; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
 ; GFX8-NEXT:    v_cvt_f32_u32_e32 v3, s8
@@ -920,7 +920,7 @@ define amdgpu_kernel void @sdivrem_v4i32(ptr addrspace(1) %out0, ptr addrspace(1
 ; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v5, vcc
 ; GFX8-NEXT:    v_rcp_iflag_f32_e32 v3, v3
 ; GFX8-NEXT:    v_add_u32_e32 v5, vcc, 1, v1
-; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, s10, v2
+; GFX8-NEXT:    v_cmp_lt_u32_e32 vcc, s10, v2
 ; GFX8-NEXT:    v_mul_f32_e32 v3, 0x4f7ffffe, v3
 ; GFX8-NEXT:    v_cvt_u32_f32_e32 v3, v3
 ; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
@@ -944,7 +944,7 @@ define amdgpu_kernel void @sdivrem_v4i32(ptr addrspace(1) %out0, ptr addrspace(1
 ; GFX8-NEXT:    s_add_i32 s0, s19, s2
 ; GFX8-NEXT:    v_sub_u32_e32 v2, vcc, s1, v6
 ; GFX8-NEXT:    v_add_u32_e32 v6, vcc, 1, v3
-; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, s8, v2
+; GFX8-NEXT:    v_cmp_lt_u32_e32 vcc, s8, v2
 ; GFX8-NEXT:    s_xor_b32 s10, s0, s2
 ; GFX8-NEXT:    v_cndmask_b32_e32 v3, v3, v6, vcc
 ; GFX8-NEXT:    v_cvt_f32_u32_e32 v6, s10
@@ -952,7 +952,7 @@ define amdgpu_kernel void @sdivrem_v4i32(ptr addrspace(1) %out0, ptr addrspace(1
 ; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v7, vcc
 ; GFX8-NEXT:    v_rcp_iflag_f32_e32 v6, v6
 ; GFX8-NEXT:    v_add_u32_e32 v7, vcc, 1, v3
-; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, s8, v2
+; GFX8-NEXT:    v_cmp_lt_u32_e32 vcc, s8, v2
 ; GFX8-NEXT:    v_mul_f32_e32 v6, 0x4f7ffffe, v6
 ; GFX8-NEXT:    v_cvt_u32_f32_e32 v6, v6
 ; GFX8-NEXT:    v_cndmask_b32_e32 v3, v3, v7, vcc
@@ -974,12 +974,12 @@ define amdgpu_kernel void @sdivrem_v4i32(ptr addrspace(1) %out0, ptr addrspace(1
 ; GFX8-NEXT:    v_subrev_u32_e32 v6, vcc, s9, v3
 ; GFX8-NEXT:    v_sub_u32_e32 v3, vcc, s1, v7
 ; GFX8-NEXT:    v_add_u32_e32 v7, vcc, 1, v8
-; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, s10, v3
+; GFX8-NEXT:    v_cmp_lt_u32_e32 vcc, s10, v3
 ; GFX8-NEXT:    v_cndmask_b32_e32 v7, v8, v7, vcc
 ; GFX8-NEXT:    v_subrev_u32_e64 v8, s[0:1], s10, v3
 ; GFX8-NEXT:    v_cndmask_b32_e32 v3, v3, v8, vcc
 ; GFX8-NEXT:    v_add_u32_e32 v8, vcc, 1, v7
-; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, s10, v3
+; GFX8-NEXT:    v_cmp_lt_u32_e32 vcc, s10, v3
 ; GFX8-NEXT:    v_cndmask_b32_e32 v7, v7, v8, vcc
 ; GFX8-NEXT:    v_subrev_u32_e64 v8, s[0:1], s10, v3
 ; GFX8-NEXT:    v_cndmask_b32_e32 v8, v3, v8, vcc
@@ -1034,12 +1034,12 @@ define amdgpu_kernel void @sdivrem_v4i32(ptr addrspace(1) %out0, ptr addrspace(1
 ; GFX9-NEXT:    v_add_u32_e32 v2, 1, v0
 ; GFX9-NEXT:    v_mul_hi_u32 v1, s1, v1
 ; GFX9-NEXT:    v_sub_u32_e32 v3, s0, v3
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s4, v3
+; GFX9-NEXT:    v_cmp_lt_u32_e32 vcc, s4, v3
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; GFX9-NEXT:    v_subrev_u32_e32 v2, s4, v3
 ; GFX9-NEXT:    v_cndmask_b32_e32 v2, v3, v2, vcc
 ; GFX9-NEXT:    v_add_u32_e32 v3, 1, v0
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s4, v2
+; GFX9-NEXT:    v_cmp_lt_u32_e32 vcc, s4, v2
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
 ; GFX9-NEXT:    v_subrev_u32_e32 v3, s4, v2
 ; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
@@ -1055,7 +1055,7 @@ define amdgpu_kernel void @sdivrem_v4i32(ptr addrspace(1) %out0, ptr addrspace(1
 ; GFX9-NEXT:    s_xor_b32 s1, s1, s0
 ; GFX9-NEXT:    v_cvt_f32_u32_e32 v3, s1
 ; GFX9-NEXT:    v_add_u32_e32 v5, 1, v1
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s5, v2
+; GFX9-NEXT:    v_cmp_lt_u32_e32 vcc, s5, v2
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
 ; GFX9-NEXT:    v_rcp_iflag_f32_e32 v3, v3
 ; GFX9-NEXT:    v_subrev_u32_e32 v5, s5, v2
@@ -1063,7 +1063,7 @@ define amdgpu_kernel void @sdivrem_v4i32(ptr addrspace(1) %out0, ptr addrspace(1
 ; GFX9-NEXT:    v_add_u32_e32 v5, 1, v1
 ; GFX9-NEXT:    v_mul_f32_e32 v3, 0x4f7ffffe, v3
 ; GFX9-NEXT:    v_cvt_u32_f32_e32 v3, v3
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s5, v2
+; GFX9-NEXT:    v_cmp_lt_u32_e32 vcc, s5, v2
 ; GFX9-NEXT:    s_sub_i32 s4, 0, s1
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
 ; GFX9-NEXT:    v_mul_lo_u32 v5, s4, v3
@@ -1092,13 +1092,13 @@ define amdgpu_kernel void @sdivrem_v4i32(ptr addrspace(1) %out0, ptr addrspace(1
 ; GFX9-NEXT:    s_sub_i32 s2, 0, s6
 ; GFX9-NEXT:    v_mul_lo_u32 v8, s2, v7
 ; GFX9-NEXT:    v_add_u32_e32 v6, 1, v3
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s1, v2
+; GFX9-NEXT:    v_cmp_lt_u32_e32 vcc, s1, v2
 ; GFX9-NEXT:    v_cndmask_b32_e32 v3, v3, v6, vcc
 ; GFX9-NEXT:    v_subrev_u32_e32 v6, s1, v2
 ; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v6, vcc
 ; GFX9-NEXT:    v_mul_hi_u32 v8, v7, v8
 ; GFX9-NEXT:    v_add_u32_e32 v6, 1, v3
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s1, v2
+; GFX9-NEXT:    v_cmp_lt_u32_e32 vcc, s1, v2
 ; GFX9-NEXT:    v_cndmask_b32_e32 v3, v3, v6, vcc
 ; GFX9-NEXT:    v_subrev_u32_e32 v6, s1, v2
 ; GFX9-NEXT:    s_ashr_i32 s1, s3, 31
@@ -1114,12 +1114,12 @@ define amdgpu_kernel void @sdivrem_v4i32(ptr addrspace(1) %out0, ptr addrspace(1
 ; GFX9-NEXT:    v_subrev_u32_e32 v2, s0, v2
 ; GFX9-NEXT:    s_xor_b32 s0, s1, s4
 ; GFX9-NEXT:    v_sub_u32_e32 v3, s2, v3
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s6, v3
+; GFX9-NEXT:    v_cmp_lt_u32_e32 vcc, s6, v3
 ; GFX9-NEXT:    v_cndmask_b32_e32 v7, v7, v8, vcc
 ; GFX9-NEXT:    v_subrev_u32_e32 v8, s6, v3
 ; GFX9-NEXT:    v_cndmask_b32_e32 v3, v3, v8, vcc
 ; GFX9-NEXT:    v_add_u32_e32 v8, 1, v7
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s6, v3
+; GFX9-NEXT:    v_cmp_lt_u32_e32 vcc, s6, v3
 ; GFX9-NEXT:    v_cndmask_b32_e32 v7, v7, v8, vcc
 ; GFX9-NEXT:    v_subrev_u32_e32 v8, s6, v3
 ; GFX9-NEXT:    v_cndmask_b32_e32 v8, v3, v8, vcc
@@ -1215,10 +1215,10 @@ define amdgpu_kernel void @sdivrem_v4i32(ptr addrspace(1) %out0, ptr addrspace(1
 ; GFX10-NEXT:    v_sub_nc_u32_e32 v5, s1, v5
 ; GFX10-NEXT:    v_sub_nc_u32_e32 v6, s2, v6
 ; GFX10-NEXT:    v_sub_nc_u32_e32 v7, s3, v7
-; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, s14, v4
-; GFX10-NEXT:    v_cmp_le_u32_e64 s0, s15, v5
-; GFX10-NEXT:    v_cmp_le_u32_e64 s1, s16, v6
-; GFX10-NEXT:    v_cmp_le_u32_e64 s2, s17, v7
+; GFX10-NEXT:    v_cmp_lt_u32_e32 vcc_lo, s14, v4
+; GFX10-NEXT:    v_cmp_lt_u32_e64 s0, s15, v5
+; GFX10-NEXT:    v_cmp_lt_u32_e64 s1, s16, v6
+; GFX10-NEXT:    v_cmp_lt_u32_e64 s2, s17, v7
 ; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc_lo
 ; GFX10-NEXT:    v_subrev_nc_u32_e32 v8, s14, v4
 ; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, v9, s0
@@ -1235,10 +1235,10 @@ define amdgpu_kernel void @sdivrem_v4i32(ptr addrspace(1) %out0, ptr addrspace(1
 ; GFX10-NEXT:    v_add_nc_u32_e32 v9, 1, v1
 ; GFX10-NEXT:    v_add_nc_u32_e32 v10, 1, v2
 ; GFX10-NEXT:    v_add_nc_u32_e32 v11, 1, v3
-; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, s14, v4
-; GFX10-NEXT:    v_cmp_le_u32_e64 s0, s15, v5
-; GFX10-NEXT:    v_cmp_le_u32_e64 s1, s16, v6
-; GFX10-NEXT:    v_cmp_le_u32_e64 s2, s17, v7
+; GFX10-NEXT:    v_cmp_lt_u32_e32 vcc_lo, s14, v4
+; GFX10-NEXT:    v_cmp_lt_u32_e64 s0, s15, v5
+; GFX10-NEXT:    v_cmp_lt_u32_e64 s1, s16, v6
+; GFX10-NEXT:    v_cmp_lt_u32_e64 s2, s17, v7
 ; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc_lo
 ; GFX10-NEXT:    v_subrev_nc_u32_e32 v8, s14, v4
 ; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, v9, s0
@@ -1398,9 +1398,9 @@ define amdgpu_kernel void @sdivrem_v2i64(ptr addrspace(1) %out0, ptr addrspace(1
 ; GFX8-NEXT:    s_ashr_i32 s10, s3, 31
 ; GFX8-NEXT:    v_subb_u32_e64 v6, s[0:1], v6, v1, vcc
 ; GFX8-NEXT:    v_sub_u32_e64 v0, s[0:1], s11, v1
-; GFX8-NEXT:    v_cmp_le_u32_e64 s[0:1], s9, v6
+; GFX8-NEXT:    v_cmp_lt_u32_e64 s[0:1], s9, v6
 ; GFX8-NEXT:    v_cndmask_b32_e64 v1, 0, -1, s[0:1]
-; GFX8-NEXT:    v_cmp_le_u32_e64 s[0:1], s8, v7
+; GFX8-NEXT:    v_cmp_lt_u32_e64 s[0:1], s8, v7
 ; GFX8-NEXT:    v_subb_u32_e32 v0, vcc, v0, v5, vcc
 ; GFX8-NEXT:    v_cndmask_b32_e64 v2, 0, -1, s[0:1]
 ; GFX8-NEXT:    v_cmp_eq_u32_e64 s[0:1], s9, v6
@@ -1409,9 +1409,9 @@ define amdgpu_kernel void @sdivrem_v2i64(ptr addrspace(1) %out0, ptr addrspace(1
 ; GFX8-NEXT:    v_subbrev_u32_e64 v9, s[0:1], 0, v0, vcc
 ; GFX8-NEXT:    v_add_u32_e64 v1, s[0:1], 1, v4
 ; GFX8-NEXT:    v_addc_u32_e64 v10, s[0:1], 0, v3, s[0:1]
-; GFX8-NEXT:    v_cmp_le_u32_e64 s[0:1], s9, v9
+; GFX8-NEXT:    v_cmp_lt_u32_e64 s[0:1], s9, v9
 ; GFX8-NEXT:    v_cndmask_b32_e64 v11, 0, -1, s[0:1]
-; GFX8-NEXT:    v_cmp_le_u32_e64 s[0:1], s8, v8
+; GFX8-NEXT:    v_cmp_lt_u32_e64 s[0:1], s8, v8
 ; GFX8-NEXT:    v_cndmask_b32_e64 v12, 0, -1, s[0:1]
 ; GFX8-NEXT:    v_cmp_eq_u32_e64 s[0:1], s9, v9
 ; GFX8-NEXT:    v_cndmask_b32_e64 v11, v11, v12, s[0:1]
@@ -1543,18 +1543,18 @@ define amdgpu_kernel void @sdivrem_v2i64(ptr addrspace(1) %out0, ptr addrspace(1
 ; GFX8-NEXT:    v_mov_b32_e32 v3, s3
 ; GFX8-NEXT:    v_subb_u32_e64 v7, s[0:1], v10, v6, vcc
 ; GFX8-NEXT:    v_sub_u32_e64 v6, s[0:1], s9, v6
-; GFX8-NEXT:    v_cmp_le_u32_e64 s[0:1], s3, v7
+; GFX8-NEXT:    v_cmp_lt_u32_e64 s[0:1], s3, v7
 ; GFX8-NEXT:    v_cndmask_b32_e64 v10, 0, -1, s[0:1]
-; GFX8-NEXT:    v_cmp_le_u32_e64 s[0:1], s2, v2
+; GFX8-NEXT:    v_cmp_lt_u32_e64 s[0:1], s2, v2
 ; GFX8-NEXT:    v_cndmask_b32_e64 v11, 0, -1, s[0:1]
 ; GFX8-NEXT:    v_cmp_eq_u32_e64 s[0:1], s3, v7
 ; GFX8-NEXT:    v_subb_u32_e32 v6, vcc, v6, v3, vcc
 ; GFX8-NEXT:    v_cndmask_b32_e64 v10, v10, v11, s[0:1]
 ; GFX8-NEXT:    v_subrev_u32_e32 v11, vcc, s2, v2
 ; GFX8-NEXT:    v_subbrev_u32_e64 v12, s[0:1], 0, v6, vcc
-; GFX8-NEXT:    v_cmp_le_u32_e64 s[0:1], s3, v12
+; GFX8-NEXT:    v_cmp_lt_u32_e64 s[0:1], s3, v12
 ; GFX8-NEXT:    v_cndmask_b32_e64 v13, 0, -1, s[0:1]
-; GFX8-NEXT:    v_cmp_le_u32_e64 s[0:1], s2, v11
+; GFX8-NEXT:    v_cmp_lt_u32_e64 s[0:1], s2, v11
 ; GFX8-NEXT:    v_cndmask_b32_e64 v14, 0, -1, s[0:1]
 ; GFX8-NEXT:    v_cmp_eq_u32_e64 s[0:1], s3, v12
 ; GFX8-NEXT:    v_cndmask_b32_e64 v13, v13, v14, s[0:1]
@@ -1706,9 +1706,9 @@ define amdgpu_kernel void @sdivrem_v2i64(ptr addrspace(1) %out0, ptr addrspace(1
 ; GFX9-NEXT:    s_ashr_i32 s10, s3, 31
 ; GFX9-NEXT:    v_subb_co_u32_e64 v6, s[0:1], v6, v1, vcc
 ; GFX9-NEXT:    v_sub_u32_e32 v0, s11, v1
-; GFX9-NEXT:    v_cmp_le_u32_e64 s[0:1], s9, v6
+; GFX9-NEXT:    v_cmp_lt_u32_e64 s[0:1], s9, v6
 ; GFX9-NEXT:    v_cndmask_b32_e64 v1, 0, -1, s[0:1]
-; GFX9-NEXT:    v_cmp_le_u32_e64 s[0:1], s8, v7
+; GFX9-NEXT:    v_cmp_lt_u32_e64 s[0:1], s8, v7
 ; GFX9-NEXT:    v_subb_co_u32_e32 v0, vcc, v0, v4, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e64 v2, 0, -1, s[0:1]
 ; GFX9-NEXT:    v_cmp_eq_u32_e64 s[0:1], s9, v6
@@ -1717,9 +1717,9 @@ define amdgpu_kernel void @sdivrem_v2i64(ptr addrspace(1) %out0, ptr addrspace(1
 ; GFX9-NEXT:    v_subbrev_co_u32_e64 v10, s[0:1], 0, v0, vcc
 ; GFX9-NEXT:    v_add_co_u32_e64 v2, s[0:1], 1, v5
 ; GFX9-NEXT:    v_addc_co_u32_e64 v11, s[0:1], 0, v3, s[0:1]
-; GFX9-NEXT:    v_cmp_le_u32_e64 s[0:1], s9, v10
+; GFX9-NEXT:    v_cmp_lt_u32_e64 s[0:1], s9, v10
 ; GFX9-NEXT:    v_cndmask_b32_e64 v1, 0, -1, s[0:1]
-; GFX9-NEXT:    v_cmp_le_u32_e64 s[0:1], s8, v9
+; GFX9-NEXT:    v_cmp_lt_u32_e64 s[0:1], s8, v9
 ; GFX9-NEXT:    v_cndmask_b32_e64 v12, 0, -1, s[0:1]
 ; GFX9-NEXT:    v_cmp_eq_u32_e64 s[0:1], s9, v10
 ; GFX9-NEXT:    v_cndmask_b32_e64 v12, v1, v12, s[0:1]
@@ -1847,19 +1847,19 @@ define amdgpu_kernel void @sdivrem_v2i64(ptr addrspace(1) %out0, ptr addrspace(1
 ; GFX9-NEXT:    v_mad_u64_u32 v[6:7], s[0:1], s3, v10, v[6:7]
 ; GFX9-NEXT:    v_mov_b32_e32 v3, s3
 ; GFX9-NEXT:    v_subb_co_u32_e64 v7, s[0:1], v9, v6, vcc
-; GFX9-NEXT:    v_cmp_le_u32_e64 s[0:1], s3, v7
+; GFX9-NEXT:    v_cmp_lt_u32_e64 s[0:1], s3, v7
 ; GFX9-NEXT:    v_sub_u32_e32 v6, s9, v6
 ; GFX9-NEXT:    v_cndmask_b32_e64 v9, 0, -1, s[0:1]
-; GFX9-NEXT:    v_cmp_le_u32_e64 s[0:1], s2, v2
+; GFX9-NEXT:    v_cmp_lt_u32_e64 s[0:1], s2, v2
 ; GFX9-NEXT:    v_cndmask_b32_e64 v11, 0, -1, s[0:1]
 ; GFX9-NEXT:    v_cmp_eq_u32_e64 s[0:1], s3, v7
 ; GFX9-NEXT:    v_subb_co_u32_e32 v6, vcc, v6, v3, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e64 v9, v9, v11, s[0:1]
 ; GFX9-NEXT:    v_subrev_co_u32_e32 v11, vcc, s2, v2
 ; GFX9-NEXT:    v_subbrev_co_u32_e64 v12, s[0:1], 0, v6, vcc
-; GFX9-NEXT:    v_cmp_le_u32_e64 s[0:1], s3, v12
+; GFX9-NEXT:    v_cmp_lt_u32_e64 s[0:1], s3, v12
 ; GFX9-NEXT:    v_cndmask_b32_e64 v13, 0, -1, s[0:1]
-; GFX9-NEXT:    v_cmp_le_u32_e64 s[0:1], s2, v11
+; GFX9-NEXT:    v_cmp_lt_u32_e64 s[0:1], s2, v11
 ; GFX9-NEXT:    v_cndmask_b32_e64 v14, 0, -1, s[0:1]
 ; GFX9-NEXT:    v_cmp_eq_u32_e64 s[0:1], s3, v12
 ; GFX9-NEXT:    v_cndmask_b32_e64 v13, v13, v14, s[0:1]
@@ -2109,14 +2109,14 @@ define amdgpu_kernel void @sdivrem_v2i64(ptr addrspace(1) %out0, ptr addrspace(1
 ; GFX10-NEXT:    v_subrev_co_ci_u32_e32 v9, vcc_lo, s7, v9, vcc_lo
 ; GFX10-NEXT:    v_sub_co_u32 v3, vcc_lo, v14, s6
 ; GFX10-NEXT:    v_subrev_co_ci_u32_e64 v16, s0, 0, v9, vcc_lo
-; GFX10-NEXT:    v_cmp_le_u32_e64 s0, s6, v14
+; GFX10-NEXT:    v_cmp_lt_u32_e64 s0, s6, v14
 ; GFX10-NEXT:    v_subrev_co_ci_u32_e32 v9, vcc_lo, s7, v9, vcc_lo
 ; GFX10-NEXT:    v_cndmask_b32_e64 v17, 0, -1, s0
-; GFX10-NEXT:    v_cmp_le_u32_e64 s0, s6, v3
+; GFX10-NEXT:    v_cmp_lt_u32_e64 s0, s6, v3
 ; GFX10-NEXT:    v_cndmask_b32_e64 v18, 0, -1, s0
-; GFX10-NEXT:    v_cmp_le_u32_e64 s0, s7, v16
+; GFX10-NEXT:    v_cmp_lt_u32_e64 s0, s7, v16
 ; GFX10-NEXT:    v_cndmask_b32_e64 v19, 0, -1, s0
-; GFX10-NEXT:    v_cmp_le_u32_e64 s0, s7, v15
+; GFX10-NEXT:    v_cmp_lt_u32_e64 s0, s7, v15
 ; GFX10-NEXT:    v_cndmask_b32_e64 v20, 0, -1, s0
 ; GFX10-NEXT:    v_mad_u64_u32 v[0:1], s0, s2, v5, v[0:1]
 ; GFX10-NEXT:    v_cmp_eq_u32_e64 s0, s7, v16
@@ -2139,10 +2139,10 @@ define amdgpu_kernel void @sdivrem_v2i64(ptr addrspace(1) %out0, ptr addrspace(1
 ; GFX10-NEXT:    v_sub_nc_u32_e32 v0, s19, v0
 ; GFX10-NEXT:    v_cndmask_b32_e64 v3, v14, v3, s0
 ; GFX10-NEXT:    v_cndmask_b32_e64 v7, v15, v7, s0
-; GFX10-NEXT:    v_cmp_le_u32_e64 s0, s3, v8
+; GFX10-NEXT:    v_cmp_lt_u32_e64 s0, s3, v8
 ; GFX10-NEXT:    v_xor_b32_e32 v1, s16, v1
 ; GFX10-NEXT:    v_subrev_co_ci_u32_e32 v11, vcc_lo, s3, v0, vcc_lo
-; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, s2, v2
+; GFX10-NEXT:    v_cmp_lt_u32_e32 vcc_lo, s2, v2
 ; GFX10-NEXT:    v_xor_b32_e32 v4, s17, v4
 ; GFX10-NEXT:    v_cndmask_b32_e64 v9, 0, -1, s0
 ; GFX10-NEXT:    v_xor_b32_e32 v3, s4, v3
@@ -2155,9 +2155,9 @@ define amdgpu_kernel void @sdivrem_v2i64(ptr addrspace(1) %out0, ptr addrspace(1
 ; GFX10-NEXT:    v_cmp_eq_u32_e64 s0, s3, v8
 ; GFX10-NEXT:    v_subrev_co_ci_u32_e32 v11, vcc_lo, s3, v11, vcc_lo
 ; GFX10-NEXT:    v_cndmask_b32_e64 v4, v9, v12, s0
-; GFX10-NEXT:    v_cmp_le_u32_e64 s0, s3, v14
+; GFX10-NEXT:    v_cmp_lt_u32_e64 s0, s3, v14
 ; GFX10-NEXT:    v_cndmask_b32_e64 v9, 0, -1, s0
-; GFX10-NEXT:    v_cmp_le_u32_e64 s0, s2, v13
+; GFX10-NEXT:    v_cmp_lt_u32_e64 s0, s2, v13
 ; GFX10-NEXT:    v_cndmask_b32_e64 v12, 0, -1, s0
 ; GFX10-NEXT:    v_add_co_u32 v15, s0, v6, 1
 ; GFX10-NEXT:    v_add_co_ci_u32_e64 v16, s0, 0, v5, s0
@@ -2231,12 +2231,12 @@ define amdgpu_kernel void @sdiv_i8(ptr addrspace(1) %out0, ptr addrspace(1) %out
 ; GFX8-NEXT:    v_mul_lo_u32 v3, v2, s6
 ; GFX8-NEXT:    v_add_u32_e32 v4, vcc, 1, v2
 ; GFX8-NEXT:    v_sub_u32_e32 v3, vcc, s4, v3
-; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, s6, v3
+; GFX8-NEXT:    v_cmp_lt_u32_e32 vcc, s6, v3
 ; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
 ; GFX8-NEXT:    v_subrev_u32_e64 v4, s[0:1], s6, v3
 ; GFX8-NEXT:    v_cndmask_b32_e32 v3, v3, v4, vcc
 ; GFX8-NEXT:    v_add_u32_e32 v4, vcc, 1, v2
-; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, s6, v3
+; GFX8-NEXT:    v_cmp_lt_u32_e32 vcc, s6, v3
 ; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
 ; GFX8-NEXT:    v_subrev_u32_e64 v4, s[0:1], s6, v3
 ; GFX8-NEXT:    v_xor_b32_e32 v2, s5, v2
@@ -2277,12 +2277,12 @@ define amdgpu_kernel void @sdiv_i8(ptr addrspace(1) %out0, ptr addrspace(1) %out
 ; GFX9-NEXT:    v_mul_lo_u32 v1, v0, s5
 ; GFX9-NEXT:    v_add_u32_e32 v3, 1, v0
 ; GFX9-NEXT:    v_sub_u32_e32 v1, s7, v1
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s5, v1
+; GFX9-NEXT:    v_cmp_lt_u32_e32 vcc, s5, v1
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
 ; GFX9-NEXT:    v_subrev_u32_e32 v3, s5, v1
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
 ; GFX9-NEXT:    v_add_u32_e32 v3, 1, v0
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s5, v1
+; GFX9-NEXT:    v_cmp_lt_u32_e32 vcc, s5, v1
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
 ; GFX9-NEXT:    v_subrev_u32_e32 v3, s5, v1
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
@@ -2322,11 +2322,11 @@ define amdgpu_kernel void @sdiv_i8(ptr addrspace(1) %out0, ptr addrspace(1) %out
 ; GFX10-NEXT:    v_sub_nc_u32_e32 v1, s0, v1
 ; GFX10-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
 ; GFX10-NEXT:    v_subrev_nc_u32_e32 v3, s5, v1
-; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, s5, v1
+; GFX10-NEXT:    v_cmp_lt_u32_e32 vcc_lo, s5, v1
 ; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
 ; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc_lo
 ; GFX10-NEXT:    v_add_nc_u32_e32 v2, 1, v0
-; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, s5, v1
+; GFX10-NEXT:    v_cmp_lt_u32_e32 vcc_lo, s5, v1
 ; GFX10-NEXT:    v_subrev_nc_u32_e32 v3, s5, v1
 ; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
 ; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc_lo
@@ -2383,12 +2383,12 @@ define amdgpu_kernel void @sdivrem_v2i8(ptr addrspace(1) %out0, ptr addrspace(1)
 ; GFX8-NEXT:    v_mul_lo_u32 v2, v0, s10
 ; GFX8-NEXT:    v_add_u32_e32 v3, vcc, 1, v0
 ; GFX8-NEXT:    v_sub_u32_e32 v2, vcc, s0, v2
-; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, s10, v2
+; GFX8-NEXT:    v_cmp_lt_u32_e32 vcc, s10, v2
 ; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
 ; GFX8-NEXT:    v_subrev_u32_e64 v3, s[0:1], s10, v2
 ; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
 ; GFX8-NEXT:    v_add_u32_e32 v3, vcc, 1, v0
-; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, s10, v2
+; GFX8-NEXT:    v_cmp_lt_u32_e32 vcc, s10, v2
 ; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
 ; GFX8-NEXT:    v_subrev_u32_e64 v3, s[0:1], s10, v2
 ; GFX8-NEXT:    s_sub_i32 s1, 0, s13
@@ -2409,12 +2409,12 @@ define amdgpu_kernel void @sdivrem_v2i8(ptr addrspace(1) %out0, ptr addrspace(1)
 ; GFX8-NEXT:    v_subrev_u32_e32 v2, vcc, s11, v2
 ; GFX8-NEXT:    v_add_u32_e32 v4, vcc, 1, v1
 ; GFX8-NEXT:    v_sub_u32_e32 v3, vcc, s1, v3
-; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, s13, v3
+; GFX8-NEXT:    v_cmp_lt_u32_e32 vcc, s13, v3
 ; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
 ; GFX8-NEXT:    v_subrev_u32_e64 v4, s[0:1], s13, v3
 ; GFX8-NEXT:    v_cndmask_b32_e32 v3, v3, v4, vcc
 ; GFX8-NEXT:    v_add_u32_e32 v4, vcc, 1, v1
-; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, s13, v3
+; GFX8-NEXT:    v_cmp_lt_u32_e32 vcc, s13, v3
 ; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
 ; GFX8-NEXT:    v_subrev_u32_e64 v4, s[0:1], s13, v3
 ; GFX8-NEXT:    s_xor_b32 s0, s2, s12
@@ -2477,7 +2477,7 @@ define amdgpu_kernel void @sdivrem_v2i8(ptr addrspace(1) %out0, ptr addrspace(1)
 ; GFX9-NEXT:    s_add_i32 s4, s4, s11
 ; GFX9-NEXT:    v_add_u32_e32 v4, 1, v0
 ; GFX9-NEXT:    v_sub_u32_e32 v3, s7, v3
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s6, v3
+; GFX9-NEXT:    v_cmp_lt_u32_e32 vcc, s6, v3
 ; GFX9-NEXT:    s_xor_b32 s4, s4, s11
 ; GFX9-NEXT:    v_add_u32_e32 v1, v1, v2
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
@@ -2485,7 +2485,7 @@ define amdgpu_kernel void @sdivrem_v2i8(ptr addrspace(1) %out0, ptr addrspace(1)
 ; GFX9-NEXT:    v_mul_hi_u32 v1, s4, v1
 ; GFX9-NEXT:    v_cndmask_b32_e32 v3, v3, v4, vcc
 ; GFX9-NEXT:    v_add_u32_e32 v4, 1, v0
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s6, v3
+; GFX9-NEXT:    v_cmp_lt_u32_e32 vcc, s6, v3
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
 ; GFX9-NEXT:    v_subrev_u32_e32 v4, s6, v3
 ; GFX9-NEXT:    v_cndmask_b32_e32 v2, v3, v4, vcc
@@ -2494,12 +2494,12 @@ define amdgpu_kernel void @sdivrem_v2i8(ptr addrspace(1) %out0, ptr addrspace(1)
 ; GFX9-NEXT:    s_xor_b32 s5, s10, s5
 ; GFX9-NEXT:    v_xor_b32_e32 v0, s5, v0
 ; GFX9-NEXT:    v_sub_u32_e32 v3, s4, v3
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s8, v3
+; GFX9-NEXT:    v_cmp_lt_u32_e32 vcc, s8, v3
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
 ; GFX9-NEXT:    v_subrev_u32_e32 v4, s8, v3
 ; GFX9-NEXT:    v_cndmask_b32_e32 v3, v3, v4, vcc
 ; GFX9-NEXT:    v_add_u32_e32 v4, 1, v1
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s8, v3
+; GFX9-NEXT:    v_cmp_lt_u32_e32 vcc, s8, v3
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
 ; GFX9-NEXT:    s_xor_b32 s4, s11, s9
 ; GFX9-NEXT:    v_xor_b32_e32 v1, s4, v1
@@ -2568,19 +2568,19 @@ define amdgpu_kernel void @sdivrem_v2i8(ptr addrspace(1) %out0, ptr addrspace(1)
 ; GFX10-NEXT:    v_sub_nc_u32_e32 v2, s4, v2
 ; GFX10-NEXT:    s_load_dwordx4 s[4:7], s[8:9], 0x0
 ; GFX10-NEXT:    v_sub_nc_u32_e32 v3, s0, v3
-; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, s1, v2
+; GFX10-NEXT:    v_cmp_lt_u32_e32 vcc_lo, s1, v2
 ; GFX10-NEXT:    v_subrev_nc_u32_e32 v5, s1, v2
-; GFX10-NEXT:    v_cmp_le_u32_e64 s0, s3, v3
+; GFX10-NEXT:    v_cmp_lt_u32_e64 s0, s3, v3
 ; GFX10-NEXT:    v_subrev_nc_u32_e32 v7, s3, v3
 ; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc_lo
 ; GFX10-NEXT:    v_cndmask_b32_e32 v2, v2, v5, vcc_lo
 ; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, v6, s0
 ; GFX10-NEXT:    v_cndmask_b32_e64 v3, v3, v7, s0
 ; GFX10-NEXT:    v_add_nc_u32_e32 v4, 1, v0
-; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, s1, v2
+; GFX10-NEXT:    v_cmp_lt_u32_e32 vcc_lo, s1, v2
 ; GFX10-NEXT:    v_subrev_nc_u32_e32 v5, s1, v2
 ; GFX10-NEXT:    v_add_nc_u32_e32 v6, 1, v1
-; GFX10-NEXT:    v_cmp_le_u32_e64 s0, s3, v3
+; GFX10-NEXT:    v_cmp_lt_u32_e64 s0, s3, v3
 ; GFX10-NEXT:    s_xor_b32 s1, s11, s2
 ; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc_lo
 ; GFX10-NEXT:    v_subrev_nc_u32_e32 v4, s3, v3
@@ -2646,12 +2646,12 @@ define amdgpu_kernel void @sdiv_i16(ptr addrspace(1) %out0, ptr addrspace(1) %ou
 ; GFX8-NEXT:    v_mul_lo_u32 v3, v2, s6
 ; GFX8-NEXT:    v_add_u32_e32 v4, vcc, 1, v2
 ; GFX8-NEXT:    v_sub_u32_e32 v3, vcc, s4, v3
-; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, s6, v3
+; GFX8-NEXT:    v_cmp_lt_u32_e32 vcc, s6, v3
 ; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
 ; GFX8-NEXT:    v_subrev_u32_e64 v4, s[0:1], s6, v3
 ; GFX8-NEXT:    v_cndmask_b32_e32 v3, v3, v4, vcc
 ; GFX8-NEXT:    v_add_u32_e32 v4, vcc, 1, v2
-; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, s6, v3
+; GFX8-NEXT:    v_cmp_lt_u32_e32 vcc, s6, v3
 ; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
 ; GFX8-NEXT:    v_subrev_u32_e64 v4, s[0:1], s6, v3
 ; GFX8-NEXT:    v_xor_b32_e32 v2, s5, v2
@@ -2692,12 +2692,12 @@ define amdgpu_kernel void @sdiv_i16(ptr addrspace(1) %out0, ptr addrspace(1) %ou
 ; GFX9-NEXT:    v_mul_lo_u32 v1, v0, s5
 ; GFX9-NEXT:    v_add_u32_e32 v3, 1, v0
 ; GFX9-NEXT:    v_sub_u32_e32 v1, s7, v1
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s5, v1
+; GFX9-NEXT:    v_cmp_lt_u32_e32 vcc, s5, v1
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
 ; GFX9-NEXT:    v_subrev_u32_e32 v3, s5, v1
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
 ; GFX9-NEXT:    v_add_u32_e32 v3, 1, v0
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s5, v1
+; GFX9-NEXT:    v_cmp_lt_u32_e32 vcc, s5, v1
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
 ; GFX9-NEXT:    v_subrev_u32_e32 v3, s5, v1
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
@@ -2737,11 +2737,11 @@ define amdgpu_kernel void @sdiv_i16(ptr addrspace(1) %out0, ptr addrspace(1) %ou
 ; GFX10-NEXT:    v_sub_nc_u32_e32 v1, s0, v1
 ; GFX10-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
 ; GFX10-NEXT:    v_subrev_nc_u32_e32 v3, s5, v1
-; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, s5, v1
+; GFX10-NEXT:    v_cmp_lt_u32_e32 vcc_lo, s5, v1
 ; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
 ; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc_lo
 ; GFX10-NEXT:    v_add_nc_u32_e32 v2, 1, v0
-; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, s5, v1
+; GFX10-NEXT:    v_cmp_lt_u32_e32 vcc_lo, s5, v1
 ; GFX10-NEXT:    v_subrev_nc_u32_e32 v3, s5, v1
 ; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
 ; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc_lo
@@ -2798,12 +2798,12 @@ define amdgpu_kernel void @sdivrem_v2i16(ptr addrspace(1) %out0, ptr addrspace(1
 ; GFX8-NEXT:    v_mul_lo_u32 v2, v0, s11
 ; GFX8-NEXT:    v_add_u32_e32 v3, vcc, 1, v0
 ; GFX8-NEXT:    v_sub_u32_e32 v2, vcc, s0, v2
-; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, s11, v2
+; GFX8-NEXT:    v_cmp_lt_u32_e32 vcc, s11, v2
 ; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
 ; GFX8-NEXT:    v_subrev_u32_e64 v3, s[0:1], s11, v2
 ; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
 ; GFX8-NEXT:    v_add_u32_e32 v3, vcc, 1, v0
-; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, s11, v2
+; GFX8-NEXT:    v_cmp_lt_u32_e32 vcc, s11, v2
 ; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
 ; GFX8-NEXT:    v_subrev_u32_e64 v3, s[0:1], s11, v2
 ; GFX8-NEXT:    s_sub_i32 s1, 0, s13
@@ -2824,12 +2824,12 @@ define amdgpu_kernel void @sdivrem_v2i16(ptr addrspace(1) %out0, ptr addrspace(1
 ; GFX8-NEXT:    v_subrev_u32_e32 v2, vcc, s3, v2
 ; GFX8-NEXT:    v_add_u32_e32 v4, vcc, 1, v1
 ; GFX8-NEXT:    v_sub_u32_e32 v3, vcc, s1, v3
-; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, s13, v3
+; GFX8-NEXT:    v_cmp_lt_u32_e32 vcc, s13, v3
 ; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
 ; GFX8-NEXT:    v_subrev_u32_e64 v4, s[0:1], s13, v3
 ; GFX8-NEXT:    v_cndmask_b32_e32 v3, v3, v4, vcc
 ; GFX8-NEXT:    v_add_u32_e32 v4, vcc, 1, v1
-; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, s13, v3
+; GFX8-NEXT:    v_cmp_lt_u32_e32 vcc, s13, v3
 ; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
 ; GFX8-NEXT:    v_subrev_u32_e64 v4, s[0:1], s13, v3
 ; GFX8-NEXT:    s_xor_b32 s0, s2, s12
@@ -2892,7 +2892,7 @@ define amdgpu_kernel void @sdivrem_v2i16(ptr addrspace(1) %out0, ptr addrspace(1
 ; GFX9-NEXT:    s_add_i32 s4, s4, s11
 ; GFX9-NEXT:    v_add_u32_e32 v4, 1, v0
 ; GFX9-NEXT:    v_sub_u32_e32 v3, s8, v3
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s7, v3
+; GFX9-NEXT:    v_cmp_lt_u32_e32 vcc, s7, v3
 ; GFX9-NEXT:    s_xor_b32 s4, s4, s11
 ; GFX9-NEXT:    v_add_u32_e32 v1, v1, v2
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
@@ -2900,7 +2900,7 @@ define amdgpu_kernel void @sdivrem_v2i16(ptr addrspace(1) %out0, ptr addrspace(1
 ; GFX9-NEXT:    v_mul_hi_u32 v1, s4, v1
 ; GFX9-NEXT:    v_cndmask_b32_e32 v3, v3, v4, vcc
 ; GFX9-NEXT:    v_add_u32_e32 v4, 1, v0
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s7, v3
+; GFX9-NEXT:    v_cmp_lt_u32_e32 vcc, s7, v3
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
 ; GFX9-NEXT:    v_subrev_u32_e32 v4, s7, v3
 ; GFX9-NEXT:    v_cndmask_b32_e32 v2, v3, v4, vcc
@@ -2909,12 +2909,12 @@ define amdgpu_kernel void @sdivrem_v2i16(ptr addrspace(1) %out0, ptr addrspace(1
 ; GFX9-NEXT:    s_xor_b32 s6, s10, s6
 ; GFX9-NEXT:    v_xor_b32_e32 v0, s6, v0
 ; GFX9-NEXT:    v_sub_u32_e32 v3, s4, v3
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s5, v3
+; GFX9-NEXT:    v_cmp_lt_u32_e32 vcc, s5, v3
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
 ; GFX9-NEXT:    v_subrev_u32_e32 v4, s5, v3
 ; GFX9-NEXT:    v_cndmask_b32_e32 v3, v3, v4, vcc
 ; GFX9-NEXT:    v_add_u32_e32 v4, 1, v1
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s5, v3
+; GFX9-NEXT:    v_cmp_lt_u32_e32 vcc, s5, v3
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
 ; GFX9-NEXT:    v_subrev_u32_e32 v4, s5, v3
 ; GFX9-NEXT:    s_xor_b32 s4, s11, s9
@@ -2981,19 +2981,19 @@ define amdgpu_kernel void @sdivrem_v2i16(ptr addrspace(1) %out0, ptr addrspace(1
 ; GFX10-NEXT:    v_sub_nc_u32_e32 v2, s4, v2
 ; GFX10-NEXT:    s_load_dwordx4 s[4:7], s[8:9], 0x0
 ; GFX10-NEXT:    v_sub_nc_u32_e32 v3, s0, v3
-; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, s2, v2
+; GFX10-NEXT:    v_cmp_lt_u32_e32 vcc_lo, s2, v2
 ; GFX10-NEXT:    v_subrev_nc_u32_e32 v5, s2, v2
-; GFX10-NEXT:    v_cmp_le_u32_e64 s0, s1, v3
+; GFX10-NEXT:    v_cmp_lt_u32_e64 s0, s1, v3
 ; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc_lo
 ; GFX10-NEXT:    v_subrev_nc_u32_e32 v4, s1, v3
 ; GFX10-NEXT:    v_cndmask_b32_e32 v2, v2, v5, vcc_lo
 ; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, v6, s0
 ; GFX10-NEXT:    v_add_nc_u32_e32 v5, 1, v0
 ; GFX10-NEXT:    v_cndmask_b32_e64 v3, v3, v4, s0
-; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, s2, v2
+; GFX10-NEXT:    v_cmp_lt_u32_e32 vcc_lo, s2, v2
 ; GFX10-NEXT:    v_subrev_nc_u32_e32 v4, s2, v2
 ; GFX10-NEXT:    v_add_nc_u32_e32 v6, 1, v1
-; GFX10-NEXT:    v_cmp_le_u32_e64 s0, s1, v3
+; GFX10-NEXT:    v_cmp_lt_u32_e64 s0, s1, v3
 ; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc_lo
 ; GFX10-NEXT:    v_subrev_nc_u32_e32 v5, s1, v3
 ; GFX10-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc_lo
@@ -3058,12 +3058,12 @@ define amdgpu_kernel void @sdivrem_i3(ptr addrspace(1) %out0, ptr addrspace(1) %
 ; GFX8-NEXT:    v_mul_lo_u32 v3, v2, s6
 ; GFX8-NEXT:    v_add_u32_e32 v4, vcc, 1, v2
 ; GFX8-NEXT:    v_sub_u32_e32 v3, vcc, s4, v3
-; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, s6, v3
+; GFX8-NEXT:    v_cmp_lt_u32_e32 vcc, s6, v3
 ; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
 ; GFX8-NEXT:    v_subrev_u32_e64 v4, s[0:1], s6, v3
 ; GFX8-NEXT:    v_cndmask_b32_e32 v3, v3, v4, vcc
 ; GFX8-NEXT:    v_add_u32_e32 v4, vcc, 1, v2
-; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, s6, v3
+; GFX8-NEXT:    v_cmp_lt_u32_e32 vcc, s6, v3
 ; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
 ; GFX8-NEXT:    v_subrev_u32_e64 v4, s[0:1], s6, v3
 ; GFX8-NEXT:    v_xor_b32_e32 v2, s5, v2
@@ -3106,12 +3106,12 @@ define amdgpu_kernel void @sdivrem_i3(ptr addrspace(1) %out0, ptr addrspace(1) %
 ; GFX9-NEXT:    v_mul_lo_u32 v1, v0, s5
 ; GFX9-NEXT:    v_add_u32_e32 v3, 1, v0
 ; GFX9-NEXT:    v_sub_u32_e32 v1, s7, v1
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s5, v1
+; GFX9-NEXT:    v_cmp_lt_u32_e32 vcc, s5, v1
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
 ; GFX9-NEXT:    v_subrev_u32_e32 v3, s5, v1
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
 ; GFX9-NEXT:    v_add_u32_e32 v3, 1, v0
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s5, v1
+; GFX9-NEXT:    v_cmp_lt_u32_e32 vcc, s5, v1
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
 ; GFX9-NEXT:    v_subrev_u32_e32 v3, s5, v1
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
@@ -3152,11 +3152,11 @@ define amdgpu_kernel void @sdivrem_i3(ptr addrspace(1) %out0, ptr addrspace(1) %
 ; GFX10-NEXT:    v_add_nc_u32_e32 v2, 1, v0
 ; GFX10-NEXT:    v_sub_nc_u32_e32 v1, s0, v1
 ; GFX10-NEXT:    v_subrev_nc_u32_e32 v3, s1, v1
-; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, s1, v1
+; GFX10-NEXT:    v_cmp_lt_u32_e32 vcc_lo, s1, v1
 ; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
 ; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc_lo
 ; GFX10-NEXT:    v_add_nc_u32_e32 v2, 1, v0
-; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, s1, v1
+; GFX10-NEXT:    v_cmp_lt_u32_e32 vcc_lo, s1, v1
 ; GFX10-NEXT:    v_subrev_nc_u32_e32 v3, s1, v1
 ; GFX10-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
 ; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
@@ -3212,12 +3212,12 @@ define amdgpu_kernel void @sdivrem_i27(ptr addrspace(1) %out0, ptr addrspace(1)
 ; GFX8-NEXT:    v_mul_lo_u32 v3, v2, s6
 ; GFX8-NEXT:    v_add_u32_e32 v4, vcc, 1, v2
 ; GFX8-NEXT:    v_sub_u32_e32 v3, vcc, s4, v3
-; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, s6, v3
+; GFX8-NEXT:    v_cmp_lt_u32_e32 vcc, s6, v3
 ; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
 ; GFX8-NEXT:    v_subrev_u32_e64 v4, s[0:1], s6, v3
 ; GFX8-NEXT:    v_cndmask_b32_e32 v3, v3, v4, vcc
 ; GFX8-NEXT:    v_add_u32_e32 v4, vcc, 1, v2
-; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, s6, v3
+; GFX8-NEXT:    v_cmp_lt_u32_e32 vcc, s6, v3
 ; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
 ; GFX8-NEXT:    v_subrev_u32_e64 v4, s[0:1], s6, v3
 ; GFX8-NEXT:    v_xor_b32_e32 v2, s5, v2
@@ -3260,12 +3260,12 @@ define amdgpu_kernel void @sdivrem_i27(ptr addrspace(1) %out0, ptr addrspace(1)
 ; GFX9-NEXT:    v_mul_lo_u32 v1, v0, s5
 ; GFX9-NEXT:    v_add_u32_e32 v3, 1, v0
 ; GFX9-NEXT:    v_sub_u32_e32 v1, s7, v1
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s5, v1
+; GFX9-NEXT:    v_cmp_lt_u32_e32 vcc, s5, v1
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
 ; GFX9-NEXT:    v_subrev_u32_e32 v3, s5, v1
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
 ; GFX9-NEXT:    v_add_u32_e32 v3, 1, v0
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s5, v1
+; GFX9-NEXT:    v_cmp_lt_u32_e32 vcc, s5, v1
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
 ; GFX9-NEXT:    v_subrev_u32_e32 v3, s5, v1
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
@@ -3306,11 +3306,11 @@ define amdgpu_kernel void @sdivrem_i27(ptr addrspace(1) %out0, ptr addrspace(1)
 ; GFX10-NEXT:    v_add_nc_u32_e32 v2, 1, v0
 ; GFX10-NEXT:    v_sub_nc_u32_e32 v1, s0, v1
 ; GFX10-NEXT:    v_subrev_nc_u32_e32 v3, s1, v1
-; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, s1, v1
+; GFX10-NEXT:    v_cmp_lt_u32_e32 vcc_lo, s1, v1
 ; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
 ; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc_lo
 ; GFX10-NEXT:    v_add_nc_u32_e32 v2, 1, v0
-; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, s1, v1
+; GFX10-NEXT:    v_cmp_lt_u32_e32 vcc_lo, s1, v1
 ; GFX10-NEXT:    v_subrev_nc_u32_e32 v3, s1, v1
 ; GFX10-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
 ; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/shl.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/shl.ll
index 2f03c7156babc..8af934b194350 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/shl.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/shl.ll
@@ -1574,7 +1574,7 @@ define i65 @v_shl_i65(i65 %value, i65 %amount) {
 ; GFX6-NEXT:    v_lshl_b64 v[6:7], v[0:1], v3
 ; GFX6-NEXT:    v_or_b32_e32 v9, v4, v5
 ; GFX6-NEXT:    v_lshl_b64 v[4:5], v[0:1], v8
-; GFX6-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v3
+; GFX6-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v3
 ; GFX6-NEXT:    v_cndmask_b32_e32 v0, 0, v6, vcc
 ; GFX6-NEXT:    v_cndmask_b32_e32 v1, 0, v7, vcc
 ; GFX6-NEXT:    v_cndmask_b32_e32 v4, v4, v9, vcc
@@ -1592,7 +1592,7 @@ define i65 @v_shl_i65(i65 %value, i65 %amount) {
 ; GFX8-NEXT:    v_lshlrev_b64 v[6:7], v3, v[0:1]
 ; GFX8-NEXT:    v_or_b32_e32 v9, v4, v5
 ; GFX8-NEXT:    v_lshlrev_b64 v[4:5], v8, v[0:1]
-; GFX8-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v3
+; GFX8-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v3
 ; GFX8-NEXT:    v_cndmask_b32_e32 v0, 0, v6, vcc
 ; GFX8-NEXT:    v_cndmask_b32_e32 v1, 0, v7, vcc
 ; GFX8-NEXT:    v_cndmask_b32_e32 v4, v4, v9, vcc
@@ -1610,7 +1610,7 @@ define i65 @v_shl_i65(i65 %value, i65 %amount) {
 ; GFX9-NEXT:    v_lshlrev_b64 v[6:7], v3, v[0:1]
 ; GFX9-NEXT:    v_or_b32_e32 v9, v4, v5
 ; GFX9-NEXT:    v_lshlrev_b64 v[4:5], v8, v[0:1]
-; GFX9-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v3
+; GFX9-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v3
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v6, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, 0, v7, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v4, v4, v9, vcc
@@ -1624,7 +1624,7 @@ define i65 @v_shl_i65(i65 %value, i65 %amount) {
 ; GFX10-NEXT:    v_sub_nc_u32_e32 v6, 64, v3
 ; GFX10-NEXT:    v_lshlrev_b64 v[4:5], v3, v[2:3]
 ; GFX10-NEXT:    v_add_nc_u32_e32 v8, 0xffffffc0, v3
-; GFX10-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 64, v3
+; GFX10-NEXT:    v_cmp_ge_u32_e32 vcc_lo, 64, v3
 ; GFX10-NEXT:    v_lshrrev_b64 v[5:6], v6, v[0:1]
 ; GFX10-NEXT:    v_lshlrev_b64 v[6:7], v3, v[0:1]
 ; GFX10-NEXT:    v_lshlrev_b64 v[8:9], v8, v[0:1]
@@ -1642,7 +1642,7 @@ define i65 @v_shl_i65(i65 %value, i65 %amount) {
 ; GFX11-NEXT:    v_sub_nc_u32_e32 v6, 64, v3
 ; GFX11-NEXT:    v_lshlrev_b64 v[4:5], v3, v[2:3]
 ; GFX11-NEXT:    v_add_nc_u32_e32 v8, 0xffffffc0, v3
-; GFX11-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 64, v3
+; GFX11-NEXT:    v_cmp_ge_u32_e32 vcc_lo, 64, v3
 ; GFX11-NEXT:    v_lshrrev_b64 v[5:6], v6, v[0:1]
 ; GFX11-NEXT:    v_lshlrev_b64 v[6:7], v3, v[0:1]
 ; GFX11-NEXT:    v_lshlrev_b64 v[8:9], v8, v[0:1]
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i32.ll
index 530f4cf53321e..2648d757df854 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i32.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i32.ll
@@ -92,10 +92,10 @@ define amdgpu_ps i32 @s_srem_i32(i32 inreg %num, i32 inreg %den) {
 ; GISEL-NEXT:    v_mul_lo_u32 v0, v0, s1
 ; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, s0, v0
 ; GISEL-NEXT:    v_subrev_i32_e32 v1, vcc, s1, v0
-; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s1, v0
+; GISEL-NEXT:    v_cmp_lt_u32_e32 vcc, s1, v0
 ; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
 ; GISEL-NEXT:    v_subrev_i32_e32 v1, vcc, s1, v0
-; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s1, v0
+; GISEL-NEXT:    v_cmp_lt_u32_e32 vcc, s1, v0
 ; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
 ; GISEL-NEXT:    v_xor_b32_e32 v0, s2, v0
 ; GISEL-NEXT:    v_subrev_i32_e32 v0, vcc, s2, v0
@@ -122,10 +122,10 @@ define amdgpu_ps i32 @s_srem_i32(i32 inreg %num, i32 inreg %den) {
 ; CGP-NEXT:    v_mul_lo_u32 v0, v0, s1
 ; CGP-NEXT:    v_sub_i32_e32 v0, vcc, s0, v0
 ; CGP-NEXT:    v_subrev_i32_e32 v1, vcc, s1, v0
-; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s1, v0
+; CGP-NEXT:    v_cmp_lt_u32_e32 vcc, s1, v0
 ; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
 ; CGP-NEXT:    v_subrev_i32_e32 v1, vcc, s1, v0
-; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s1, v0
+; CGP-NEXT:    v_cmp_lt_u32_e32 vcc, s1, v0
 ; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
 ; CGP-NEXT:    v_xor_b32_e32 v0, s2, v0
 ; CGP-NEXT:    v_subrev_i32_e32 v0, vcc, s2, v0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll
index 1a10f5fb7a5ce..6d669f4d3a37e 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll
@@ -300,18 +300,18 @@ define amdgpu_ps i64 @s_srem_i64(i64 inreg %num, i64 inreg %den) {
 ; CHECK-NEXT:    v_mov_b32_e32 v4, s9
 ; CHECK-NEXT:    v_subb_u32_e64 v2, s[0:1], v3, v1, vcc
 ; CHECK-NEXT:    v_sub_i32_e64 v1, s[0:1], s11, v1
-; CHECK-NEXT:    v_cmp_le_u32_e64 s[0:1], s9, v2
+; CHECK-NEXT:    v_cmp_lt_u32_e64 s[0:1], s9, v2
 ; CHECK-NEXT:    v_cndmask_b32_e64 v3, 0, -1, s[0:1]
-; CHECK-NEXT:    v_cmp_le_u32_e64 s[0:1], s8, v0
+; CHECK-NEXT:    v_cmp_lt_u32_e64 s[0:1], s8, v0
 ; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, -1, s[0:1]
 ; CHECK-NEXT:    v_cmp_eq_u32_e64 s[0:1], s9, v2
 ; CHECK-NEXT:    v_subb_u32_e32 v1, vcc, v1, v4, vcc
 ; CHECK-NEXT:    v_cndmask_b32_e64 v2, v3, v5, s[0:1]
 ; CHECK-NEXT:    v_subrev_i32_e32 v3, vcc, s8, v0
 ; CHECK-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
-; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s9, v1
+; CHECK-NEXT:    v_cmp_lt_u32_e32 vcc, s9, v1
 ; CHECK-NEXT:    v_cndmask_b32_e64 v4, 0, -1, vcc
-; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s8, v3
+; CHECK-NEXT:    v_cmp_lt_u32_e32 vcc, s8, v3
 ; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, -1, vcc
 ; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, s9, v1
 ; CHECK-NEXT:    v_cndmask_b32_e32 v1, v4, v5, vcc
@@ -342,10 +342,10 @@ define amdgpu_ps i64 @s_srem_i64(i64 inreg %num, i64 inreg %den) {
 ; CHECK-NEXT:    v_mul_lo_u32 v0, v0, s4
 ; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, s2, v0
 ; CHECK-NEXT:    v_subrev_i32_e32 v1, vcc, s4, v0
-; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s4, v0
+; CHECK-NEXT:    v_cmp_lt_u32_e32 vcc, s4, v0
 ; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
 ; CHECK-NEXT:    v_subrev_i32_e32 v1, vcc, s4, v0
-; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s4, v0
+; CHECK-NEXT:    v_cmp_lt_u32_e32 vcc, s4, v0
 ; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
 ; CHECK-NEXT:  .LBB1_5:
 ; CHECK-NEXT:    v_readfirstlane_b32 s0, v0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll
index 2673ac4fb5bae..4012fdf5e1c11 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll
@@ -1018,7 +1018,7 @@ define i24 @v_ssubsat_i24(i24 %lhs, i24 %rhs) {
 ; GFX8-NEXT:    v_bfe_i32 v0, v0, 0, 24
 ; GFX8-NEXT:    v_cmp_lt_i32_e64 s[4:5], v3, v0
 ; GFX8-NEXT:    v_bfe_i32 v0, v1, 0, 24
-; GFX8-NEXT:    v_cmp_lt_i32_e64 s[6:7], 0, v0
+; GFX8-NEXT:    v_cmp_le_i32_e64 s[6:7], 0, v0
 ; GFX8-NEXT:    v_ashrrev_i32_e32 v0, 23, v3
 ; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 0xff800000, v0
 ; GFX8-NEXT:    s_xor_b64 vcc, s[6:7], s[4:5]
@@ -4096,7 +4096,7 @@ define i48 @v_ssubsat_i48(i48 %lhs, i48 %rhs) {
 ; GFX6-NEXT:    v_bfe_i32 v1, v0, 0, 16
 ; GFX6-NEXT:    v_bfe_i32 v3, v2, 0, 16
 ; GFX6-NEXT:    v_cmp_lt_i64_e64 s[4:5], v[4:5], v[0:1]
-; GFX6-NEXT:    v_cmp_lt_i64_e64 s[6:7], 0, v[2:3]
+; GFX6-NEXT:    v_cmp_le_i64_e64 s[6:7], 0, v[2:3]
 ; GFX6-NEXT:    v_ashrrev_i32_e32 v0, 31, v5
 ; GFX6-NEXT:    v_add_i32_e32 v2, vcc, 0xffff8000, v0
 ; GFX6-NEXT:    v_ashrrev_i32_e32 v1, 15, v5
@@ -4114,7 +4114,7 @@ define i48 @v_ssubsat_i48(i48 %lhs, i48 %rhs) {
 ; GFX8-NEXT:    v_bfe_i32 v1, v0, 0, 16
 ; GFX8-NEXT:    v_bfe_i32 v3, v2, 0, 16
 ; GFX8-NEXT:    v_cmp_lt_i64_e64 s[4:5], v[4:5], v[0:1]
-; GFX8-NEXT:    v_cmp_lt_i64_e64 s[6:7], 0, v[2:3]
+; GFX8-NEXT:    v_cmp_le_i64_e64 s[6:7], 0, v[2:3]
 ; GFX8-NEXT:    v_ashrrev_i32_e32 v0, 31, v5
 ; GFX8-NEXT:    v_add_u32_e32 v2, vcc, 0xffff8000, v0
 ; GFX8-NEXT:    v_ashrrev_i32_e32 v1, 15, v5
@@ -4131,7 +4131,7 @@ define i48 @v_ssubsat_i48(i48 %lhs, i48 %rhs) {
 ; GFX9-NEXT:    v_sub_co_u32_e32 v4, vcc, v0, v2
 ; GFX9-NEXT:    v_subb_co_u32_e32 v5, vcc, v1, v3, vcc
 ; GFX9-NEXT:    v_cmp_lt_i64_e32 vcc, v[4:5], v[0:1]
-; GFX9-NEXT:    v_cmp_lt_i64_e64 s[4:5], 0, v[2:3]
+; GFX9-NEXT:    v_cmp_le_i64_e64 s[4:5], 0, v[2:3]
 ; GFX9-NEXT:    v_ashrrev_i32_e32 v0, 31, v5
 ; GFX9-NEXT:    v_add_u32_e32 v1, 0x80000000, v0
 ; GFX9-NEXT:    s_xor_b64 vcc, s[4:5], vcc
@@ -4147,7 +4147,7 @@ define i48 @v_ssubsat_i48(i48 %lhs, i48 %rhs) {
 ; GFX10-NEXT:    v_lshlrev_b64 v[2:3], 16, v[2:3]
 ; GFX10-NEXT:    v_sub_co_u32 v4, vcc_lo, v0, v2
 ; GFX10-NEXT:    v_sub_co_ci_u32_e32 v5, vcc_lo, v1, v3, vcc_lo
-; GFX10-NEXT:    v_cmp_lt_i64_e32 vcc_lo, 0, v[2:3]
+; GFX10-NEXT:    v_cmp_le_i64_e32 vcc_lo, 0, v[2:3]
 ; GFX10-NEXT:    v_ashrrev_i32_e32 v6, 31, v5
 ; GFX10-NEXT:    v_cmp_lt_i64_e64 s4, v[4:5], v[0:1]
 ; GFX10-NEXT:    v_add_nc_u32_e32 v1, 0x80000000, v6
@@ -4164,7 +4164,7 @@ define i48 @v_ssubsat_i48(i48 %lhs, i48 %rhs) {
 ; GFX11-NEXT:    v_lshlrev_b64 v[2:3], 16, v[2:3]
 ; GFX11-NEXT:    v_sub_co_u32 v4, vcc_lo, v0, v2
 ; GFX11-NEXT:    v_sub_co_ci_u32_e64 v5, null, v1, v3, vcc_lo
-; GFX11-NEXT:    v_cmp_lt_i64_e32 vcc_lo, 0, v[2:3]
+; GFX11-NEXT:    v_cmp_le_i64_e32 vcc_lo, 0, v[2:3]
 ; GFX11-NEXT:    v_ashrrev_i32_e32 v6, 31, v5
 ; GFX11-NEXT:    v_cmp_lt_i64_e64 s0, v[4:5], v[0:1]
 ; GFX11-NEXT:    v_add_nc_u32_e32 v1, 0x80000000, v6
@@ -4302,8 +4302,8 @@ define amdgpu_ps <2 x float> @ssubsat_i48_sv(i48 inreg %lhs, i48 %rhs) {
 ; GFX6-NEXT:    v_bfe_i32 v3, v2, 0, 16
 ; GFX6-NEXT:    s_bfe_i64 s[0:1], s[0:1], 0x300000
 ; GFX6-NEXT:    v_bfe_i32 v1, v0, 0, 16
-; GFX6-NEXT:    v_cmp_gt_i64_e64 s[0:1], s[0:1], v[2:3]
-; GFX6-NEXT:    v_cmp_lt_i64_e64 s[2:3], 0, v[0:1]
+; GFX6-NEXT:    v_cmp_ge_i64_e64 s[0:1], s[0:1], v[2:3]
+; GFX6-NEXT:    v_cmp_le_i64_e64 s[2:3], 0, v[0:1]
 ; GFX6-NEXT:    v_ashrrev_i32_e32 v0, 31, v3
 ; GFX6-NEXT:    v_ashrrev_i32_e32 v1, 15, v3
 ; GFX6-NEXT:    v_add_i32_e32 v3, vcc, 0xffff8000, v0
@@ -4321,8 +4321,8 @@ define amdgpu_ps <2 x float> @ssubsat_i48_sv(i48 inreg %lhs, i48 %rhs) {
 ; GFX8-NEXT:    v_bfe_i32 v3, v2, 0, 16
 ; GFX8-NEXT:    s_bfe_i64 s[0:1], s[0:1], 0x300000
 ; GFX8-NEXT:    v_bfe_i32 v1, v0, 0, 16
-; GFX8-NEXT:    v_cmp_gt_i64_e64 s[0:1], s[0:1], v[2:3]
-; GFX8-NEXT:    v_cmp_lt_i64_e64 s[2:3], 0, v[0:1]
+; GFX8-NEXT:    v_cmp_ge_i64_e64 s[0:1], s[0:1], v[2:3]
+; GFX8-NEXT:    v_cmp_le_i64_e64 s[2:3], 0, v[0:1]
 ; GFX8-NEXT:    v_ashrrev_i32_e32 v0, 31, v3
 ; GFX8-NEXT:    v_ashrrev_i32_e32 v1, 15, v3
 ; GFX8-NEXT:    v_add_u32_e32 v3, vcc, 0xffff8000, v0
@@ -4339,8 +4339,8 @@ define amdgpu_ps <2 x float> @ssubsat_i48_sv(i48 inreg %lhs, i48 %rhs) {
 ; GFX9-NEXT:    v_mov_b32_e32 v3, s1
 ; GFX9-NEXT:    v_sub_co_u32_e32 v2, vcc, s0, v0
 ; GFX9-NEXT:    v_subb_co_u32_e32 v3, vcc, v3, v1, vcc
-; GFX9-NEXT:    v_cmp_gt_i64_e32 vcc, s[0:1], v[2:3]
-; GFX9-NEXT:    v_cmp_lt_i64_e64 s[0:1], 0, v[0:1]
+; GFX9-NEXT:    v_cmp_ge_i64_e32 vcc, s[0:1], v[2:3]
+; GFX9-NEXT:    v_cmp_le_i64_e64 s[0:1], 0, v[0:1]
 ; GFX9-NEXT:    v_ashrrev_i32_e32 v0, 31, v3
 ; GFX9-NEXT:    v_add_u32_e32 v1, 0x80000000, v0
 ; GFX9-NEXT:    s_xor_b64 vcc, s[0:1], vcc
@@ -4357,8 +4357,8 @@ define amdgpu_ps <2 x float> @ssubsat_i48_sv(i48 inreg %lhs, i48 %rhs) {
 ; GFX10-NEXT:    v_sub_co_u32 v2, vcc_lo, s0, v0
 ; GFX10-NEXT:    v_sub_co_ci_u32_e32 v3, vcc_lo, s1, v1, vcc_lo
 ; GFX10-NEXT:    v_ashrrev_i32_e32 v4, 31, v3
-; GFX10-NEXT:    v_cmp_gt_i64_e32 vcc_lo, s[0:1], v[2:3]
-; GFX10-NEXT:    v_cmp_lt_i64_e64 s0, 0, v[0:1]
+; GFX10-NEXT:    v_cmp_ge_i64_e32 vcc_lo, s[0:1], v[2:3]
+; GFX10-NEXT:    v_cmp_le_i64_e64 s0, 0, v[0:1]
 ; GFX10-NEXT:    v_add_nc_u32_e32 v1, 0x80000000, v4
 ; GFX10-NEXT:    s_xor_b32 vcc_lo, s0, vcc_lo
 ; GFX10-NEXT:    v_cndmask_b32_e32 v0, v2, v4, vcc_lo
@@ -4374,8 +4374,8 @@ define amdgpu_ps <2 x float> @ssubsat_i48_sv(i48 inreg %lhs, i48 %rhs) {
 ; GFX11-NEXT:    v_sub_co_u32 v2, vcc_lo, s0, v0
 ; GFX11-NEXT:    v_sub_co_ci_u32_e64 v3, null, s1, v1, vcc_lo
 ; GFX11-NEXT:    v_ashrrev_i32_e32 v4, 31, v3
-; GFX11-NEXT:    v_cmp_gt_i64_e32 vcc_lo, s[0:1], v[2:3]
-; GFX11-NEXT:    v_cmp_lt_i64_e64 s0, 0, v[0:1]
+; GFX11-NEXT:    v_cmp_ge_i64_e32 vcc_lo, s[0:1], v[2:3]
+; GFX11-NEXT:    v_cmp_le_i64_e64 s0, 0, v[0:1]
 ; GFX11-NEXT:    v_add_nc_u32_e32 v1, 0x80000000, v4
 ; GFX11-NEXT:    s_xor_b32 vcc_lo, s0, vcc_lo
 ; GFX11-NEXT:    v_dual_cndmask_b32 v0, v2, v4 :: v_dual_cndmask_b32 v1, v3, v1
@@ -4490,7 +4490,7 @@ define i64 @v_ssubsat_i64(i64 %lhs, i64 %rhs) {
 ; GFX6-NEXT:    v_sub_i32_e32 v4, vcc, v0, v2
 ; GFX6-NEXT:    v_subb_u32_e32 v5, vcc, v1, v3, vcc
 ; GFX6-NEXT:    v_cmp_lt_i64_e64 s[4:5], v[4:5], v[0:1]
-; GFX6-NEXT:    v_cmp_lt_i64_e64 s[6:7], 0, v[2:3]
+; GFX6-NEXT:    v_cmp_le_i64_e64 s[6:7], 0, v[2:3]
 ; GFX6-NEXT:    v_ashrrev_i32_e32 v0, 31, v5
 ; GFX6-NEXT:    v_add_i32_e32 v1, vcc, 0x80000000, v0
 ; GFX6-NEXT:    s_xor_b64 vcc, s[6:7], s[4:5]
@@ -4504,7 +4504,7 @@ define i64 @v_ssubsat_i64(i64 %lhs, i64 %rhs) {
 ; GFX8-NEXT:    v_sub_u32_e32 v4, vcc, v0, v2
 ; GFX8-NEXT:    v_subb_u32_e32 v5, vcc, v1, v3, vcc
 ; GFX8-NEXT:    v_cmp_lt_i64_e64 s[4:5], v[4:5], v[0:1]
-; GFX8-NEXT:    v_cmp_lt_i64_e64 s[6:7], 0, v[2:3]
+; GFX8-NEXT:    v_cmp_le_i64_e64 s[6:7], 0, v[2:3]
 ; GFX8-NEXT:    v_ashrrev_i32_e32 v0, 31, v5
 ; GFX8-NEXT:    v_add_u32_e32 v1, vcc, 0x80000000, v0
 ; GFX8-NEXT:    s_xor_b64 vcc, s[6:7], s[4:5]
@@ -4518,7 +4518,7 @@ define i64 @v_ssubsat_i64(i64 %lhs, i64 %rhs) {
 ; GFX9-NEXT:    v_sub_co_u32_e32 v4, vcc, v0, v2
 ; GFX9-NEXT:    v_subb_co_u32_e32 v5, vcc, v1, v3, vcc
 ; GFX9-NEXT:    v_cmp_lt_i64_e32 vcc, v[4:5], v[0:1]
-; GFX9-NEXT:    v_cmp_lt_i64_e64 s[4:5], 0, v[2:3]
+; GFX9-NEXT:    v_cmp_le_i64_e64 s[4:5], 0, v[2:3]
 ; GFX9-NEXT:    v_ashrrev_i32_e32 v0, 31, v5
 ; GFX9-NEXT:    v_add_u32_e32 v1, 0x80000000, v0
 ; GFX9-NEXT:    s_xor_b64 vcc, s[4:5], vcc
@@ -4531,7 +4531,7 @@ define i64 @v_ssubsat_i64(i64 %lhs, i64 %rhs) {
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    v_sub_co_u32 v4, vcc_lo, v0, v2
 ; GFX10-NEXT:    v_sub_co_ci_u32_e32 v5, vcc_lo, v1, v3, vcc_lo
-; GFX10-NEXT:    v_cmp_lt_i64_e64 s4, 0, v[2:3]
+; GFX10-NEXT:    v_cmp_le_i64_e64 s4, 0, v[2:3]
 ; GFX10-NEXT:    v_ashrrev_i32_e32 v6, 31, v5
 ; GFX10-NEXT:    v_cmp_lt_i64_e32 vcc_lo, v[4:5], v[0:1]
 ; GFX10-NEXT:    v_add_nc_u32_e32 v1, 0x80000000, v6
@@ -4545,7 +4545,7 @@ define i64 @v_ssubsat_i64(i64 %lhs, i64 %rhs) {
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NEXT:    v_sub_co_u32 v4, vcc_lo, v0, v2
 ; GFX11-NEXT:    v_sub_co_ci_u32_e64 v5, null, v1, v3, vcc_lo
-; GFX11-NEXT:    v_cmp_lt_i64_e64 s0, 0, v[2:3]
+; GFX11-NEXT:    v_cmp_le_i64_e64 s0, 0, v[2:3]
 ; GFX11-NEXT:    v_ashrrev_i32_e32 v6, 31, v5
 ; GFX11-NEXT:    v_cmp_lt_i64_e32 vcc_lo, v[4:5], v[0:1]
 ; GFX11-NEXT:    v_add_nc_u32_e32 v1, 0x80000000, v6
@@ -4662,8 +4662,8 @@ define amdgpu_ps <2 x float> @ssubsat_i64_sv(i64 inreg %lhs, i64 %rhs) {
 ; GFX6-NEXT:    v_mov_b32_e32 v3, s1
 ; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, s0, v0
 ; GFX6-NEXT:    v_subb_u32_e32 v3, vcc, v3, v1, vcc
-; GFX6-NEXT:    v_cmp_gt_i64_e64 s[0:1], s[0:1], v[2:3]
-; GFX6-NEXT:    v_cmp_lt_i64_e64 s[2:3], 0, v[0:1]
+; GFX6-NEXT:    v_cmp_ge_i64_e64 s[0:1], s[0:1], v[2:3]
+; GFX6-NEXT:    v_cmp_le_i64_e64 s[2:3], 0, v[0:1]
 ; GFX6-NEXT:    v_ashrrev_i32_e32 v0, 31, v3
 ; GFX6-NEXT:    v_add_i32_e32 v1, vcc, 0x80000000, v0
 ; GFX6-NEXT:    s_xor_b64 vcc, s[2:3], s[0:1]
@@ -4676,8 +4676,8 @@ define amdgpu_ps <2 x float> @ssubsat_i64_sv(i64 inreg %lhs, i64 %rhs) {
 ; GFX8-NEXT:    v_mov_b32_e32 v3, s1
 ; GFX8-NEXT:    v_sub_u32_e32 v2, vcc, s0, v0
 ; GFX8-NEXT:    v_subb_u32_e32 v3, vcc, v3, v1, vcc
-; GFX8-NEXT:    v_cmp_gt_i64_e64 s[0:1], s[0:1], v[2:3]
-; GFX8-NEXT:    v_cmp_lt_i64_e64 s[2:3], 0, v[0:1]
+; GFX8-NEXT:    v_cmp_ge_i64_e64 s[0:1], s[0:1], v[2:3]
+; GFX8-NEXT:    v_cmp_le_i64_e64 s[2:3], 0, v[0:1]
 ; GFX8-NEXT:    v_ashrrev_i32_e32 v0, 31, v3
 ; GFX8-NEXT:    v_add_u32_e32 v1, vcc, 0x80000000, v0
 ; GFX8-NEXT:    s_xor_b64 vcc, s[2:3], s[0:1]
@@ -4690,8 +4690,8 @@ define amdgpu_ps <2 x float> @ssubsat_i64_sv(i64 inreg %lhs, i64 %rhs) {
 ; GFX9-NEXT:    v_mov_b32_e32 v3, s1
 ; GFX9-NEXT:    v_sub_co_u32_e32 v2, vcc, s0, v0
 ; GFX9-NEXT:    v_subb_co_u32_e32 v3, vcc, v3, v1, vcc
-; GFX9-NEXT:    v_cmp_gt_i64_e32 vcc, s[0:1], v[2:3]
-; GFX9-NEXT:    v_cmp_lt_i64_e64 s[0:1], 0, v[0:1]
+; GFX9-NEXT:    v_cmp_ge_i64_e32 vcc, s[0:1], v[2:3]
+; GFX9-NEXT:    v_cmp_le_i64_e64 s[0:1], 0, v[0:1]
 ; GFX9-NEXT:    v_ashrrev_i32_e32 v0, 31, v3
 ; GFX9-NEXT:    v_add_u32_e32 v1, 0x80000000, v0
 ; GFX9-NEXT:    s_xor_b64 vcc, s[0:1], vcc
@@ -4704,8 +4704,8 @@ define amdgpu_ps <2 x float> @ssubsat_i64_sv(i64 inreg %lhs, i64 %rhs) {
 ; GFX10-NEXT:    v_sub_co_u32 v2, vcc_lo, s0, v0
 ; GFX10-NEXT:    v_sub_co_ci_u32_e32 v3, vcc_lo, s1, v1, vcc_lo
 ; GFX10-NEXT:    v_ashrrev_i32_e32 v4, 31, v3
-; GFX10-NEXT:    v_cmp_gt_i64_e32 vcc_lo, s[0:1], v[2:3]
-; GFX10-NEXT:    v_cmp_lt_i64_e64 s0, 0, v[0:1]
+; GFX10-NEXT:    v_cmp_ge_i64_e32 vcc_lo, s[0:1], v[2:3]
+; GFX10-NEXT:    v_cmp_le_i64_e64 s0, 0, v[0:1]
 ; GFX10-NEXT:    v_add_nc_u32_e32 v1, 0x80000000, v4
 ; GFX10-NEXT:    s_xor_b32 vcc_lo, s0, vcc_lo
 ; GFX10-NEXT:    v_cndmask_b32_e32 v0, v2, v4, vcc_lo
@@ -4717,8 +4717,8 @@ define amdgpu_ps <2 x float> @ssubsat_i64_sv(i64 inreg %lhs, i64 %rhs) {
 ; GFX11-NEXT:    v_sub_co_u32 v2, vcc_lo, s0, v0
 ; GFX11-NEXT:    v_sub_co_ci_u32_e64 v3, null, s1, v1, vcc_lo
 ; GFX11-NEXT:    v_ashrrev_i32_e32 v4, 31, v3
-; GFX11-NEXT:    v_cmp_gt_i64_e32 vcc_lo, s[0:1], v[2:3]
-; GFX11-NEXT:    v_cmp_lt_i64_e64 s0, 0, v[0:1]
+; GFX11-NEXT:    v_cmp_ge_i64_e32 vcc_lo, s[0:1], v[2:3]
+; GFX11-NEXT:    v_cmp_le_i64_e64 s0, 0, v[0:1]
 ; GFX11-NEXT:    v_add_nc_u32_e32 v1, 0x80000000, v4
 ; GFX11-NEXT:    s_xor_b32 vcc_lo, s0, vcc_lo
 ; GFX11-NEXT:    v_dual_cndmask_b32 v0, v2, v4 :: v_dual_cndmask_b32 v1, v3, v1
@@ -4807,7 +4807,7 @@ define <2 x i64> @v_ssubsat_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) {
 ; GFX6-NEXT:    v_sub_i32_e32 v8, vcc, v0, v4
 ; GFX6-NEXT:    v_subb_u32_e32 v9, vcc, v1, v5, vcc
 ; GFX6-NEXT:    v_cmp_lt_i64_e64 s[4:5], v[8:9], v[0:1]
-; GFX6-NEXT:    v_cmp_lt_i64_e64 s[6:7], 0, v[4:5]
+; GFX6-NEXT:    v_cmp_le_i64_e64 s[6:7], 0, v[4:5]
 ; GFX6-NEXT:    v_ashrrev_i32_e32 v0, 31, v9
 ; GFX6-NEXT:    v_bfrev_b32_e32 v1, 1
 ; GFX6-NEXT:    v_add_i32_e32 v1, vcc, v0, v1
@@ -4817,7 +4817,7 @@ define <2 x i64> @v_ssubsat_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) {
 ; GFX6-NEXT:    v_sub_i32_e32 v4, vcc, v2, v6
 ; GFX6-NEXT:    v_subb_u32_e32 v5, vcc, v3, v7, vcc
 ; GFX6-NEXT:    v_cmp_lt_i64_e64 s[4:5], v[4:5], v[2:3]
-; GFX6-NEXT:    v_cmp_lt_i64_e64 s[6:7], 0, v[6:7]
+; GFX6-NEXT:    v_cmp_le_i64_e64 s[6:7], 0, v[6:7]
 ; GFX6-NEXT:    v_ashrrev_i32_e32 v2, 31, v5
 ; GFX6-NEXT:    v_add_i32_e32 v3, vcc, 0x80000000, v2
 ; GFX6-NEXT:    s_xor_b64 vcc, s[6:7], s[4:5]
@@ -4831,7 +4831,7 @@ define <2 x i64> @v_ssubsat_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) {
 ; GFX8-NEXT:    v_sub_u32_e32 v8, vcc, v0, v4
 ; GFX8-NEXT:    v_subb_u32_e32 v9, vcc, v1, v5, vcc
 ; GFX8-NEXT:    v_cmp_lt_i64_e64 s[4:5], v[8:9], v[0:1]
-; GFX8-NEXT:    v_cmp_lt_i64_e64 s[6:7], 0, v[4:5]
+; GFX8-NEXT:    v_cmp_le_i64_e64 s[6:7], 0, v[4:5]
 ; GFX8-NEXT:    v_ashrrev_i32_e32 v0, 31, v9
 ; GFX8-NEXT:    v_bfrev_b32_e32 v1, 1
 ; GFX8-NEXT:    v_add_u32_e32 v1, vcc, v0, v1
@@ -4841,7 +4841,7 @@ define <2 x i64> @v_ssubsat_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) {
 ; GFX8-NEXT:    v_sub_u32_e32 v4, vcc, v2, v6
 ; GFX8-NEXT:    v_subb_u32_e32 v5, vcc, v3, v7, vcc
 ; GFX8-NEXT:    v_cmp_lt_i64_e64 s[4:5], v[4:5], v[2:3]
-; GFX8-NEXT:    v_cmp_lt_i64_e64 s[6:7], 0, v[6:7]
+; GFX8-NEXT:    v_cmp_le_i64_e64 s[6:7], 0, v[6:7]
 ; GFX8-NEXT:    v_ashrrev_i32_e32 v2, 31, v5
 ; GFX8-NEXT:    v_add_u32_e32 v3, vcc, 0x80000000, v2
 ; GFX8-NEXT:    s_xor_b64 vcc, s[6:7], s[4:5]
@@ -4855,7 +4855,7 @@ define <2 x i64> @v_ssubsat_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) {
 ; GFX9-NEXT:    v_sub_co_u32_e32 v8, vcc, v0, v4
 ; GFX9-NEXT:    v_subb_co_u32_e32 v9, vcc, v1, v5, vcc
 ; GFX9-NEXT:    v_cmp_lt_i64_e32 vcc, v[8:9], v[0:1]
-; GFX9-NEXT:    v_cmp_lt_i64_e64 s[4:5], 0, v[4:5]
+; GFX9-NEXT:    v_cmp_le_i64_e64 s[4:5], 0, v[4:5]
 ; GFX9-NEXT:    v_ashrrev_i32_e32 v0, 31, v9
 ; GFX9-NEXT:    v_add_u32_e32 v1, 0x80000000, v0
 ; GFX9-NEXT:    s_xor_b64 vcc, s[4:5], vcc
@@ -4864,7 +4864,7 @@ define <2 x i64> @v_ssubsat_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) {
 ; GFX9-NEXT:    v_sub_co_u32_e32 v4, vcc, v2, v6
 ; GFX9-NEXT:    v_subb_co_u32_e32 v5, vcc, v3, v7, vcc
 ; GFX9-NEXT:    v_cmp_lt_i64_e32 vcc, v[4:5], v[2:3]
-; GFX9-NEXT:    v_cmp_lt_i64_e64 s[4:5], 0, v[6:7]
+; GFX9-NEXT:    v_cmp_le_i64_e64 s[4:5], 0, v[6:7]
 ; GFX9-NEXT:    v_ashrrev_i32_e32 v2, 31, v5
 ; GFX9-NEXT:    v_add_u32_e32 v3, 0x80000000, v2
 ; GFX9-NEXT:    s_xor_b64 vcc, s[4:5], vcc
@@ -4881,10 +4881,10 @@ define <2 x i64> @v_ssubsat_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) {
 ; GFX10-NEXT:    v_sub_co_ci_u32_e32 v11, vcc_lo, v3, v7, vcc_lo
 ; GFX10-NEXT:    v_ashrrev_i32_e32 v12, 31, v9
 ; GFX10-NEXT:    v_cmp_lt_i64_e32 vcc_lo, v[8:9], v[0:1]
-; GFX10-NEXT:    v_cmp_lt_i64_e64 s4, 0, v[4:5]
+; GFX10-NEXT:    v_cmp_le_i64_e64 s4, 0, v[4:5]
 ; GFX10-NEXT:    v_ashrrev_i32_e32 v4, 31, v11
 ; GFX10-NEXT:    v_cmp_lt_i64_e64 s5, v[10:11], v[2:3]
-; GFX10-NEXT:    v_cmp_lt_i64_e64 s6, 0, v[6:7]
+; GFX10-NEXT:    v_cmp_le_i64_e64 s6, 0, v[6:7]
 ; GFX10-NEXT:    v_add_nc_u32_e32 v1, 0x80000000, v12
 ; GFX10-NEXT:    v_add_nc_u32_e32 v3, 0x80000000, v4
 ; GFX10-NEXT:    s_xor_b32 vcc_lo, s4, vcc_lo
@@ -4904,10 +4904,10 @@ define <2 x i64> @v_ssubsat_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) {
 ; GFX11-NEXT:    v_sub_co_ci_u32_e64 v11, null, v3, v7, vcc_lo
 ; GFX11-NEXT:    v_ashrrev_i32_e32 v12, 31, v9
 ; GFX11-NEXT:    v_cmp_lt_i64_e32 vcc_lo, v[8:9], v[0:1]
-; GFX11-NEXT:    v_cmp_lt_i64_e64 s0, 0, v[4:5]
+; GFX11-NEXT:    v_cmp_le_i64_e64 s0, 0, v[4:5]
 ; GFX11-NEXT:    v_ashrrev_i32_e32 v4, 31, v11
 ; GFX11-NEXT:    v_cmp_lt_i64_e64 s1, v[10:11], v[2:3]
-; GFX11-NEXT:    v_cmp_lt_i64_e64 s2, 0, v[6:7]
+; GFX11-NEXT:    v_cmp_le_i64_e64 s2, 0, v[6:7]
 ; GFX11-NEXT:    v_add_nc_u32_e32 v1, 0x80000000, v12
 ; GFX11-NEXT:    v_add_nc_u32_e32 v3, 0x80000000, v4
 ; GFX11-NEXT:    s_xor_b32 vcc_lo, s0, vcc_lo
@@ -5334,15 +5334,15 @@ define amdgpu_ps <4 x float> @ssubsat_i128_sv(i128 inreg %lhs, i128 %rhs) {
 ; GFX6-NEXT:    v_mov_b32_e32 v7, s3
 ; GFX6-NEXT:    v_subb_u32_e32 v6, vcc, v6, v2, vcc
 ; GFX6-NEXT:    v_subb_u32_e32 v7, vcc, v7, v3, vcc
-; GFX6-NEXT:    v_cmp_gt_u64_e32 vcc, s[0:1], v[4:5]
+; GFX6-NEXT:    v_cmp_ge_u64_e32 vcc, s[0:1], v[4:5]
 ; GFX6-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; GFX6-NEXT:    v_cmp_gt_i64_e32 vcc, s[2:3], v[6:7]
+; GFX6-NEXT:    v_cmp_ge_i64_e32 vcc, s[2:3], v[6:7]
 ; GFX6-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
 ; GFX6-NEXT:    v_cmp_eq_u64_e32 vcc, s[2:3], v[6:7]
 ; GFX6-NEXT:    v_cndmask_b32_e32 v8, v9, v8, vcc
-; GFX6-NEXT:    v_cmp_lt_u64_e32 vcc, 0, v[0:1]
+; GFX6-NEXT:    v_cmp_le_u64_e32 vcc, 0, v[0:1]
 ; GFX6-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; GFX6-NEXT:    v_cmp_lt_i64_e32 vcc, 0, v[2:3]
+; GFX6-NEXT:    v_cmp_le_i64_e32 vcc, 0, v[2:3]
 ; GFX6-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
 ; GFX6-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[2:3]
 ; GFX6-NEXT:    v_ashrrev_i32_e32 v2, 31, v7
@@ -5367,15 +5367,15 @@ define amdgpu_ps <4 x float> @ssubsat_i128_sv(i128 inreg %lhs, i128 %rhs) {
 ; GFX8-NEXT:    v_mov_b32_e32 v7, s3
 ; GFX8-NEXT:    v_subb_u32_e32 v6, vcc, v6, v2, vcc
 ; GFX8-NEXT:    v_subb_u32_e32 v7, vcc, v7, v3, vcc
-; GFX8-NEXT:    v_cmp_gt_u64_e32 vcc, s[0:1], v[4:5]
+; GFX8-NEXT:    v_cmp_ge_u64_e32 vcc, s[0:1], v[4:5]
 ; GFX8-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; GFX8-NEXT:    v_cmp_gt_i64_e32 vcc, s[2:3], v[6:7]
+; GFX8-NEXT:    v_cmp_ge_i64_e32 vcc, s[2:3], v[6:7]
 ; GFX8-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
 ; GFX8-NEXT:    v_cmp_eq_u64_e32 vcc, s[2:3], v[6:7]
 ; GFX8-NEXT:    v_cndmask_b32_e32 v8, v9, v8, vcc
-; GFX8-NEXT:    v_cmp_lt_u64_e32 vcc, 0, v[0:1]
+; GFX8-NEXT:    v_cmp_le_u64_e32 vcc, 0, v[0:1]
 ; GFX8-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; GFX8-NEXT:    v_cmp_lt_i64_e32 vcc, 0, v[2:3]
+; GFX8-NEXT:    v_cmp_le_i64_e32 vcc, 0, v[2:3]
 ; GFX8-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
 ; GFX8-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[2:3]
 ; GFX8-NEXT:    v_ashrrev_i32_e32 v2, 31, v7
@@ -5400,15 +5400,15 @@ define amdgpu_ps <4 x float> @ssubsat_i128_sv(i128 inreg %lhs, i128 %rhs) {
 ; GFX9-NEXT:    v_mov_b32_e32 v7, s3
 ; GFX9-NEXT:    v_subb_co_u32_e32 v6, vcc, v6, v2, vcc
 ; GFX9-NEXT:    v_subb_co_u32_e32 v7, vcc, v7, v3, vcc
-; GFX9-NEXT:    v_cmp_gt_u64_e32 vcc, s[0:1], v[4:5]
+; GFX9-NEXT:    v_cmp_ge_u64_e32 vcc, s[0:1], v[4:5]
 ; GFX9-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; GFX9-NEXT:    v_cmp_gt_i64_e32 vcc, s[2:3], v[6:7]
+; GFX9-NEXT:    v_cmp_ge_i64_e32 vcc, s[2:3], v[6:7]
 ; GFX9-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
 ; GFX9-NEXT:    v_cmp_eq_u64_e32 vcc, s[2:3], v[6:7]
 ; GFX9-NEXT:    v_cndmask_b32_e32 v8, v9, v8, vcc
-; GFX9-NEXT:    v_cmp_lt_u64_e32 vcc, 0, v[0:1]
+; GFX9-NEXT:    v_cmp_le_u64_e32 vcc, 0, v[0:1]
 ; GFX9-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; GFX9-NEXT:    v_cmp_lt_i64_e32 vcc, 0, v[2:3]
+; GFX9-NEXT:    v_cmp_le_i64_e32 vcc, 0, v[2:3]
 ; GFX9-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
 ; GFX9-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[2:3]
 ; GFX9-NEXT:    v_ashrrev_i32_e32 v2, 31, v7
@@ -5429,13 +5429,13 @@ define amdgpu_ps <4 x float> @ssubsat_i128_sv(i128 inreg %lhs, i128 %rhs) {
 ; GFX10-NEXT:    v_sub_co_ci_u32_e32 v5, vcc_lo, s1, v1, vcc_lo
 ; GFX10-NEXT:    v_sub_co_ci_u32_e32 v6, vcc_lo, s2, v2, vcc_lo
 ; GFX10-NEXT:    v_sub_co_ci_u32_e32 v7, vcc_lo, s3, v3, vcc_lo
-; GFX10-NEXT:    v_cmp_gt_u64_e32 vcc_lo, s[0:1], v[4:5]
+; GFX10-NEXT:    v_cmp_ge_u64_e32 vcc_lo, s[0:1], v[4:5]
 ; GFX10-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc_lo
-; GFX10-NEXT:    v_cmp_gt_i64_e32 vcc_lo, s[2:3], v[6:7]
+; GFX10-NEXT:    v_cmp_ge_i64_e32 vcc_lo, s[2:3], v[6:7]
 ; GFX10-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc_lo
-; GFX10-NEXT:    v_cmp_lt_u64_e32 vcc_lo, 0, v[0:1]
+; GFX10-NEXT:    v_cmp_le_u64_e32 vcc_lo, 0, v[0:1]
 ; GFX10-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
-; GFX10-NEXT:    v_cmp_lt_i64_e32 vcc_lo, 0, v[2:3]
+; GFX10-NEXT:    v_cmp_le_i64_e32 vcc_lo, 0, v[2:3]
 ; GFX10-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc_lo
 ; GFX10-NEXT:    v_cmp_eq_u64_e32 vcc_lo, s[2:3], v[6:7]
 ; GFX10-NEXT:    v_cndmask_b32_e32 v8, v9, v8, vcc_lo
@@ -5458,13 +5458,13 @@ define amdgpu_ps <4 x float> @ssubsat_i128_sv(i128 inreg %lhs, i128 %rhs) {
 ; GFX11-NEXT:    v_sub_co_ci_u32_e32 v5, vcc_lo, s1, v1, vcc_lo
 ; GFX11-NEXT:    v_sub_co_ci_u32_e32 v6, vcc_lo, s2, v2, vcc_lo
 ; GFX11-NEXT:    v_sub_co_ci_u32_e64 v7, null, s3, v3, vcc_lo
-; GFX11-NEXT:    v_cmp_gt_u64_e32 vcc_lo, s[0:1], v[4:5]
+; GFX11-NEXT:    v_cmp_ge_u64_e32 vcc_lo, s[0:1], v[4:5]
 ; GFX11-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc_lo
-; GFX11-NEXT:    v_cmp_gt_i64_e32 vcc_lo, s[2:3], v[6:7]
+; GFX11-NEXT:    v_cmp_ge_i64_e32 vcc_lo, s[2:3], v[6:7]
 ; GFX11-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc_lo
-; GFX11-NEXT:    v_cmp_lt_u64_e32 vcc_lo, 0, v[0:1]
+; GFX11-NEXT:    v_cmp_le_u64_e32 vcc_lo, 0, v[0:1]
 ; GFX11-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
-; GFX11-NEXT:    v_cmp_lt_i64_e32 vcc_lo, 0, v[2:3]
+; GFX11-NEXT:    v_cmp_le_i64_e32 vcc_lo, 0, v[2:3]
 ; GFX11-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc_lo
 ; GFX11-NEXT:    v_cmp_eq_u64_e32 vcc_lo, s[2:3], v[6:7]
 ; GFX11-NEXT:    v_cndmask_b32_e32 v8, v9, v8, vcc_lo
@@ -5669,9 +5669,9 @@ define <2 x i128> @v_ssubsat_v2i128(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GFX6-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
 ; GFX6-NEXT:    v_cmp_eq_u64_e32 vcc, v[18:19], v[2:3]
 ; GFX6-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
-; GFX6-NEXT:    v_cmp_lt_u64_e32 vcc, 0, v[8:9]
+; GFX6-NEXT:    v_cmp_le_u64_e32 vcc, 0, v[8:9]
 ; GFX6-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; GFX6-NEXT:    v_cmp_lt_i64_e32 vcc, 0, v[10:11]
+; GFX6-NEXT:    v_cmp_le_i64_e32 vcc, 0, v[10:11]
 ; GFX6-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
 ; GFX6-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[10:11]
 ; GFX6-NEXT:    v_cndmask_b32_e32 v1, v2, v1, vcc
@@ -5695,9 +5695,9 @@ define <2 x i128> @v_ssubsat_v2i128(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GFX6-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
 ; GFX6-NEXT:    v_cmp_eq_u64_e32 vcc, v[10:11], v[6:7]
 ; GFX6-NEXT:    v_cndmask_b32_e32 v4, v5, v4, vcc
-; GFX6-NEXT:    v_cmp_lt_u64_e32 vcc, 0, v[12:13]
+; GFX6-NEXT:    v_cmp_le_u64_e32 vcc, 0, v[12:13]
 ; GFX6-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; GFX6-NEXT:    v_cmp_lt_i64_e32 vcc, 0, v[14:15]
+; GFX6-NEXT:    v_cmp_le_i64_e32 vcc, 0, v[14:15]
 ; GFX6-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
 ; GFX6-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[14:15]
 ; GFX6-NEXT:    v_cndmask_b32_e32 v5, v6, v5, vcc
@@ -5725,9 +5725,9 @@ define <2 x i128> @v_ssubsat_v2i128(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GFX8-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
 ; GFX8-NEXT:    v_cmp_eq_u64_e32 vcc, v[18:19], v[2:3]
 ; GFX8-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
-; GFX8-NEXT:    v_cmp_lt_u64_e32 vcc, 0, v[8:9]
+; GFX8-NEXT:    v_cmp_le_u64_e32 vcc, 0, v[8:9]
 ; GFX8-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; GFX8-NEXT:    v_cmp_lt_i64_e32 vcc, 0, v[10:11]
+; GFX8-NEXT:    v_cmp_le_i64_e32 vcc, 0, v[10:11]
 ; GFX8-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
 ; GFX8-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[10:11]
 ; GFX8-NEXT:    v_cndmask_b32_e32 v1, v2, v1, vcc
@@ -5751,9 +5751,9 @@ define <2 x i128> @v_ssubsat_v2i128(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GFX8-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
 ; GFX8-NEXT:    v_cmp_eq_u64_e32 vcc, v[10:11], v[6:7]
 ; GFX8-NEXT:    v_cndmask_b32_e32 v4, v5, v4, vcc
-; GFX8-NEXT:    v_cmp_lt_u64_e32 vcc, 0, v[12:13]
+; GFX8-NEXT:    v_cmp_le_u64_e32 vcc, 0, v[12:13]
 ; GFX8-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; GFX8-NEXT:    v_cmp_lt_i64_e32 vcc, 0, v[14:15]
+; GFX8-NEXT:    v_cmp_le_i64_e32 vcc, 0, v[14:15]
 ; GFX8-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
 ; GFX8-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[14:15]
 ; GFX8-NEXT:    v_cndmask_b32_e32 v5, v6, v5, vcc
@@ -5781,9 +5781,9 @@ define <2 x i128> @v_ssubsat_v2i128(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GFX9-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
 ; GFX9-NEXT:    v_cmp_eq_u64_e32 vcc, v[18:19], v[2:3]
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
-; GFX9-NEXT:    v_cmp_lt_u64_e32 vcc, 0, v[8:9]
+; GFX9-NEXT:    v_cmp_le_u64_e32 vcc, 0, v[8:9]
 ; GFX9-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; GFX9-NEXT:    v_cmp_lt_i64_e32 vcc, 0, v[10:11]
+; GFX9-NEXT:    v_cmp_le_i64_e32 vcc, 0, v[10:11]
 ; GFX9-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
 ; GFX9-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[10:11]
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v2, v1, vcc
@@ -5806,9 +5806,9 @@ define <2 x i128> @v_ssubsat_v2i128(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GFX9-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
 ; GFX9-NEXT:    v_cmp_eq_u64_e32 vcc, v[10:11], v[6:7]
 ; GFX9-NEXT:    v_cndmask_b32_e32 v4, v5, v4, vcc
-; GFX9-NEXT:    v_cmp_lt_u64_e32 vcc, 0, v[12:13]
+; GFX9-NEXT:    v_cmp_le_u64_e32 vcc, 0, v[12:13]
 ; GFX9-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; GFX9-NEXT:    v_cmp_lt_i64_e32 vcc, 0, v[14:15]
+; GFX9-NEXT:    v_cmp_le_i64_e32 vcc, 0, v[14:15]
 ; GFX9-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
 ; GFX9-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[14:15]
 ; GFX9-NEXT:    v_cndmask_b32_e32 v5, v6, v5, vcc
@@ -5836,9 +5836,9 @@ define <2 x i128> @v_ssubsat_v2i128(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GFX10-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc_lo
 ; GFX10-NEXT:    v_cmp_eq_u64_e32 vcc_lo, v[18:19], v[2:3]
 ; GFX10-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc_lo
-; GFX10-NEXT:    v_cmp_lt_u64_e32 vcc_lo, 0, v[8:9]
+; GFX10-NEXT:    v_cmp_le_u64_e32 vcc_lo, 0, v[8:9]
 ; GFX10-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc_lo
-; GFX10-NEXT:    v_cmp_lt_i64_e32 vcc_lo, 0, v[10:11]
+; GFX10-NEXT:    v_cmp_le_i64_e32 vcc_lo, 0, v[10:11]
 ; GFX10-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc_lo
 ; GFX10-NEXT:    v_sub_co_u32 v8, vcc_lo, v4, v12
 ; GFX10-NEXT:    v_sub_co_ci_u32_e32 v9, vcc_lo, v5, v13, vcc_lo
@@ -5852,9 +5852,9 @@ define <2 x i128> @v_ssubsat_v2i128(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GFX10-NEXT:    v_cmp_lt_i64_e32 vcc_lo, v[20:21], v[6:7]
 ; GFX10-NEXT:    v_and_b32_e32 v0, 1, v0
 ; GFX10-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc_lo
-; GFX10-NEXT:    v_cmp_lt_u64_e32 vcc_lo, 0, v[12:13]
+; GFX10-NEXT:    v_cmp_le_u64_e32 vcc_lo, 0, v[12:13]
 ; GFX10-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc_lo
-; GFX10-NEXT:    v_cmp_lt_i64_e32 vcc_lo, 0, v[14:15]
+; GFX10-NEXT:    v_cmp_le_i64_e32 vcc_lo, 0, v[14:15]
 ; GFX10-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc_lo
 ; GFX10-NEXT:    v_cmp_eq_u64_e32 vcc_lo, v[20:21], v[6:7]
 ; GFX10-NEXT:    v_ashrrev_i32_e32 v6, 31, v21
@@ -5891,9 +5891,9 @@ define <2 x i128> @v_ssubsat_v2i128(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GFX11-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc_lo
 ; GFX11-NEXT:    v_cmp_eq_u64_e32 vcc_lo, v[18:19], v[2:3]
 ; GFX11-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc_lo
-; GFX11-NEXT:    v_cmp_lt_u64_e32 vcc_lo, 0, v[8:9]
+; GFX11-NEXT:    v_cmp_le_u64_e32 vcc_lo, 0, v[8:9]
 ; GFX11-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc_lo
-; GFX11-NEXT:    v_cmp_lt_i64_e32 vcc_lo, 0, v[10:11]
+; GFX11-NEXT:    v_cmp_le_i64_e32 vcc_lo, 0, v[10:11]
 ; GFX11-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc_lo
 ; GFX11-NEXT:    v_sub_co_u32 v8, vcc_lo, v4, v12
 ; GFX11-NEXT:    v_sub_co_ci_u32_e32 v9, vcc_lo, v5, v13, vcc_lo
@@ -5906,9 +5906,9 @@ define <2 x i128> @v_ssubsat_v2i128(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GFX11-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc_lo
 ; GFX11-NEXT:    v_cmp_lt_i64_e32 vcc_lo, v[20:21], v[6:7]
 ; GFX11-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc_lo
-; GFX11-NEXT:    v_cmp_lt_u64_e32 vcc_lo, 0, v[12:13]
+; GFX11-NEXT:    v_cmp_le_u64_e32 vcc_lo, 0, v[12:13]
 ; GFX11-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc_lo
-; GFX11-NEXT:    v_cmp_lt_i64_e32 vcc_lo, 0, v[14:15]
+; GFX11-NEXT:    v_cmp_le_i64_e32 vcc_lo, 0, v[14:15]
 ; GFX11-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc_lo
 ; GFX11-NEXT:    v_cmp_eq_u64_e32 vcc_lo, v[20:21], v[6:7]
 ; GFX11-NEXT:    v_ashrrev_i32_e32 v6, 31, v21
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/subo.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/subo.ll
index fe2667bc4c920..46f311d0d70e5 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/subo.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/subo.ll
@@ -205,7 +205,7 @@ define i32 @v_ssubo_i32(i32 %a, i32 %b) {
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX7-NEXT:    v_sub_i32_e32 v2, vcc, v0, v1
 ; GFX7-NEXT:    v_cmp_lt_i32_e32 vcc, v2, v0
-; GFX7-NEXT:    v_cmp_lt_i32_e64 s[4:5], 0, v1
+; GFX7-NEXT:    v_cmp_le_i32_e64 s[4:5], 0, v1
 ; GFX7-NEXT:    s_xor_b64 s[4:5], s[4:5], vcc
 ; GFX7-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
 ; GFX7-NEXT:    v_sub_i32_e32 v0, vcc, v2, v0
@@ -216,7 +216,7 @@ define i32 @v_ssubo_i32(i32 %a, i32 %b) {
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_sub_u32_e32 v2, vcc, v0, v1
 ; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, v2, v0
-; GFX8-NEXT:    v_cmp_lt_i32_e64 s[4:5], 0, v1
+; GFX8-NEXT:    v_cmp_le_i32_e64 s[4:5], 0, v1
 ; GFX8-NEXT:    s_xor_b64 s[4:5], s[4:5], vcc
 ; GFX8-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
 ; GFX8-NEXT:    v_sub_u32_e32 v0, vcc, v2, v0
@@ -227,7 +227,7 @@ define i32 @v_ssubo_i32(i32 %a, i32 %b) {
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    v_sub_u32_e32 v2, v0, v1
 ; GFX9-NEXT:    v_cmp_lt_i32_e32 vcc, v2, v0
-; GFX9-NEXT:    v_cmp_lt_i32_e64 s[4:5], 0, v1
+; GFX9-NEXT:    v_cmp_le_i32_e64 s[4:5], 0, v1
 ; GFX9-NEXT:    s_xor_b64 s[4:5], s[4:5], vcc
 ; GFX9-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
 ; GFX9-NEXT:    v_sub_u32_e32 v0, v2, v0
@@ -247,7 +247,7 @@ define i64 @v_ssubo_i64(i64 %a, i64 %b) {
 ; GFX7-NEXT:    v_sub_i32_e32 v4, vcc, v0, v2
 ; GFX7-NEXT:    v_subb_u32_e32 v5, vcc, v1, v3, vcc
 ; GFX7-NEXT:    v_cmp_lt_i64_e32 vcc, v[4:5], v[0:1]
-; GFX7-NEXT:    v_cmp_lt_i64_e64 s[4:5], 0, v[2:3]
+; GFX7-NEXT:    v_cmp_le_i64_e64 s[4:5], 0, v[2:3]
 ; GFX7-NEXT:    s_xor_b64 s[4:5], s[4:5], vcc
 ; GFX7-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
 ; GFX7-NEXT:    v_sub_i32_e32 v0, vcc, v4, v0
@@ -260,7 +260,7 @@ define i64 @v_ssubo_i64(i64 %a, i64 %b) {
 ; GFX8-NEXT:    v_sub_u32_e32 v4, vcc, v0, v2
 ; GFX8-NEXT:    v_subb_u32_e32 v5, vcc, v1, v3, vcc
 ; GFX8-NEXT:    v_cmp_lt_i64_e32 vcc, v[4:5], v[0:1]
-; GFX8-NEXT:    v_cmp_lt_i64_e64 s[4:5], 0, v[2:3]
+; GFX8-NEXT:    v_cmp_le_i64_e64 s[4:5], 0, v[2:3]
 ; GFX8-NEXT:    s_xor_b64 s[4:5], s[4:5], vcc
 ; GFX8-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
 ; GFX8-NEXT:    v_sub_u32_e32 v0, vcc, v4, v0
@@ -273,7 +273,7 @@ define i64 @v_ssubo_i64(i64 %a, i64 %b) {
 ; GFX9-NEXT:    v_sub_co_u32_e32 v4, vcc, v0, v2
 ; GFX9-NEXT:    v_subb_co_u32_e32 v5, vcc, v1, v3, vcc
 ; GFX9-NEXT:    v_cmp_lt_i64_e32 vcc, v[4:5], v[0:1]
-; GFX9-NEXT:    v_cmp_lt_i64_e64 s[4:5], 0, v[2:3]
+; GFX9-NEXT:    v_cmp_le_i64_e64 s[4:5], 0, v[2:3]
 ; GFX9-NEXT:    s_xor_b64 s[4:5], s[4:5], vcc
 ; GFX9-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
 ; GFX9-NEXT:    v_sub_co_u32_e32 v0, vcc, v4, v0
@@ -295,8 +295,8 @@ define <2 x i32> @v_ssubo_v2i32(<2 x i32> %a, <2 x i32> %b) {
 ; GFX7-NEXT:    v_sub_i32_e32 v5, vcc, v1, v3
 ; GFX7-NEXT:    v_cmp_lt_i32_e32 vcc, v4, v0
 ; GFX7-NEXT:    v_cmp_lt_i32_e64 s[4:5], v5, v1
-; GFX7-NEXT:    v_cmp_lt_i32_e64 s[6:7], 0, v2
-; GFX7-NEXT:    v_cmp_lt_i32_e64 s[8:9], 0, v3
+; GFX7-NEXT:    v_cmp_le_i32_e64 s[6:7], 0, v2
+; GFX7-NEXT:    v_cmp_le_i32_e64 s[8:9], 0, v3
 ; GFX7-NEXT:    s_xor_b64 s[6:7], s[6:7], vcc
 ; GFX7-NEXT:    s_xor_b64 s[4:5], s[8:9], s[4:5]
 ; GFX7-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[6:7]
@@ -312,8 +312,8 @@ define <2 x i32> @v_ssubo_v2i32(<2 x i32> %a, <2 x i32> %b) {
 ; GFX8-NEXT:    v_sub_u32_e32 v5, vcc, v1, v3
 ; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, v4, v0
 ; GFX8-NEXT:    v_cmp_lt_i32_e64 s[4:5], v5, v1
-; GFX8-NEXT:    v_cmp_lt_i32_e64 s[6:7], 0, v2
-; GFX8-NEXT:    v_cmp_lt_i32_e64 s[8:9], 0, v3
+; GFX8-NEXT:    v_cmp_le_i32_e64 s[6:7], 0, v2
+; GFX8-NEXT:    v_cmp_le_i32_e64 s[8:9], 0, v3
 ; GFX8-NEXT:    s_xor_b64 s[6:7], s[6:7], vcc
 ; GFX8-NEXT:    s_xor_b64 s[4:5], s[8:9], s[4:5]
 ; GFX8-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[6:7]
@@ -329,8 +329,8 @@ define <2 x i32> @v_ssubo_v2i32(<2 x i32> %a, <2 x i32> %b) {
 ; GFX9-NEXT:    v_sub_u32_e32 v5, v1, v3
 ; GFX9-NEXT:    v_cmp_lt_i32_e32 vcc, v4, v0
 ; GFX9-NEXT:    v_cmp_lt_i32_e64 s[4:5], v5, v1
-; GFX9-NEXT:    v_cmp_lt_i32_e64 s[6:7], 0, v2
-; GFX9-NEXT:    v_cmp_lt_i32_e64 s[8:9], 0, v3
+; GFX9-NEXT:    v_cmp_le_i32_e64 s[6:7], 0, v2
+; GFX9-NEXT:    v_cmp_le_i32_e64 s[8:9], 0, v3
 ; GFX9-NEXT:    s_xor_b64 s[6:7], s[6:7], vcc
 ; GFX9-NEXT:    s_xor_b64 s[4:5], s[8:9], s[4:5]
 ; GFX9-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[6:7]
@@ -355,7 +355,7 @@ define i8 @v_ssubo_i8(i8 %a, i8 %b) {
 ; GFX7-NEXT:    v_bfe_i32 v0, v0, 0, 8
 ; GFX7-NEXT:    v_cmp_lt_i32_e32 vcc, v3, v0
 ; GFX7-NEXT:    v_bfe_i32 v0, v1, 0, 8
-; GFX7-NEXT:    v_cmp_lt_i32_e64 s[4:5], 0, v0
+; GFX7-NEXT:    v_cmp_le_i32_e64 s[4:5], 0, v0
 ; GFX7-NEXT:    s_xor_b64 s[4:5], s[4:5], vcc
 ; GFX7-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
 ; GFX7-NEXT:    v_sub_i32_e32 v0, vcc, v2, v0
@@ -369,7 +369,7 @@ define i8 @v_ssubo_i8(i8 %a, i8 %b) {
 ; GFX8-NEXT:    v_bfe_i32 v0, v0, 0, 8
 ; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, v3, v0
 ; GFX8-NEXT:    v_bfe_i32 v0, v1, 0, 8
-; GFX8-NEXT:    v_cmp_lt_i32_e64 s[4:5], 0, v0
+; GFX8-NEXT:    v_cmp_le_i32_e64 s[4:5], 0, v0
 ; GFX8-NEXT:    s_xor_b64 s[4:5], s[4:5], vcc
 ; GFX8-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
 ; GFX8-NEXT:    v_sub_u16_e32 v0, v2, v0
@@ -403,7 +403,7 @@ define i7 @v_ssubo_i7(i7 %a, i7 %b) {
 ; GFX7-NEXT:    v_bfe_i32 v0, v0, 0, 7
 ; GFX7-NEXT:    v_cmp_lt_i32_e32 vcc, v3, v0
 ; GFX7-NEXT:    v_bfe_i32 v0, v1, 0, 7
-; GFX7-NEXT:    v_cmp_lt_i32_e64 s[4:5], 0, v0
+; GFX7-NEXT:    v_cmp_le_i32_e64 s[4:5], 0, v0
 ; GFX7-NEXT:    s_xor_b64 s[4:5], s[4:5], vcc
 ; GFX7-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
 ; GFX7-NEXT:    v_sub_i32_e32 v0, vcc, v2, v0
@@ -417,7 +417,7 @@ define i7 @v_ssubo_i7(i7 %a, i7 %b) {
 ; GFX8-NEXT:    v_bfe_i32 v0, v0, 0, 7
 ; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, v3, v0
 ; GFX8-NEXT:    v_bfe_i32 v0, v1, 0, 7
-; GFX8-NEXT:    v_cmp_lt_i32_e64 s[4:5], 0, v0
+; GFX8-NEXT:    v_cmp_le_i32_e64 s[4:5], 0, v0
 ; GFX8-NEXT:    s_xor_b64 s[4:5], s[4:5], vcc
 ; GFX8-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
 ; GFX8-NEXT:    v_sub_u16_e32 v0, v2, v0
@@ -431,7 +431,7 @@ define i7 @v_ssubo_i7(i7 %a, i7 %b) {
 ; GFX9-NEXT:    v_bfe_i32 v0, v0, 0, 7
 ; GFX9-NEXT:    v_cmp_lt_i32_e32 vcc, v3, v0
 ; GFX9-NEXT:    v_bfe_i32 v0, v1, 0, 7
-; GFX9-NEXT:    v_cmp_lt_i32_e64 s[4:5], 0, v0
+; GFX9-NEXT:    v_cmp_le_i32_e64 s[4:5], 0, v0
 ; GFX9-NEXT:    s_xor_b64 s[4:5], s[4:5], vcc
 ; GFX9-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
 ; GFX9-NEXT:    v_sub_u16_e32 v0, v2, v0
@@ -802,7 +802,7 @@ define i8 @s_ssubo_i8(i8 %a, i8 %b) {
 ; GFX7-NEXT:    v_bfe_i32 v0, v0, 0, 8
 ; GFX7-NEXT:    v_cmp_lt_i32_e32 vcc, v3, v0
 ; GFX7-NEXT:    v_bfe_i32 v0, v1, 0, 8
-; GFX7-NEXT:    v_cmp_lt_i32_e64 s[4:5], 0, v0
+; GFX7-NEXT:    v_cmp_le_i32_e64 s[4:5], 0, v0
 ; GFX7-NEXT:    s_xor_b64 s[4:5], s[4:5], vcc
 ; GFX7-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
 ; GFX7-NEXT:    v_sub_i32_e32 v0, vcc, v2, v0
@@ -816,7 +816,7 @@ define i8 @s_ssubo_i8(i8 %a, i8 %b) {
 ; GFX8-NEXT:    v_bfe_i32 v0, v0, 0, 8
 ; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, v3, v0
 ; GFX8-NEXT:    v_bfe_i32 v0, v1, 0, 8
-; GFX8-NEXT:    v_cmp_lt_i32_e64 s[4:5], 0, v0
+; GFX8-NEXT:    v_cmp_le_i32_e64 s[4:5], 0, v0
 ; GFX8-NEXT:    s_xor_b64 s[4:5], s[4:5], vcc
 ; GFX8-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
 ; GFX8-NEXT:    v_sub_u16_e32 v0, v2, v0
@@ -850,7 +850,7 @@ define i7 @s_ssubo_i7(i7 %a, i7 %b) {
 ; GFX7-NEXT:    v_bfe_i32 v0, v0, 0, 7
 ; GFX7-NEXT:    v_cmp_lt_i32_e32 vcc, v3, v0
 ; GFX7-NEXT:    v_bfe_i32 v0, v1, 0, 7
-; GFX7-NEXT:    v_cmp_lt_i32_e64 s[4:5], 0, v0
+; GFX7-NEXT:    v_cmp_le_i32_e64 s[4:5], 0, v0
 ; GFX7-NEXT:    s_xor_b64 s[4:5], s[4:5], vcc
 ; GFX7-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
 ; GFX7-NEXT:    v_sub_i32_e32 v0, vcc, v2, v0
@@ -864,7 +864,7 @@ define i7 @s_ssubo_i7(i7 %a, i7 %b) {
 ; GFX8-NEXT:    v_bfe_i32 v0, v0, 0, 7
 ; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, v3, v0
 ; GFX8-NEXT:    v_bfe_i32 v0, v1, 0, 7
-; GFX8-NEXT:    v_cmp_lt_i32_e64 s[4:5], 0, v0
+; GFX8-NEXT:    v_cmp_le_i32_e64 s[4:5], 0, v0
 ; GFX8-NEXT:    s_xor_b64 s[4:5], s[4:5], vcc
 ; GFX8-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
 ; GFX8-NEXT:    v_sub_u16_e32 v0, v2, v0
@@ -878,7 +878,7 @@ define i7 @s_ssubo_i7(i7 %a, i7 %b) {
 ; GFX9-NEXT:    v_bfe_i32 v0, v0, 0, 7
 ; GFX9-NEXT:    v_cmp_lt_i32_e32 vcc, v3, v0
 ; GFX9-NEXT:    v_bfe_i32 v0, v1, 0, 7
-; GFX9-NEXT:    v_cmp_lt_i32_e64 s[4:5], 0, v0
+; GFX9-NEXT:    v_cmp_le_i32_e64 s[4:5], 0, v0
 ; GFX9-NEXT:    s_xor_b64 s[4:5], s[4:5], vcc
 ; GFX9-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
 ; GFX9-NEXT:    v_sub_u16_e32 v0, v2, v0
@@ -969,8 +969,8 @@ define amdgpu_ps i32 @ssubo_i32_sv(i32 inreg %a, i32 %b) {
 ; GFX7-LABEL: ssubo_i32_sv:
 ; GFX7:       ; %bb.0:
 ; GFX7-NEXT:    v_sub_i32_e32 v1, vcc, s0, v0
-; GFX7-NEXT:    v_cmp_gt_i32_e32 vcc, s0, v1
-; GFX7-NEXT:    v_cmp_lt_i32_e64 s[0:1], 0, v0
+; GFX7-NEXT:    v_cmp_ge_i32_e32 vcc, s0, v1
+; GFX7-NEXT:    v_cmp_le_i32_e64 s[0:1], 0, v0
 ; GFX7-NEXT:    s_xor_b64 s[0:1], s[0:1], vcc
 ; GFX7-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[0:1]
 ; GFX7-NEXT:    v_sub_i32_e32 v0, vcc, v1, v0
@@ -980,8 +980,8 @@ define amdgpu_ps i32 @ssubo_i32_sv(i32 inreg %a, i32 %b) {
 ; GFX8-LABEL: ssubo_i32_sv:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    v_sub_u32_e32 v1, vcc, s0, v0
-; GFX8-NEXT:    v_cmp_gt_i32_e32 vcc, s0, v1
-; GFX8-NEXT:    v_cmp_lt_i32_e64 s[0:1], 0, v0
+; GFX8-NEXT:    v_cmp_ge_i32_e32 vcc, s0, v1
+; GFX8-NEXT:    v_cmp_le_i32_e64 s[0:1], 0, v0
 ; GFX8-NEXT:    s_xor_b64 s[0:1], s[0:1], vcc
 ; GFX8-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[0:1]
 ; GFX8-NEXT:    v_sub_u32_e32 v0, vcc, v1, v0
@@ -991,8 +991,8 @@ define amdgpu_ps i32 @ssubo_i32_sv(i32 inreg %a, i32 %b) {
 ; GFX9-LABEL: ssubo_i32_sv:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    v_sub_u32_e32 v1, s0, v0
-; GFX9-NEXT:    v_cmp_gt_i32_e32 vcc, s0, v1
-; GFX9-NEXT:    v_cmp_lt_i32_e64 s[0:1], 0, v0
+; GFX9-NEXT:    v_cmp_ge_i32_e32 vcc, s0, v1
+; GFX9-NEXT:    v_cmp_le_i32_e64 s[0:1], 0, v0
 ; GFX9-NEXT:    s_xor_b64 s[0:1], s[0:1], vcc
 ; GFX9-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[0:1]
 ; GFX9-NEXT:    v_sub_u32_e32 v0, v1, v0
@@ -1013,8 +1013,8 @@ define amdgpu_ps i16 @ssubo_i16_sv(i16 inreg %a, i16 %b) {
 ; GFX7-NEXT:    v_bfe_i32 v2, v1, 0, 16
 ; GFX7-NEXT:    s_sext_i32_i16 s0, s0
 ; GFX7-NEXT:    v_bfe_i32 v0, v0, 0, 16
-; GFX7-NEXT:    v_cmp_gt_i32_e32 vcc, s0, v2
-; GFX7-NEXT:    v_cmp_lt_i32_e64 s[0:1], 0, v0
+; GFX7-NEXT:    v_cmp_ge_i32_e32 vcc, s0, v2
+; GFX7-NEXT:    v_cmp_le_i32_e64 s[0:1], 0, v0
 ; GFX7-NEXT:    s_xor_b64 s[0:1], s[0:1], vcc
 ; GFX7-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[0:1]
 ; GFX7-NEXT:    v_sub_i32_e32 v0, vcc, v1, v0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i32.ll
index cd01148fa7dd7..ff47af8d8b9a6 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i32.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i32.ll
@@ -74,12 +74,12 @@ define amdgpu_ps i32 @s_udiv_i32(i32 inreg %num, i32 inreg %den) {
 ; GISEL-NEXT:    v_mul_lo_u32 v1, v0, s1
 ; GISEL-NEXT:    v_add_i32_e32 v2, vcc, 1, v0
 ; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, s0, v1
-; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s1, v1
+; GISEL-NEXT:    v_cmp_lt_u32_e32 vcc, s1, v1
 ; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; GISEL-NEXT:    v_subrev_i32_e64 v2, s[2:3], s1, v1
 ; GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
 ; GISEL-NEXT:    v_add_i32_e32 v2, vcc, 1, v0
-; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s1, v1
+; GISEL-NEXT:    v_cmp_lt_u32_e32 vcc, s1, v1
 ; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; GISEL-NEXT:    v_readfirstlane_b32 s0, v0
 ; GISEL-NEXT:    ; return to shader part epilog
@@ -98,12 +98,12 @@ define amdgpu_ps i32 @s_udiv_i32(i32 inreg %num, i32 inreg %den) {
 ; CGP-NEXT:    v_mul_lo_u32 v1, v0, s1
 ; CGP-NEXT:    v_add_i32_e32 v2, vcc, 1, v0
 ; CGP-NEXT:    v_sub_i32_e32 v1, vcc, s0, v1
-; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s1, v1
+; CGP-NEXT:    v_cmp_lt_u32_e32 vcc, s1, v1
 ; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; CGP-NEXT:    v_subrev_i32_e64 v2, s[2:3], s1, v1
 ; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
 ; CGP-NEXT:    v_add_i32_e32 v2, vcc, 1, v0
-; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s1, v1
+; CGP-NEXT:    v_cmp_lt_u32_e32 vcc, s1, v1
 ; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; CGP-NEXT:    v_readfirstlane_b32 s0, v0
 ; CGP-NEXT:    ; return to shader part epilog
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll
index 1a6d26142208f..d7d59682e9194 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll
@@ -299,18 +299,18 @@ define amdgpu_ps i64 @s_udiv_i64(i64 inreg %num, i64 inreg %den) {
 ; CHECK-NEXT:    v_sub_i32_e32 v6, vcc, s0, v6
 ; CHECK-NEXT:    v_subb_u32_e64 v3, s[4:5], v3, v4, vcc
 ; CHECK-NEXT:    v_sub_i32_e64 v4, s[4:5], s1, v4
-; CHECK-NEXT:    v_cmp_le_u32_e64 s[4:5], s2, v6
+; CHECK-NEXT:    v_cmp_lt_u32_e64 s[4:5], s2, v6
 ; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, -1, s[4:5]
-; CHECK-NEXT:    v_cmp_le_u32_e64 s[4:5], s3, v3
+; CHECK-NEXT:    v_cmp_lt_u32_e64 s[4:5], s3, v3
 ; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, -1, s[4:5]
 ; CHECK-NEXT:    v_subb_u32_e32 v0, vcc, v4, v0, vcc
 ; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, s3, v3
 ; CHECK-NEXT:    v_cndmask_b32_e32 v3, v8, v7, vcc
 ; CHECK-NEXT:    v_subrev_i32_e32 v4, vcc, s2, v6
 ; CHECK-NEXT:    v_subbrev_u32_e32 v0, vcc, 0, v0, vcc
-; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s2, v4
+; CHECK-NEXT:    v_cmp_lt_u32_e32 vcc, s2, v4
 ; CHECK-NEXT:    v_cndmask_b32_e64 v4, 0, -1, vcc
-; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s3, v0
+; CHECK-NEXT:    v_cmp_lt_u32_e32 vcc, s3, v0
 ; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, -1, vcc
 ; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, s3, v0
 ; CHECK-NEXT:    v_cndmask_b32_e32 v0, v6, v4, vcc
@@ -337,12 +337,12 @@ define amdgpu_ps i64 @s_udiv_i64(i64 inreg %num, i64 inreg %den) {
 ; CHECK-NEXT:    v_mul_lo_u32 v1, v0, s2
 ; CHECK-NEXT:    v_add_i32_e32 v2, vcc, 1, v0
 ; CHECK-NEXT:    v_sub_i32_e32 v1, vcc, s0, v1
-; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s2, v1
+; CHECK-NEXT:    v_cmp_lt_u32_e32 vcc, s2, v1
 ; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; CHECK-NEXT:    v_subrev_i32_e64 v2, s[0:1], s2, v1
 ; CHECK-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
 ; CHECK-NEXT:    v_add_i32_e32 v2, vcc, 1, v0
-; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s2, v1
+; CHECK-NEXT:    v_cmp_lt_u32_e32 vcc, s2, v1
 ; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; CHECK-NEXT:  .LBB1_5:
 ; CHECK-NEXT:    v_readfirstlane_b32 s0, v0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/udivrem.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/udivrem.ll
index 1aaf3122cc00d..0664c896b8fe0 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/udivrem.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/udivrem.ll
@@ -27,12 +27,12 @@ define amdgpu_kernel void @udivrem_i32(ptr addrspace(1) %out0, ptr addrspace(1)
 ; GFX8-NEXT:    v_mul_lo_u32 v3, v2, s5
 ; GFX8-NEXT:    v_add_u32_e32 v4, vcc, 1, v2
 ; GFX8-NEXT:    v_sub_u32_e32 v3, vcc, s4, v3
-; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, s5, v3
+; GFX8-NEXT:    v_cmp_lt_u32_e32 vcc, s5, v3
 ; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
 ; GFX8-NEXT:    v_subrev_u32_e64 v4, s[0:1], s5, v3
 ; GFX8-NEXT:    v_cndmask_b32_e32 v3, v3, v4, vcc
 ; GFX8-NEXT:    v_add_u32_e32 v4, vcc, 1, v2
-; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, s5, v3
+; GFX8-NEXT:    v_cmp_lt_u32_e32 vcc, s5, v3
 ; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
 ; GFX8-NEXT:    v_subrev_u32_e64 v4, s[0:1], s5, v3
 ; GFX8-NEXT:    flat_store_dword v[0:1], v2
@@ -60,12 +60,12 @@ define amdgpu_kernel void @udivrem_i32(ptr addrspace(1) %out0, ptr addrspace(1)
 ; GFX9-NEXT:    v_mul_lo_u32 v1, v0, s5
 ; GFX9-NEXT:    v_add_u32_e32 v3, 1, v0
 ; GFX9-NEXT:    v_sub_u32_e32 v1, s4, v1
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s5, v1
+; GFX9-NEXT:    v_cmp_lt_u32_e32 vcc, s5, v1
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
 ; GFX9-NEXT:    v_subrev_u32_e32 v3, s5, v1
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
 ; GFX9-NEXT:    v_add_u32_e32 v3, 1, v0
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s5, v1
+; GFX9-NEXT:    v_cmp_lt_u32_e32 vcc, s5, v1
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
 ; GFX9-NEXT:    v_subrev_u32_e32 v3, s5, v1
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
@@ -92,11 +92,11 @@ define amdgpu_kernel void @udivrem_i32(ptr addrspace(1) %out0, ptr addrspace(1)
 ; GFX10-NEXT:    v_add_nc_u32_e32 v2, 1, v0
 ; GFX10-NEXT:    v_sub_nc_u32_e32 v1, s4, v1
 ; GFX10-NEXT:    v_subrev_nc_u32_e32 v3, s5, v1
-; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, s5, v1
+; GFX10-NEXT:    v_cmp_lt_u32_e32 vcc_lo, s5, v1
 ; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
 ; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc_lo
 ; GFX10-NEXT:    v_add_nc_u32_e32 v2, 1, v0
-; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, s5, v1
+; GFX10-NEXT:    v_cmp_lt_u32_e32 vcc_lo, s5, v1
 ; GFX10-NEXT:    v_subrev_nc_u32_e32 v3, s5, v1
 ; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
 ; GFX10-NEXT:    v_mov_b32_e32 v2, 0
@@ -215,9 +215,9 @@ define amdgpu_kernel void @udivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1)
 ; GFX8-NEXT:    v_sub_u32_e32 v2, vcc, s8, v0
 ; GFX8-NEXT:    v_subb_u32_e64 v6, s[0:1], v6, v1, vcc
 ; GFX8-NEXT:    v_sub_u32_e64 v0, s[0:1], s9, v1
-; GFX8-NEXT:    v_cmp_le_u32_e64 s[0:1], s11, v6
+; GFX8-NEXT:    v_cmp_lt_u32_e64 s[0:1], s11, v6
 ; GFX8-NEXT:    v_cndmask_b32_e64 v1, 0, -1, s[0:1]
-; GFX8-NEXT:    v_cmp_le_u32_e64 s[0:1], s10, v2
+; GFX8-NEXT:    v_cmp_lt_u32_e64 s[0:1], s10, v2
 ; GFX8-NEXT:    v_cndmask_b32_e64 v7, 0, -1, s[0:1]
 ; GFX8-NEXT:    v_cmp_eq_u32_e64 s[0:1], s11, v6
 ; GFX8-NEXT:    v_subb_u32_e32 v0, vcc, v0, v5, vcc
@@ -226,9 +226,9 @@ define amdgpu_kernel void @udivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1)
 ; GFX8-NEXT:    v_subbrev_u32_e64 v8, s[0:1], 0, v0, vcc
 ; GFX8-NEXT:    v_add_u32_e64 v9, s[0:1], 1, v4
 ; GFX8-NEXT:    v_addc_u32_e64 v10, s[0:1], 0, v3, s[0:1]
-; GFX8-NEXT:    v_cmp_le_u32_e64 s[0:1], s11, v8
+; GFX8-NEXT:    v_cmp_lt_u32_e64 s[0:1], s11, v8
 ; GFX8-NEXT:    v_cndmask_b32_e64 v11, 0, -1, s[0:1]
-; GFX8-NEXT:    v_cmp_le_u32_e64 s[0:1], s10, v7
+; GFX8-NEXT:    v_cmp_lt_u32_e64 s[0:1], s10, v7
 ; GFX8-NEXT:    v_subb_u32_e32 v0, vcc, v0, v5, vcc
 ; GFX8-NEXT:    v_cndmask_b32_e64 v12, 0, -1, s[0:1]
 ; GFX8-NEXT:    v_cmp_eq_u32_e64 s[0:1], s11, v8
@@ -351,10 +351,10 @@ define amdgpu_kernel void @udivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1)
 ; GFX9-NEXT:    v_mad_u64_u32 v[1:2], s[0:1], s19, v5, v[1:2]
 ; GFX9-NEXT:    v_sub_co_u32_e32 v2, vcc, s16, v0
 ; GFX9-NEXT:    v_subb_co_u32_e64 v6, s[0:1], v6, v1, vcc
-; GFX9-NEXT:    v_cmp_le_u32_e64 s[0:1], s19, v6
+; GFX9-NEXT:    v_cmp_lt_u32_e64 s[0:1], s19, v6
 ; GFX9-NEXT:    v_sub_u32_e32 v0, s17, v1
 ; GFX9-NEXT:    v_cndmask_b32_e64 v1, 0, -1, s[0:1]
-; GFX9-NEXT:    v_cmp_le_u32_e64 s[0:1], s18, v2
+; GFX9-NEXT:    v_cmp_lt_u32_e64 s[0:1], s18, v2
 ; GFX9-NEXT:    v_cndmask_b32_e64 v8, 0, -1, s[0:1]
 ; GFX9-NEXT:    v_cmp_eq_u32_e64 s[0:1], s19, v6
 ; GFX9-NEXT:    v_subb_co_u32_e32 v0, vcc, v0, v7, vcc
@@ -363,9 +363,9 @@ define amdgpu_kernel void @udivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1)
 ; GFX9-NEXT:    v_subbrev_co_u32_e64 v9, s[0:1], 0, v0, vcc
 ; GFX9-NEXT:    v_add_co_u32_e64 v10, s[0:1], 1, v5
 ; GFX9-NEXT:    v_addc_co_u32_e64 v11, s[0:1], 0, v3, s[0:1]
-; GFX9-NEXT:    v_cmp_le_u32_e64 s[0:1], s19, v9
+; GFX9-NEXT:    v_cmp_lt_u32_e64 s[0:1], s19, v9
 ; GFX9-NEXT:    v_cndmask_b32_e64 v12, 0, -1, s[0:1]
-; GFX9-NEXT:    v_cmp_le_u32_e64 s[0:1], s18, v8
+; GFX9-NEXT:    v_cmp_lt_u32_e64 s[0:1], s18, v8
 ; GFX9-NEXT:    v_subb_co_u32_e32 v0, vcc, v0, v7, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e64 v13, 0, -1, s[0:1]
 ; GFX9-NEXT:    v_cmp_eq_u32_e64 s[0:1], s19, v9
@@ -485,16 +485,16 @@ define amdgpu_kernel void @udivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1)
 ; GFX10-NEXT:    v_sub_nc_u32_e32 v6, s17, v1
 ; GFX10-NEXT:    v_sub_co_ci_u32_e64 v8, s0, s17, v1, vcc_lo
 ; GFX10-NEXT:    v_subrev_co_ci_u32_e32 v0, vcc_lo, s19, v6, vcc_lo
-; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, s18, v7
+; GFX10-NEXT:    v_cmp_lt_u32_e32 vcc_lo, s18, v7
 ; GFX10-NEXT:    v_cndmask_b32_e64 v1, 0, -1, vcc_lo
 ; GFX10-NEXT:    v_sub_co_u32 v6, vcc_lo, v7, s18
 ; GFX10-NEXT:    v_subrev_co_ci_u32_e64 v9, s0, 0, v0, vcc_lo
-; GFX10-NEXT:    v_cmp_le_u32_e64 s0, s19, v8
+; GFX10-NEXT:    v_cmp_lt_u32_e64 s0, s19, v8
 ; GFX10-NEXT:    v_subrev_co_ci_u32_e32 v0, vcc_lo, s19, v0, vcc_lo
 ; GFX10-NEXT:    v_cndmask_b32_e64 v10, 0, -1, s0
-; GFX10-NEXT:    v_cmp_le_u32_e64 s0, s18, v6
+; GFX10-NEXT:    v_cmp_lt_u32_e64 s0, s18, v6
 ; GFX10-NEXT:    v_cndmask_b32_e64 v11, 0, -1, s0
-; GFX10-NEXT:    v_cmp_le_u32_e64 s0, s19, v9
+; GFX10-NEXT:    v_cmp_lt_u32_e64 s0, s19, v9
 ; GFX10-NEXT:    v_cndmask_b32_e64 v12, 0, -1, s0
 ; GFX10-NEXT:    v_add_co_u32 v13, s0, v2, 1
 ; GFX10-NEXT:    v_add_co_ci_u32_e64 v14, s0, 0, v4, s0
@@ -555,23 +555,23 @@ define amdgpu_kernel void @udivrem_v2i32(ptr addrspace(1) %out0, ptr addrspace(1
 ; GFX8-NEXT:    v_add_u32_e32 v3, vcc, 1, v0
 ; GFX8-NEXT:    v_mul_lo_u32 v4, v1, s11
 ; GFX8-NEXT:    v_sub_u32_e32 v2, vcc, s8, v2
-; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, s10, v2
+; GFX8-NEXT:    v_cmp_lt_u32_e32 vcc, s10, v2
 ; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
 ; GFX8-NEXT:    v_subrev_u32_e64 v3, s[0:1], s10, v2
 ; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
 ; GFX8-NEXT:    v_add_u32_e32 v3, vcc, 1, v0
-; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, s10, v2
+; GFX8-NEXT:    v_cmp_lt_u32_e32 vcc, s10, v2
 ; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
 ; GFX8-NEXT:    v_subrev_u32_e64 v3, s[0:1], s10, v2
 ; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
 ; GFX8-NEXT:    v_sub_u32_e32 v3, vcc, s9, v4
 ; GFX8-NEXT:    v_add_u32_e32 v4, vcc, 1, v1
-; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, s11, v3
+; GFX8-NEXT:    v_cmp_lt_u32_e32 vcc, s11, v3
 ; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
 ; GFX8-NEXT:    v_subrev_u32_e64 v4, s[0:1], s11, v3
 ; GFX8-NEXT:    v_cndmask_b32_e32 v3, v3, v4, vcc
 ; GFX8-NEXT:    v_add_u32_e32 v4, vcc, 1, v1
-; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, s11, v3
+; GFX8-NEXT:    v_cmp_lt_u32_e32 vcc, s11, v3
 ; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
 ; GFX8-NEXT:    v_subrev_u32_e64 v4, s[0:1], s11, v3
 ; GFX8-NEXT:    v_cndmask_b32_e32 v3, v3, v4, vcc
@@ -610,22 +610,22 @@ define amdgpu_kernel void @udivrem_v2i32(ptr addrspace(1) %out0, ptr addrspace(1
 ; GFX9-NEXT:    v_mul_lo_u32 v3, v1, s19
 ; GFX9-NEXT:    v_add_u32_e32 v5, 1, v1
 ; GFX9-NEXT:    v_sub_u32_e32 v2, s16, v2
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s18, v2
+; GFX9-NEXT:    v_cmp_lt_u32_e32 vcc, s18, v2
 ; GFX9-NEXT:    v_sub_u32_e32 v3, s17, v3
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
 ; GFX9-NEXT:    v_subrev_u32_e32 v4, s18, v2
-; GFX9-NEXT:    v_cmp_le_u32_e64 s[0:1], s19, v3
+; GFX9-NEXT:    v_cmp_lt_u32_e64 s[0:1], s19, v3
 ; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e64 v1, v1, v5, s[0:1]
 ; GFX9-NEXT:    v_subrev_u32_e32 v5, s19, v3
 ; GFX9-NEXT:    v_add_u32_e32 v4, 1, v0
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s18, v2
+; GFX9-NEXT:    v_cmp_lt_u32_e32 vcc, s18, v2
 ; GFX9-NEXT:    v_cndmask_b32_e64 v3, v3, v5, s[0:1]
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
 ; GFX9-NEXT:    v_subrev_u32_e32 v4, s18, v2
 ; GFX9-NEXT:    v_add_u32_e32 v5, 1, v1
 ; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s19, v3
+; GFX9-NEXT:    v_cmp_lt_u32_e32 vcc, s19, v3
 ; GFX9-NEXT:    v_subrev_u32_e32 v4, s19, v3
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v3, v3, v4, vcc
@@ -663,18 +663,18 @@ define amdgpu_kernel void @udivrem_v2i32(ptr addrspace(1) %out0, ptr addrspace(1
 ; GFX10-NEXT:    v_add_nc_u32_e32 v5, 1, v1
 ; GFX10-NEXT:    v_sub_nc_u32_e32 v2, s16, v2
 ; GFX10-NEXT:    v_sub_nc_u32_e32 v3, s17, v3
-; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, s18, v2
+; GFX10-NEXT:    v_cmp_lt_u32_e32 vcc_lo, s18, v2
 ; GFX10-NEXT:    v_subrev_nc_u32_e32 v6, s18, v2
-; GFX10-NEXT:    v_cmp_le_u32_e64 s0, s19, v3
+; GFX10-NEXT:    v_cmp_lt_u32_e64 s0, s19, v3
 ; GFX10-NEXT:    v_subrev_nc_u32_e32 v7, s19, v3
 ; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc_lo
 ; GFX10-NEXT:    v_cndmask_b32_e32 v2, v2, v6, vcc_lo
 ; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, v5, s0
 ; GFX10-NEXT:    v_cndmask_b32_e64 v3, v3, v7, s0
 ; GFX10-NEXT:    v_add_nc_u32_e32 v4, 1, v0
-; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, s18, v2
+; GFX10-NEXT:    v_cmp_lt_u32_e32 vcc_lo, s18, v2
 ; GFX10-NEXT:    v_add_nc_u32_e32 v5, 1, v1
-; GFX10-NEXT:    v_cmp_le_u32_e64 s0, s19, v3
+; GFX10-NEXT:    v_cmp_lt_u32_e64 s0, s19, v3
 ; GFX10-NEXT:    v_subrev_nc_u32_e32 v6, s18, v2
 ; GFX10-NEXT:    v_subrev_nc_u32_e32 v7, s19, v3
 ; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc_lo
@@ -723,12 +723,12 @@ define amdgpu_kernel void @udivrem_v4i32(ptr addrspace(1) %out0, ptr addrspace(1
 ; GFX8-NEXT:    v_add_u32_e32 v3, vcc, 1, v0
 ; GFX8-NEXT:    v_mul_lo_u32 v5, v1, s17
 ; GFX8-NEXT:    v_sub_u32_e32 v2, vcc, s12, v2
-; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, s16, v2
+; GFX8-NEXT:    v_cmp_lt_u32_e32 vcc, s16, v2
 ; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
 ; GFX8-NEXT:    v_subrev_u32_e64 v3, s[0:1], s16, v2
 ; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
 ; GFX8-NEXT:    v_add_u32_e32 v3, vcc, 1, v0
-; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, s16, v2
+; GFX8-NEXT:    v_cmp_lt_u32_e32 vcc, s16, v2
 ; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
 ; GFX8-NEXT:    v_subrev_u32_e64 v3, s[0:1], s16, v2
 ; GFX8-NEXT:    v_cndmask_b32_e32 v4, v2, v3, vcc
@@ -737,14 +737,14 @@ define amdgpu_kernel void @udivrem_v4i32(ptr addrspace(1) %out0, ptr addrspace(1
 ; GFX8-NEXT:    v_add_u32_e32 v5, vcc, 1, v1
 ; GFX8-NEXT:    v_mul_f32_e32 v3, 0x4f7ffffe, v3
 ; GFX8-NEXT:    v_cvt_u32_f32_e32 v3, v3
-; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, s17, v2
+; GFX8-NEXT:    v_cmp_lt_u32_e32 vcc, s17, v2
 ; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
 ; GFX8-NEXT:    v_subrev_u32_e64 v5, s[0:1], s17, v2
 ; GFX8-NEXT:    s_sub_i32 s0, 0, s18
 ; GFX8-NEXT:    v_mul_lo_u32 v6, s0, v3
 ; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v5, vcc
 ; GFX8-NEXT:    v_add_u32_e32 v5, vcc, 1, v1
-; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, s17, v2
+; GFX8-NEXT:    v_cmp_lt_u32_e32 vcc, s17, v2
 ; GFX8-NEXT:    v_mul_hi_u32 v6, v3, v6
 ; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
 ; GFX8-NEXT:    v_cvt_f32_u32_e32 v5, s19
@@ -761,26 +761,26 @@ define amdgpu_kernel void @udivrem_v4i32(ptr addrspace(1) %out0, ptr addrspace(1
 ; GFX8-NEXT:    v_add_u32_e32 v8, vcc, 1, v3
 ; GFX8-NEXT:    v_sub_u32_e32 v2, vcc, s14, v2
 ; GFX8-NEXT:    v_mul_hi_u32 v7, v6, v7
-; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, s18, v2
+; GFX8-NEXT:    v_cmp_lt_u32_e32 vcc, s18, v2
 ; GFX8-NEXT:    v_cndmask_b32_e32 v3, v3, v8, vcc
 ; GFX8-NEXT:    v_subrev_u32_e64 v8, s[0:1], s18, v2
 ; GFX8-NEXT:    v_cndmask_b32_e32 v8, v2, v8, vcc
 ; GFX8-NEXT:    v_add_u32_e32 v2, vcc, v6, v7
 ; GFX8-NEXT:    v_mul_hi_u32 v7, s15, v2
 ; GFX8-NEXT:    v_add_u32_e32 v2, vcc, 1, v3
-; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, s18, v8
+; GFX8-NEXT:    v_cmp_lt_u32_e32 vcc, s18, v8
 ; GFX8-NEXT:    v_cndmask_b32_e32 v2, v3, v2, vcc
 ; GFX8-NEXT:    v_mul_lo_u32 v3, v7, s19
 ; GFX8-NEXT:    v_subrev_u32_e64 v6, s[0:1], s18, v8
 ; GFX8-NEXT:    v_cndmask_b32_e32 v6, v8, v6, vcc
 ; GFX8-NEXT:    v_sub_u32_e32 v3, vcc, s15, v3
 ; GFX8-NEXT:    v_add_u32_e32 v8, vcc, 1, v7
-; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, s19, v3
+; GFX8-NEXT:    v_cmp_lt_u32_e32 vcc, s19, v3
 ; GFX8-NEXT:    v_cndmask_b32_e32 v7, v7, v8, vcc
 ; GFX8-NEXT:    v_subrev_u32_e64 v8, s[0:1], s19, v3
 ; GFX8-NEXT:    v_cndmask_b32_e32 v8, v3, v8, vcc
 ; GFX8-NEXT:    v_add_u32_e32 v3, vcc, 1, v7
-; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, s19, v8
+; GFX8-NEXT:    v_cmp_lt_u32_e32 vcc, s19, v8
 ; GFX8-NEXT:    v_cndmask_b32_e32 v3, v7, v3, vcc
 ; GFX8-NEXT:    v_subrev_u32_e64 v7, s[0:1], s19, v8
 ; GFX8-NEXT:    v_cndmask_b32_e32 v7, v8, v7, vcc
@@ -827,18 +827,18 @@ define amdgpu_kernel void @udivrem_v4i32(ptr addrspace(1) %out0, ptr addrspace(1
 ; GFX9-NEXT:    v_add_u32_e32 v6, 1, v1
 ; GFX9-NEXT:    v_sub_u32_e32 v3, s0, v3
 ; GFX9-NEXT:    v_sub_u32_e32 v7, s1, v4
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s4, v3
+; GFX9-NEXT:    v_cmp_lt_u32_e32 vcc, s4, v3
 ; GFX9-NEXT:    v_subrev_u32_e32 v4, s4, v3
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v3, v3, v4, vcc
 ; GFX9-NEXT:    v_add_u32_e32 v4, 1, v0
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s4, v3
+; GFX9-NEXT:    v_cmp_lt_u32_e32 vcc, s4, v3
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
 ; GFX9-NEXT:    v_subrev_u32_e32 v4, s4, v3
 ; GFX9-NEXT:    v_cndmask_b32_e32 v4, v3, v4, vcc
 ; GFX9-NEXT:    v_mul_lo_u32 v3, s12, v2
 ; GFX9-NEXT:    v_cvt_f32_u32_e32 v5, s7
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s5, v7
+; GFX9-NEXT:    v_cmp_lt_u32_e32 vcc, s5, v7
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v6, vcc
 ; GFX9-NEXT:    v_mul_hi_u32 v3, v2, v3
 ; GFX9-NEXT:    v_rcp_iflag_f32_e32 v5, v5
@@ -849,7 +849,7 @@ define amdgpu_kernel void @udivrem_v4i32(ptr addrspace(1) %out0, ptr addrspace(1
 ; GFX9-NEXT:    v_mul_hi_u32 v2, s2, v2
 ; GFX9-NEXT:    v_cvt_u32_f32_e32 v3, v3
 ; GFX9-NEXT:    v_add_u32_e32 v7, 1, v1
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s5, v6
+; GFX9-NEXT:    v_cmp_lt_u32_e32 vcc, s5, v6
 ; GFX9-NEXT:    s_sub_i32 s0, 0, s7
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
 ; GFX9-NEXT:    v_mul_lo_u32 v7, v2, s6
@@ -859,7 +859,7 @@ define amdgpu_kernel void @udivrem_v4i32(ptr addrspace(1) %out0, ptr addrspace(1
 ; GFX9-NEXT:    v_sub_u32_e32 v6, s2, v7
 ; GFX9-NEXT:    v_mul_hi_u32 v7, v3, v8
 ; GFX9-NEXT:    v_add_u32_e32 v8, 1, v2
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s6, v6
+; GFX9-NEXT:    v_cmp_lt_u32_e32 vcc, s6, v6
 ; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v8, vcc
 ; GFX9-NEXT:    v_add_u32_e32 v3, v3, v7
 ; GFX9-NEXT:    v_mul_hi_u32 v3, s3, v3
@@ -867,18 +867,18 @@ define amdgpu_kernel void @udivrem_v4i32(ptr addrspace(1) %out0, ptr addrspace(1
 ; GFX9-NEXT:    v_cndmask_b32_e32 v6, v6, v7, vcc
 ; GFX9-NEXT:    v_add_u32_e32 v7, 1, v2
 ; GFX9-NEXT:    v_mul_lo_u32 v8, v3, s7
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s6, v6
+; GFX9-NEXT:    v_cmp_lt_u32_e32 vcc, s6, v6
 ; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v7, vcc
 ; GFX9-NEXT:    v_subrev_u32_e32 v7, s6, v6
 ; GFX9-NEXT:    v_cndmask_b32_e32 v6, v6, v7, vcc
 ; GFX9-NEXT:    v_sub_u32_e32 v7, s3, v8
 ; GFX9-NEXT:    v_add_u32_e32 v8, 1, v3
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s7, v7
+; GFX9-NEXT:    v_cmp_lt_u32_e32 vcc, s7, v7
 ; GFX9-NEXT:    v_cndmask_b32_e32 v3, v3, v8, vcc
 ; GFX9-NEXT:    v_subrev_u32_e32 v8, s7, v7
 ; GFX9-NEXT:    v_cndmask_b32_e32 v7, v7, v8, vcc
 ; GFX9-NEXT:    v_add_u32_e32 v8, 1, v3
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s7, v7
+; GFX9-NEXT:    v_cmp_lt_u32_e32 vcc, s7, v7
 ; GFX9-NEXT:    v_cndmask_b32_e32 v3, v3, v8, vcc
 ; GFX9-NEXT:    v_subrev_u32_e32 v8, s7, v7
 ; GFX9-NEXT:    v_cndmask_b32_e32 v7, v7, v8, vcc
@@ -942,10 +942,10 @@ define amdgpu_kernel void @udivrem_v4i32(ptr addrspace(1) %out0, ptr addrspace(1
 ; GFX10-NEXT:    v_sub_nc_u32_e32 v5, s1, v5
 ; GFX10-NEXT:    v_sub_nc_u32_e32 v6, s2, v6
 ; GFX10-NEXT:    v_sub_nc_u32_e32 v7, s3, v7
-; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, s4, v4
-; GFX10-NEXT:    v_cmp_le_u32_e64 s0, s5, v5
-; GFX10-NEXT:    v_cmp_le_u32_e64 s1, s6, v6
-; GFX10-NEXT:    v_cmp_le_u32_e64 s2, s7, v7
+; GFX10-NEXT:    v_cmp_lt_u32_e32 vcc_lo, s4, v4
+; GFX10-NEXT:    v_cmp_lt_u32_e64 s0, s5, v5
+; GFX10-NEXT:    v_cmp_lt_u32_e64 s1, s6, v6
+; GFX10-NEXT:    v_cmp_lt_u32_e64 s2, s7, v7
 ; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v9, vcc_lo
 ; GFX10-NEXT:    v_subrev_nc_u32_e32 v9, s4, v4
 ; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, v10, s0
@@ -961,11 +961,11 @@ define amdgpu_kernel void @udivrem_v4i32(ptr addrspace(1) %out0, ptr addrspace(1
 ; GFX10-NEXT:    v_cndmask_b32_e64 v7, v7, v12, s2
 ; GFX10-NEXT:    v_add_nc_u32_e32 v10, 1, v1
 ; GFX10-NEXT:    v_add_nc_u32_e32 v11, 1, v2
-; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, s4, v4
-; GFX10-NEXT:    v_cmp_le_u32_e64 s0, s5, v5
-; GFX10-NEXT:    v_cmp_le_u32_e64 s1, s6, v6
+; GFX10-NEXT:    v_cmp_lt_u32_e32 vcc_lo, s4, v4
+; GFX10-NEXT:    v_cmp_lt_u32_e64 s0, s5, v5
+; GFX10-NEXT:    v_cmp_lt_u32_e64 s1, s6, v6
 ; GFX10-NEXT:    v_add_nc_u32_e32 v12, 1, v3
-; GFX10-NEXT:    v_cmp_le_u32_e64 s2, s7, v7
+; GFX10-NEXT:    v_cmp_lt_u32_e64 s2, s7, v7
 ; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v9, vcc_lo
 ; GFX10-NEXT:    v_subrev_nc_u32_e32 v9, s4, v4
 ; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, v10, s0
@@ -1095,9 +1095,9 @@ define amdgpu_kernel void @udivrem_v2i64(ptr addrspace(1) %out0, ptr addrspace(1
 ; GFX8-NEXT:    v_mov_b32_e32 v4, s13
 ; GFX8-NEXT:    v_subb_u32_e64 v0, s[0:1], v3, v1, vcc
 ; GFX8-NEXT:    v_sub_u32_e64 v1, s[0:1], s9, v1
-; GFX8-NEXT:    v_cmp_le_u32_e64 s[0:1], s13, v0
+; GFX8-NEXT:    v_cmp_lt_u32_e64 s[0:1], s13, v0
 ; GFX8-NEXT:    v_cndmask_b32_e64 v2, 0, -1, s[0:1]
-; GFX8-NEXT:    v_cmp_le_u32_e64 s[0:1], s12, v8
+; GFX8-NEXT:    v_cmp_lt_u32_e64 s[0:1], s12, v8
 ; GFX8-NEXT:    v_cndmask_b32_e64 v3, 0, -1, s[0:1]
 ; GFX8-NEXT:    v_cmp_eq_u32_e64 s[0:1], s13, v0
 ; GFX8-NEXT:    v_cndmask_b32_e64 v9, v2, v3, s[0:1]
@@ -1119,11 +1119,11 @@ define amdgpu_kernel void @udivrem_v2i64(ptr addrspace(1) %out0, ptr addrspace(1
 ; GFX8-NEXT:    v_addc_u32_e64 v14, s[0:1], 0, v7, s[0:1]
 ; GFX8-NEXT:    v_mad_u64_u32 v[1:2], s[0:1], s2, v12, 0
 ; GFX8-NEXT:    v_cvt_u32_f32_e32 v15, v3
-; GFX8-NEXT:    v_cmp_le_u32_e64 s[0:1], s13, v11
+; GFX8-NEXT:    v_cmp_lt_u32_e64 s[0:1], s13, v11
 ; GFX8-NEXT:    v_cndmask_b32_e64 v16, 0, -1, s[0:1]
 ; GFX8-NEXT:    v_subb_u32_e32 v4, vcc, v5, v4, vcc
 ; GFX8-NEXT:    v_mad_u64_u32 v[2:3], s[0:1], s2, v15, v[2:3]
-; GFX8-NEXT:    v_cmp_le_u32_e64 s[0:1], s12, v10
+; GFX8-NEXT:    v_cmp_lt_u32_e64 s[0:1], s12, v10
 ; GFX8-NEXT:    v_cndmask_b32_e64 v17, 0, -1, s[0:1]
 ; GFX8-NEXT:    v_mad_u64_u32 v[2:3], s[0:1], s3, v12, v[2:3]
 ; GFX8-NEXT:    v_cmp_eq_u32_e64 s[0:1], s13, v11
@@ -1222,18 +1222,18 @@ define amdgpu_kernel void @udivrem_v2i64(ptr addrspace(1) %out0, ptr addrspace(1
 ; GFX8-NEXT:    v_sub_u32_e32 v8, vcc, s10, v3
 ; GFX8-NEXT:    v_subb_u32_e64 v11, s[0:1], v4, v7, vcc
 ; GFX8-NEXT:    v_sub_u32_e64 v3, s[0:1], s11, v7
-; GFX8-NEXT:    v_cmp_le_u32_e64 s[0:1], s15, v11
+; GFX8-NEXT:    v_cmp_lt_u32_e64 s[0:1], s15, v11
 ; GFX8-NEXT:    v_cndmask_b32_e64 v4, 0, -1, s[0:1]
-; GFX8-NEXT:    v_cmp_le_u32_e64 s[0:1], s14, v8
+; GFX8-NEXT:    v_cmp_lt_u32_e64 s[0:1], s14, v8
 ; GFX8-NEXT:    v_cndmask_b32_e64 v7, 0, -1, s[0:1]
 ; GFX8-NEXT:    v_cmp_eq_u32_e64 s[0:1], s15, v11
 ; GFX8-NEXT:    v_subb_u32_e32 v3, vcc, v3, v0, vcc
 ; GFX8-NEXT:    v_cndmask_b32_e64 v4, v4, v7, s[0:1]
 ; GFX8-NEXT:    v_subrev_u32_e32 v7, vcc, s14, v8
 ; GFX8-NEXT:    v_subbrev_u32_e64 v12, s[0:1], 0, v3, vcc
-; GFX8-NEXT:    v_cmp_le_u32_e64 s[0:1], s15, v12
+; GFX8-NEXT:    v_cmp_lt_u32_e64 s[0:1], s15, v12
 ; GFX8-NEXT:    v_cndmask_b32_e64 v13, 0, -1, s[0:1]
-; GFX8-NEXT:    v_cmp_le_u32_e64 s[0:1], s14, v7
+; GFX8-NEXT:    v_cmp_lt_u32_e64 s[0:1], s14, v7
 ; GFX8-NEXT:    v_cndmask_b32_e64 v14, 0, -1, s[0:1]
 ; GFX8-NEXT:    v_cmp_eq_u32_e64 s[0:1], s15, v12
 ; GFX8-NEXT:    v_cndmask_b32_e64 v13, v13, v14, s[0:1]
@@ -1360,9 +1360,9 @@ define amdgpu_kernel void @udivrem_v2i64(ptr addrspace(1) %out0, ptr addrspace(1
 ; GFX9-NEXT:    v_mad_u64_u32 v[2:3], s[0:1], s5, v7, v[1:2]
 ; GFX9-NEXT:    v_sub_co_u32_e32 v1, vcc, s16, v0
 ; GFX9-NEXT:    v_subb_co_u32_e64 v0, s[0:1], v4, v2, vcc
-; GFX9-NEXT:    v_cmp_le_u32_e64 s[0:1], s5, v0
+; GFX9-NEXT:    v_cmp_lt_u32_e64 s[0:1], s5, v0
 ; GFX9-NEXT:    v_cndmask_b32_e64 v3, 0, -1, s[0:1]
-; GFX9-NEXT:    v_cmp_le_u32_e64 s[0:1], s4, v1
+; GFX9-NEXT:    v_cmp_lt_u32_e64 s[0:1], s4, v1
 ; GFX9-NEXT:    v_cndmask_b32_e64 v4, 0, -1, s[0:1]
 ; GFX9-NEXT:    v_cmp_eq_u32_e64 s[0:1], s5, v0
 ; GFX9-NEXT:    v_cndmask_b32_e64 v9, v3, v4, s[0:1]
@@ -1385,11 +1385,11 @@ define amdgpu_kernel void @udivrem_v2i64(ptr addrspace(1) %out0, ptr addrspace(1
 ; GFX9-NEXT:    v_addc_co_u32_e64 v14, s[0:1], 0, v8, s[0:1]
 ; GFX9-NEXT:    v_mad_u64_u32 v[2:3], s[0:1], s2, v12, 0
 ; GFX9-NEXT:    v_cvt_u32_f32_e32 v15, v4
-; GFX9-NEXT:    v_cmp_le_u32_e64 s[0:1], s5, v11
+; GFX9-NEXT:    v_cmp_lt_u32_e64 s[0:1], s5, v11
 ; GFX9-NEXT:    v_cndmask_b32_e64 v16, 0, -1, s[0:1]
 ; GFX9-NEXT:    v_subb_co_u32_e32 v5, vcc, v6, v5, vcc
 ; GFX9-NEXT:    v_mad_u64_u32 v[3:4], s[0:1], s2, v15, v[3:4]
-; GFX9-NEXT:    v_cmp_le_u32_e64 s[0:1], s4, v10
+; GFX9-NEXT:    v_cmp_lt_u32_e64 s[0:1], s4, v10
 ; GFX9-NEXT:    v_cndmask_b32_e64 v17, 0, -1, s[0:1]
 ; GFX9-NEXT:    v_mad_u64_u32 v[3:4], s[0:1], s3, v12, v[3:4]
 ; GFX9-NEXT:    v_cmp_eq_u32_e64 s[0:1], s5, v11
@@ -1484,19 +1484,19 @@ define amdgpu_kernel void @udivrem_v2i64(ptr addrspace(1) %out0, ptr addrspace(1
 ; GFX9-NEXT:    v_mad_u64_u32 v[0:1], s[0:1], s7, v11, v[0:1]
 ; GFX9-NEXT:    v_sub_co_u32_e32 v1, vcc, s18, v4
 ; GFX9-NEXT:    v_subb_co_u32_e64 v9, s[0:1], v9, v0, vcc
-; GFX9-NEXT:    v_cmp_le_u32_e64 s[0:1], s7, v9
+; GFX9-NEXT:    v_cmp_lt_u32_e64 s[0:1], s7, v9
 ; GFX9-NEXT:    v_sub_u32_e32 v0, s19, v0
 ; GFX9-NEXT:    v_cndmask_b32_e64 v4, 0, -1, s[0:1]
-; GFX9-NEXT:    v_cmp_le_u32_e64 s[0:1], s6, v1
+; GFX9-NEXT:    v_cmp_lt_u32_e64 s[0:1], s6, v1
 ; GFX9-NEXT:    v_cndmask_b32_e64 v10, 0, -1, s[0:1]
 ; GFX9-NEXT:    v_cmp_eq_u32_e64 s[0:1], s7, v9
 ; GFX9-NEXT:    v_subb_co_u32_e32 v0, vcc, v0, v5, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e64 v4, v4, v10, s[0:1]
 ; GFX9-NEXT:    v_subrev_co_u32_e32 v10, vcc, s6, v1
 ; GFX9-NEXT:    v_subbrev_co_u32_e64 v12, s[0:1], 0, v0, vcc
-; GFX9-NEXT:    v_cmp_le_u32_e64 s[0:1], s7, v12
+; GFX9-NEXT:    v_cmp_lt_u32_e64 s[0:1], s7, v12
 ; GFX9-NEXT:    v_cndmask_b32_e64 v13, 0, -1, s[0:1]
-; GFX9-NEXT:    v_cmp_le_u32_e64 s[0:1], s6, v10
+; GFX9-NEXT:    v_cmp_lt_u32_e64 s[0:1], s6, v10
 ; GFX9-NEXT:    v_cndmask_b32_e64 v14, 0, -1, s[0:1]
 ; GFX9-NEXT:    v_cmp_eq_u32_e64 s[0:1], s7, v12
 ; GFX9-NEXT:    v_cndmask_b32_e64 v13, v13, v14, s[0:1]
@@ -1709,27 +1709,27 @@ define amdgpu_kernel void @udivrem_v2i64(ptr addrspace(1) %out0, ptr addrspace(1
 ; GFX10-NEXT:    v_sub_co_u32 v14, vcc_lo, s16, v0
 ; GFX10-NEXT:    v_mad_u64_u32 v[0:1], s0, s7, v10, v[5:6]
 ; GFX10-NEXT:    v_sub_co_ci_u32_e64 v5, s0, s17, v3, vcc_lo
-; GFX10-NEXT:    v_cmp_le_u32_e64 s0, s4, v14
+; GFX10-NEXT:    v_cmp_lt_u32_e64 s0, s4, v14
 ; GFX10-NEXT:    v_sub_nc_u32_e32 v1, s17, v3
 ; GFX10-NEXT:    v_cndmask_b32_e64 v3, 0, -1, s0
 ; GFX10-NEXT:    v_sub_co_u32 v15, s0, s18, v2
 ; GFX10-NEXT:    v_subrev_co_ci_u32_e32 v1, vcc_lo, s5, v1, vcc_lo
 ; GFX10-NEXT:    v_sub_co_ci_u32_e64 v16, s1, s19, v0, s0
-; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, s6, v15
+; GFX10-NEXT:    v_cmp_lt_u32_e32 vcc_lo, s6, v15
 ; GFX10-NEXT:    v_sub_nc_u32_e32 v0, s19, v0
 ; GFX10-NEXT:    v_cndmask_b32_e64 v2, 0, -1, vcc_lo
 ; GFX10-NEXT:    v_sub_co_u32 v17, vcc_lo, v14, s4
 ; GFX10-NEXT:    v_subrev_co_ci_u32_e64 v18, s1, 0, v1, vcc_lo
-; GFX10-NEXT:    v_cmp_le_u32_e64 s1, s5, v5
+; GFX10-NEXT:    v_cmp_lt_u32_e64 s1, s5, v5
 ; GFX10-NEXT:    v_subrev_co_ci_u32_e64 v23, s0, s7, v0, s0
 ; GFX10-NEXT:    v_cmp_eq_u32_e64 s0, s5, v18
 ; GFX10-NEXT:    v_subrev_co_ci_u32_e32 v1, vcc_lo, s5, v1, vcc_lo
 ; GFX10-NEXT:    v_cndmask_b32_e64 v19, 0, -1, s1
-; GFX10-NEXT:    v_cmp_le_u32_e64 s1, s4, v17
+; GFX10-NEXT:    v_cmp_lt_u32_e64 s1, s4, v17
 ; GFX10-NEXT:    v_cndmask_b32_e64 v20, 0, -1, s1
-; GFX10-NEXT:    v_cmp_le_u32_e64 s1, s5, v18
+; GFX10-NEXT:    v_cmp_lt_u32_e64 s1, s5, v18
 ; GFX10-NEXT:    v_cndmask_b32_e64 v21, 0, -1, s1
-; GFX10-NEXT:    v_cmp_le_u32_e64 s1, s7, v16
+; GFX10-NEXT:    v_cmp_lt_u32_e64 s1, s7, v16
 ; GFX10-NEXT:    v_cndmask_b32_e64 v0, v21, v20, s0
 ; GFX10-NEXT:    v_cmp_eq_u32_e64 s0, s5, v5
 ; GFX10-NEXT:    v_cndmask_b32_e64 v22, 0, -1, s1
@@ -1750,9 +1750,9 @@ define amdgpu_kernel void @udivrem_v2i64(ptr addrspace(1) %out0, ptr addrspace(1
 ; GFX10-NEXT:    v_cndmask_b32_e64 v4, v14, v4, s0
 ; GFX10-NEXT:    v_cndmask_b32_e64 v5, v5, v3, s0
 ; GFX10-NEXT:    v_cndmask_b32_e32 v2, v22, v2, vcc_lo
-; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, s7, v12
+; GFX10-NEXT:    v_cmp_lt_u32_e32 vcc_lo, s7, v12
 ; GFX10-NEXT:    v_cndmask_b32_e64 v8, 0, -1, vcc_lo
-; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, s6, v6
+; GFX10-NEXT:    v_cmp_lt_u32_e32 vcc_lo, s6, v6
 ; GFX10-NEXT:    v_cndmask_b32_e64 v9, 0, -1, vcc_lo
 ; GFX10-NEXT:    v_add_co_u32 v13, vcc_lo, v10, 1
 ; GFX10-NEXT:    v_add_co_ci_u32_e32 v14, vcc_lo, 0, v7, vcc_lo
@@ -1809,12 +1809,12 @@ define amdgpu_kernel void @udiv_i8(ptr addrspace(1) %out0, ptr addrspace(1) %out
 ; GFX8-NEXT:    v_mul_lo_u32 v3, v2, s5
 ; GFX8-NEXT:    v_add_u32_e32 v4, vcc, 1, v2
 ; GFX8-NEXT:    v_sub_u32_e32 v3, vcc, s4, v3
-; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, s5, v3
+; GFX8-NEXT:    v_cmp_lt_u32_e32 vcc, s5, v3
 ; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
 ; GFX8-NEXT:    v_subrev_u32_e64 v4, s[0:1], s5, v3
 ; GFX8-NEXT:    v_cndmask_b32_e32 v3, v3, v4, vcc
 ; GFX8-NEXT:    v_add_u32_e32 v4, vcc, 1, v2
-; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, s5, v3
+; GFX8-NEXT:    v_cmp_lt_u32_e32 vcc, s5, v3
 ; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
 ; GFX8-NEXT:    v_subrev_u32_e64 v4, s[0:1], s5, v3
 ; GFX8-NEXT:    flat_store_byte v[0:1], v2
@@ -1844,12 +1844,12 @@ define amdgpu_kernel void @udiv_i8(ptr addrspace(1) %out0, ptr addrspace(1) %out
 ; GFX9-NEXT:    v_mul_lo_u32 v1, v0, s4
 ; GFX9-NEXT:    v_add_u32_e32 v3, 1, v0
 ; GFX9-NEXT:    v_sub_u32_e32 v1, s5, v1
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s4, v1
+; GFX9-NEXT:    v_cmp_lt_u32_e32 vcc, s4, v1
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
 ; GFX9-NEXT:    v_subrev_u32_e32 v3, s4, v1
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
 ; GFX9-NEXT:    v_add_u32_e32 v3, 1, v0
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s4, v1
+; GFX9-NEXT:    v_cmp_lt_u32_e32 vcc, s4, v1
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
 ; GFX9-NEXT:    v_subrev_u32_e32 v3, s4, v1
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
@@ -1878,11 +1878,11 @@ define amdgpu_kernel void @udiv_i8(ptr addrspace(1) %out0, ptr addrspace(1) %out
 ; GFX10-NEXT:    v_sub_nc_u32_e32 v1, s0, v1
 ; GFX10-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
 ; GFX10-NEXT:    v_subrev_nc_u32_e32 v3, s4, v1
-; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, s4, v1
+; GFX10-NEXT:    v_cmp_lt_u32_e32 vcc_lo, s4, v1
 ; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
 ; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc_lo
 ; GFX10-NEXT:    v_add_nc_u32_e32 v2, 1, v0
-; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, s4, v1
+; GFX10-NEXT:    v_cmp_lt_u32_e32 vcc_lo, s4, v1
 ; GFX10-NEXT:    v_subrev_nc_u32_e32 v3, s4, v1
 ; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
 ; GFX10-NEXT:    v_mov_b32_e32 v2, 0
@@ -1932,24 +1932,24 @@ define amdgpu_kernel void @udivrem_v2i8(ptr addrspace(1) %out0, ptr addrspace(1)
 ; GFX8-NEXT:    v_mul_lo_u32 v2, v0, s2
 ; GFX8-NEXT:    v_add_u32_e32 v3, vcc, 1, v0
 ; GFX8-NEXT:    v_sub_u32_e32 v2, vcc, s1, v2
-; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, s2, v2
+; GFX8-NEXT:    v_cmp_lt_u32_e32 vcc, s2, v2
 ; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
 ; GFX8-NEXT:    v_subrev_u32_e64 v3, s[0:1], s2, v2
 ; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
 ; GFX8-NEXT:    v_add_u32_e32 v3, vcc, 1, v0
-; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, s2, v2
+; GFX8-NEXT:    v_cmp_lt_u32_e32 vcc, s2, v2
 ; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
 ; GFX8-NEXT:    v_mul_lo_u32 v3, v1, s3
 ; GFX8-NEXT:    v_subrev_u32_e64 v4, s[0:1], s2, v2
 ; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
 ; GFX8-NEXT:    v_sub_u32_e32 v3, vcc, s8, v3
 ; GFX8-NEXT:    v_add_u32_e32 v4, vcc, 1, v1
-; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, s3, v3
+; GFX8-NEXT:    v_cmp_lt_u32_e32 vcc, s3, v3
 ; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
 ; GFX8-NEXT:    v_subrev_u32_e64 v4, s[0:1], s3, v3
 ; GFX8-NEXT:    v_cndmask_b32_e32 v3, v3, v4, vcc
 ; GFX8-NEXT:    v_add_u32_e32 v4, vcc, 1, v1
-; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, s3, v3
+; GFX8-NEXT:    v_cmp_lt_u32_e32 vcc, s3, v3
 ; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
 ; GFX8-NEXT:    v_and_b32_e32 v1, 0xff, v1
 ; GFX8-NEXT:    v_subrev_u32_e64 v4, s[0:1], s3, v3
@@ -1998,23 +1998,23 @@ define amdgpu_kernel void @udivrem_v2i8(ptr addrspace(1) %out0, ptr addrspace(1)
 ; GFX9-NEXT:    v_add_u32_e32 v4, 1, v1
 ; GFX9-NEXT:    v_mul_lo_u32 v2, v0, s5
 ; GFX9-NEXT:    v_sub_u32_e32 v3, s6, v3
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s4, v3
+; GFX9-NEXT:    v_cmp_lt_u32_e32 vcc, s4, v3
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
 ; GFX9-NEXT:    v_subrev_u32_e32 v4, s4, v3
 ; GFX9-NEXT:    v_cndmask_b32_e32 v3, v3, v4, vcc
 ; GFX9-NEXT:    v_add_u32_e32 v4, 1, v1
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s4, v3
+; GFX9-NEXT:    v_cmp_lt_u32_e32 vcc, s4, v3
 ; GFX9-NEXT:    v_sub_u32_e32 v2, s7, v2
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
 ; GFX9-NEXT:    v_subrev_u32_e32 v4, s4, v3
 ; GFX9-NEXT:    v_cndmask_b32_e32 v3, v3, v4, vcc
 ; GFX9-NEXT:    v_add_u32_e32 v4, 1, v0
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s5, v2
+; GFX9-NEXT:    v_cmp_lt_u32_e32 vcc, s5, v2
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
 ; GFX9-NEXT:    v_subrev_u32_e32 v4, s5, v2
 ; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
 ; GFX9-NEXT:    v_add_u32_e32 v4, 1, v0
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s5, v2
+; GFX9-NEXT:    v_cmp_lt_u32_e32 vcc, s5, v2
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
 ; GFX9-NEXT:    v_and_b32_e32 v0, 0xff, v0
 ; GFX9-NEXT:    v_subrev_u32_e32 v4, s5, v2
@@ -2063,19 +2063,19 @@ define amdgpu_kernel void @udivrem_v2i8(ptr addrspace(1) %out0, ptr addrspace(1)
 ; GFX10-NEXT:    v_add_nc_u32_e32 v5, 1, v1
 ; GFX10-NEXT:    v_sub_nc_u32_e32 v2, s3, v2
 ; GFX10-NEXT:    v_sub_nc_u32_e32 v3, s0, v3
-; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, s2, v2
+; GFX10-NEXT:    v_cmp_lt_u32_e32 vcc_lo, s2, v2
 ; GFX10-NEXT:    v_subrev_nc_u32_e32 v6, s2, v2
-; GFX10-NEXT:    v_cmp_le_u32_e64 s0, s1, v3
+; GFX10-NEXT:    v_cmp_lt_u32_e64 s0, s1, v3
 ; GFX10-NEXT:    v_subrev_nc_u32_e32 v7, s1, v3
 ; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc_lo
 ; GFX10-NEXT:    v_cndmask_b32_e32 v2, v2, v6, vcc_lo
 ; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, v5, s0
 ; GFX10-NEXT:    v_cndmask_b32_e64 v3, v3, v7, s0
 ; GFX10-NEXT:    v_add_nc_u32_e32 v4, 1, v0
-; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, s2, v2
+; GFX10-NEXT:    v_cmp_lt_u32_e32 vcc_lo, s2, v2
 ; GFX10-NEXT:    v_subrev_nc_u32_e32 v6, s2, v2
 ; GFX10-NEXT:    v_add_nc_u32_e32 v5, 1, v1
-; GFX10-NEXT:    v_cmp_le_u32_e64 s0, s1, v3
+; GFX10-NEXT:    v_cmp_lt_u32_e64 s0, s1, v3
 ; GFX10-NEXT:    v_subrev_nc_u32_e32 v7, s1, v3
 ; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc_lo
 ; GFX10-NEXT:    v_mov_b32_e32 v4, 0xff
@@ -2124,12 +2124,12 @@ define amdgpu_kernel void @udiv_i16(ptr addrspace(1) %out0, ptr addrspace(1) %ou
 ; GFX8-NEXT:    v_mul_lo_u32 v3, v2, s5
 ; GFX8-NEXT:    v_add_u32_e32 v4, vcc, 1, v2
 ; GFX8-NEXT:    v_sub_u32_e32 v3, vcc, s4, v3
-; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, s5, v3
+; GFX8-NEXT:    v_cmp_lt_u32_e32 vcc, s5, v3
 ; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
 ; GFX8-NEXT:    v_subrev_u32_e64 v4, s[0:1], s5, v3
 ; GFX8-NEXT:    v_cndmask_b32_e32 v3, v3, v4, vcc
 ; GFX8-NEXT:    v_add_u32_e32 v4, vcc, 1, v2
-; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, s5, v3
+; GFX8-NEXT:    v_cmp_lt_u32_e32 vcc, s5, v3
 ; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
 ; GFX8-NEXT:    v_subrev_u32_e64 v4, s[0:1], s5, v3
 ; GFX8-NEXT:    flat_store_short v[0:1], v2
@@ -2159,12 +2159,12 @@ define amdgpu_kernel void @udiv_i16(ptr addrspace(1) %out0, ptr addrspace(1) %ou
 ; GFX9-NEXT:    v_mul_lo_u32 v1, v0, s4
 ; GFX9-NEXT:    v_add_u32_e32 v3, 1, v0
 ; GFX9-NEXT:    v_sub_u32_e32 v1, s5, v1
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s4, v1
+; GFX9-NEXT:    v_cmp_lt_u32_e32 vcc, s4, v1
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
 ; GFX9-NEXT:    v_subrev_u32_e32 v3, s4, v1
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
 ; GFX9-NEXT:    v_add_u32_e32 v3, 1, v0
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s4, v1
+; GFX9-NEXT:    v_cmp_lt_u32_e32 vcc, s4, v1
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
 ; GFX9-NEXT:    v_subrev_u32_e32 v3, s4, v1
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
@@ -2193,11 +2193,11 @@ define amdgpu_kernel void @udiv_i16(ptr addrspace(1) %out0, ptr addrspace(1) %ou
 ; GFX10-NEXT:    v_sub_nc_u32_e32 v1, s0, v1
 ; GFX10-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
 ; GFX10-NEXT:    v_subrev_nc_u32_e32 v3, s4, v1
-; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, s4, v1
+; GFX10-NEXT:    v_cmp_lt_u32_e32 vcc_lo, s4, v1
 ; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
 ; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc_lo
 ; GFX10-NEXT:    v_add_nc_u32_e32 v2, 1, v0
-; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, s4, v1
+; GFX10-NEXT:    v_cmp_lt_u32_e32 vcc_lo, s4, v1
 ; GFX10-NEXT:    v_subrev_nc_u32_e32 v3, s4, v1
 ; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
 ; GFX10-NEXT:    v_mov_b32_e32 v2, 0
@@ -2247,24 +2247,24 @@ define amdgpu_kernel void @udivrem_v2i16(ptr addrspace(1) %out0, ptr addrspace(1
 ; GFX8-NEXT:    v_mul_lo_u32 v2, v0, s2
 ; GFX8-NEXT:    v_add_u32_e32 v3, vcc, 1, v0
 ; GFX8-NEXT:    v_sub_u32_e32 v2, vcc, s0, v2
-; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, s2, v2
+; GFX8-NEXT:    v_cmp_lt_u32_e32 vcc, s2, v2
 ; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
 ; GFX8-NEXT:    v_subrev_u32_e64 v3, s[0:1], s2, v2
 ; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
 ; GFX8-NEXT:    v_add_u32_e32 v3, vcc, 1, v0
-; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, s2, v2
+; GFX8-NEXT:    v_cmp_lt_u32_e32 vcc, s2, v2
 ; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
 ; GFX8-NEXT:    v_mul_lo_u32 v3, v1, s3
 ; GFX8-NEXT:    v_subrev_u32_e64 v4, s[0:1], s2, v2
 ; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
 ; GFX8-NEXT:    v_sub_u32_e32 v3, vcc, s8, v3
 ; GFX8-NEXT:    v_add_u32_e32 v4, vcc, 1, v1
-; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, s3, v3
+; GFX8-NEXT:    v_cmp_lt_u32_e32 vcc, s3, v3
 ; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
 ; GFX8-NEXT:    v_subrev_u32_e64 v4, s[0:1], s3, v3
 ; GFX8-NEXT:    v_cndmask_b32_e32 v3, v3, v4, vcc
 ; GFX8-NEXT:    v_add_u32_e32 v4, vcc, 1, v1
-; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, s3, v3
+; GFX8-NEXT:    v_cmp_lt_u32_e32 vcc, s3, v3
 ; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
 ; GFX8-NEXT:    v_subrev_u32_e64 v4, s[0:1], s3, v3
 ; GFX8-NEXT:    v_and_b32_e32 v1, 0xffff, v1
@@ -2314,22 +2314,22 @@ define amdgpu_kernel void @udivrem_v2i16(ptr addrspace(1) %out0, ptr addrspace(1
 ; GFX9-NEXT:    v_mul_lo_u32 v3, v1, s2
 ; GFX9-NEXT:    v_add_u32_e32 v5, 1, v1
 ; GFX9-NEXT:    v_sub_u32_e32 v2, s0, v2
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s3, v2
+; GFX9-NEXT:    v_cmp_lt_u32_e32 vcc, s3, v2
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
 ; GFX9-NEXT:    v_subrev_u32_e32 v4, s3, v2
 ; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
 ; GFX9-NEXT:    v_add_u32_e32 v4, 1, v0
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s3, v2
+; GFX9-NEXT:    v_cmp_lt_u32_e32 vcc, s3, v2
 ; GFX9-NEXT:    v_sub_u32_e32 v3, s1, v3
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
 ; GFX9-NEXT:    v_subrev_u32_e32 v4, s3, v2
-; GFX9-NEXT:    v_cmp_le_u32_e64 s[0:1], s2, v3
+; GFX9-NEXT:    v_cmp_lt_u32_e64 s[0:1], s2, v3
 ; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
 ; GFX9-NEXT:    v_subrev_u32_e32 v4, s2, v3
 ; GFX9-NEXT:    v_cndmask_b32_e64 v1, v1, v5, s[0:1]
 ; GFX9-NEXT:    v_cndmask_b32_e64 v3, v3, v4, s[0:1]
 ; GFX9-NEXT:    v_add_u32_e32 v4, 1, v1
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s2, v3
+; GFX9-NEXT:    v_cmp_lt_u32_e32 vcc, s2, v3
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
 ; GFX9-NEXT:    v_subrev_u32_e32 v4, s2, v3
 ; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v0
@@ -2377,18 +2377,18 @@ define amdgpu_kernel void @udivrem_v2i16(ptr addrspace(1) %out0, ptr addrspace(1
 ; GFX10-NEXT:    v_add_nc_u32_e32 v6, 1, v1
 ; GFX10-NEXT:    v_sub_nc_u32_e32 v2, s3, v2
 ; GFX10-NEXT:    v_sub_nc_u32_e32 v3, s0, v3
-; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, s2, v2
+; GFX10-NEXT:    v_cmp_lt_u32_e32 vcc_lo, s2, v2
 ; GFX10-NEXT:    v_subrev_nc_u32_e32 v5, s2, v2
-; GFX10-NEXT:    v_cmp_le_u32_e64 s0, s1, v3
+; GFX10-NEXT:    v_cmp_lt_u32_e64 s0, s1, v3
 ; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc_lo
 ; GFX10-NEXT:    v_subrev_nc_u32_e32 v4, s1, v3
 ; GFX10-NEXT:    v_cndmask_b32_e32 v2, v2, v5, vcc_lo
 ; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, v6, s0
 ; GFX10-NEXT:    v_add_nc_u32_e32 v5, 1, v0
 ; GFX10-NEXT:    v_cndmask_b32_e64 v3, v3, v4, s0
-; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, s2, v2
+; GFX10-NEXT:    v_cmp_lt_u32_e32 vcc_lo, s2, v2
 ; GFX10-NEXT:    v_subrev_nc_u32_e32 v4, s2, v2
-; GFX10-NEXT:    v_cmp_le_u32_e64 s0, s1, v3
+; GFX10-NEXT:    v_cmp_lt_u32_e64 s0, s1, v3
 ; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc_lo
 ; GFX10-NEXT:    v_add_nc_u32_e32 v5, 1, v1
 ; GFX10-NEXT:    v_subrev_nc_u32_e32 v6, s1, v3
@@ -2436,12 +2436,12 @@ define amdgpu_kernel void @udivrem_i3(ptr addrspace(1) %out0, ptr addrspace(1) %
 ; GFX8-NEXT:    v_mul_lo_u32 v3, v2, s5
 ; GFX8-NEXT:    v_add_u32_e32 v4, vcc, 1, v2
 ; GFX8-NEXT:    v_sub_u32_e32 v3, vcc, s4, v3
-; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, s5, v3
+; GFX8-NEXT:    v_cmp_lt_u32_e32 vcc, s5, v3
 ; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
 ; GFX8-NEXT:    v_subrev_u32_e64 v4, s[0:1], s5, v3
 ; GFX8-NEXT:    v_cndmask_b32_e32 v3, v3, v4, vcc
 ; GFX8-NEXT:    v_add_u32_e32 v4, vcc, 1, v2
-; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, s5, v3
+; GFX8-NEXT:    v_cmp_lt_u32_e32 vcc, s5, v3
 ; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
 ; GFX8-NEXT:    v_subrev_u32_e64 v4, s[0:1], s5, v3
 ; GFX8-NEXT:    v_and_b32_e32 v2, 7, v2
@@ -2473,12 +2473,12 @@ define amdgpu_kernel void @udivrem_i3(ptr addrspace(1) %out0, ptr addrspace(1) %
 ; GFX9-NEXT:    v_mul_lo_u32 v1, v0, s4
 ; GFX9-NEXT:    v_add_u32_e32 v3, 1, v0
 ; GFX9-NEXT:    v_sub_u32_e32 v1, s5, v1
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s4, v1
+; GFX9-NEXT:    v_cmp_lt_u32_e32 vcc, s4, v1
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
 ; GFX9-NEXT:    v_subrev_u32_e32 v3, s4, v1
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
 ; GFX9-NEXT:    v_add_u32_e32 v3, 1, v0
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s4, v1
+; GFX9-NEXT:    v_cmp_lt_u32_e32 vcc, s4, v1
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
 ; GFX9-NEXT:    v_subrev_u32_e32 v3, s4, v1
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
@@ -2508,12 +2508,12 @@ define amdgpu_kernel void @udivrem_i3(ptr addrspace(1) %out0, ptr addrspace(1) %
 ; GFX10-NEXT:    v_add_nc_u32_e32 v2, 1, v0
 ; GFX10-NEXT:    v_sub_nc_u32_e32 v1, s0, v1
 ; GFX10-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
-; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, s4, v1
+; GFX10-NEXT:    v_cmp_lt_u32_e32 vcc_lo, s4, v1
 ; GFX10-NEXT:    v_subrev_nc_u32_e32 v3, s4, v1
 ; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
 ; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc_lo
 ; GFX10-NEXT:    v_add_nc_u32_e32 v2, 1, v0
-; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, s4, v1
+; GFX10-NEXT:    v_cmp_lt_u32_e32 vcc_lo, s4, v1
 ; GFX10-NEXT:    v_subrev_nc_u32_e32 v3, s4, v1
 ; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
 ; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc_lo
@@ -2557,12 +2557,12 @@ define amdgpu_kernel void @udivrem_i27(ptr addrspace(1) %out0, ptr addrspace(1)
 ; GFX8-NEXT:    v_mul_lo_u32 v3, v2, s5
 ; GFX8-NEXT:    v_add_u32_e32 v4, vcc, 1, v2
 ; GFX8-NEXT:    v_sub_u32_e32 v3, vcc, s4, v3
-; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, s5, v3
+; GFX8-NEXT:    v_cmp_lt_u32_e32 vcc, s5, v3
 ; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
 ; GFX8-NEXT:    v_subrev_u32_e64 v4, s[0:1], s5, v3
 ; GFX8-NEXT:    v_cndmask_b32_e32 v3, v3, v4, vcc
 ; GFX8-NEXT:    v_add_u32_e32 v4, vcc, 1, v2
-; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, s5, v3
+; GFX8-NEXT:    v_cmp_lt_u32_e32 vcc, s5, v3
 ; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
 ; GFX8-NEXT:    v_subrev_u32_e64 v4, s[0:1], s5, v3
 ; GFX8-NEXT:    v_and_b32_e32 v2, 0x7ffffff, v2
@@ -2594,12 +2594,12 @@ define amdgpu_kernel void @udivrem_i27(ptr addrspace(1) %out0, ptr addrspace(1)
 ; GFX9-NEXT:    v_mul_lo_u32 v1, v0, s4
 ; GFX9-NEXT:    v_add_u32_e32 v3, 1, v0
 ; GFX9-NEXT:    v_sub_u32_e32 v1, s5, v1
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s4, v1
+; GFX9-NEXT:    v_cmp_lt_u32_e32 vcc, s4, v1
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
 ; GFX9-NEXT:    v_subrev_u32_e32 v3, s4, v1
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
 ; GFX9-NEXT:    v_add_u32_e32 v3, 1, v0
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s4, v1
+; GFX9-NEXT:    v_cmp_lt_u32_e32 vcc, s4, v1
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
 ; GFX9-NEXT:    v_subrev_u32_e32 v3, s4, v1
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
@@ -2629,12 +2629,12 @@ define amdgpu_kernel void @udivrem_i27(ptr addrspace(1) %out0, ptr addrspace(1)
 ; GFX10-NEXT:    v_add_nc_u32_e32 v2, 1, v0
 ; GFX10-NEXT:    v_sub_nc_u32_e32 v1, s0, v1
 ; GFX10-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
-; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, s4, v1
+; GFX10-NEXT:    v_cmp_lt_u32_e32 vcc_lo, s4, v1
 ; GFX10-NEXT:    v_subrev_nc_u32_e32 v3, s4, v1
 ; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
 ; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc_lo
 ; GFX10-NEXT:    v_add_nc_u32_e32 v2, 1, v0
-; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, s4, v1
+; GFX10-NEXT:    v_cmp_lt_u32_e32 vcc_lo, s4, v1
 ; GFX10-NEXT:    v_subrev_nc_u32_e32 v3, s4, v1
 ; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
 ; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc_lo
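
Aside, not part of the patch: every udivrem update above replaces a non-strict unsigned compare with its strict counterpart in the commuted operand order. The commuted, or "reverse", form of a comparison keeps its strictness: a < b is the same predicate as b > a, and a <= b the same as b >= a. A mixed-strictness reverse goes wrong exactly when the two operands are equal, which is the boundary the quotient/remainder refinement selects on. A minimal standalone C++ sketch of those identities, names invented:

  #include <cassert>
  #include <cstdint>

  int main() {
    for (uint32_t a = 0; a < 8; ++a) {
      for (uint32_t b = 0; b < 8; ++b) {
        assert((a < b) == (b > a));    // reverse of lt is gt
        assert((a <= b) == (b >= a));  // reverse of le is ge
        if (a == b)                    // mixing strictness in a "reverse"
          assert((a <= b) != (b > a)); // is observable only at equality
      }
    }
    return 0;
  }

Running it exercises every operand ordering over a small range; the asserts encode the reverse identities at stake in this patch.
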
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i32.ll
index 24ec4fa48f778..2b1cde5522f27 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i32.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i32.ll
@@ -70,10 +70,10 @@ define amdgpu_ps i32 @s_urem_i32(i32 inreg %num, i32 inreg %den) {
 ; GISEL-NEXT:    v_mul_lo_u32 v0, v0, s1
 ; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, s0, v0
 ; GISEL-NEXT:    v_subrev_i32_e32 v1, vcc, s1, v0
-; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s1, v0
+; GISEL-NEXT:    v_cmp_lt_u32_e32 vcc, s1, v0
 ; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
 ; GISEL-NEXT:    v_subrev_i32_e32 v1, vcc, s1, v0
-; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s1, v0
+; GISEL-NEXT:    v_cmp_lt_u32_e32 vcc, s1, v0
 ; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
 ; GISEL-NEXT:    v_readfirstlane_b32 s0, v0
 ; GISEL-NEXT:    ; return to shader part epilog
@@ -92,10 +92,10 @@ define amdgpu_ps i32 @s_urem_i32(i32 inreg %num, i32 inreg %den) {
 ; CGP-NEXT:    v_mul_lo_u32 v0, v0, s1
 ; CGP-NEXT:    v_sub_i32_e32 v0, vcc, s0, v0
 ; CGP-NEXT:    v_subrev_i32_e32 v1, vcc, s1, v0
-; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s1, v0
+; CGP-NEXT:    v_cmp_lt_u32_e32 vcc, s1, v0
 ; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
 ; CGP-NEXT:    v_subrev_i32_e32 v1, vcc, s1, v0
-; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s1, v0
+; CGP-NEXT:    v_cmp_lt_u32_e32 vcc, s1, v0
 ; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
 ; CGP-NEXT:    v_readfirstlane_b32 s0, v0
 ; CGP-NEXT:    ; return to shader part epilog
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll
index f6a228614a27e..15518c11c87cb 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll
@@ -294,19 +294,19 @@ define amdgpu_ps i64 @s_urem_i64(i64 inreg %num, i64 inreg %den) {
 ; CHECK-NEXT:    v_sub_i32_e32 v4, vcc, s0, v6
 ; CHECK-NEXT:    v_subb_u32_e64 v3, s[4:5], v3, v1, vcc
 ; CHECK-NEXT:    v_sub_i32_e64 v1, s[4:5], s1, v1
-; CHECK-NEXT:    v_cmp_le_u32_e64 s[4:5], s2, v4
+; CHECK-NEXT:    v_cmp_lt_u32_e64 s[4:5], s2, v4
 ; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, -1, s[4:5]
-; CHECK-NEXT:    v_cmp_le_u32_e64 s[4:5], s3, v3
+; CHECK-NEXT:    v_cmp_lt_u32_e64 s[4:5], s3, v3
 ; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, -1, s[4:5]
 ; CHECK-NEXT:    v_subb_u32_e32 v0, vcc, v1, v0, vcc
 ; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, s3, v3
 ; CHECK-NEXT:    v_cndmask_b32_e32 v1, v6, v5, vcc
 ; CHECK-NEXT:    v_subrev_i32_e32 v3, vcc, s2, v4
 ; CHECK-NEXT:    v_subbrev_u32_e32 v0, vcc, 0, v0, vcc
-; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s2, v3
+; CHECK-NEXT:    v_cmp_lt_u32_e32 vcc, s2, v3
 ; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, -1, vcc
 ; CHECK-NEXT:    v_subrev_i32_e32 v6, vcc, s2, v3
-; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s3, v0
+; CHECK-NEXT:    v_cmp_lt_u32_e32 vcc, s3, v0
 ; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, -1, vcc
 ; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, s3, v0
 ; CHECK-NEXT:    v_cndmask_b32_e32 v0, v7, v5, vcc
@@ -333,10 +333,10 @@ define amdgpu_ps i64 @s_urem_i64(i64 inreg %num, i64 inreg %den) {
 ; CHECK-NEXT:    v_mul_lo_u32 v0, v0, s2
 ; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, s0, v0
 ; CHECK-NEXT:    v_subrev_i32_e32 v1, vcc, s2, v0
-; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s2, v0
+; CHECK-NEXT:    v_cmp_lt_u32_e32 vcc, s2, v0
 ; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
 ; CHECK-NEXT:    v_subrev_i32_e32 v1, vcc, s2, v0
-; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s2, v0
+; CHECK-NEXT:    v_cmp_lt_u32_e32 vcc, s2, v0
 ; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
 ; CHECK-NEXT:  .LBB1_5:
 ; CHECK-NEXT:    v_readfirstlane_b32 s0, v0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/vni8-across-blocks.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/vni8-across-blocks.ll
index 9c2fabce4bcde..ec429b7d4c27d 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/vni8-across-blocks.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/vni8-across-blocks.ll
@@ -12,7 +12,7 @@ define amdgpu_kernel void @v3i8_liveout(ptr addrspace(1) %src1, ptr addrspace(1)
 ; GFX906-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX906-NEXT:    global_load_dword v3, v2, s[0:1]
 ; GFX906-NEXT:    v_mov_b32_e32 v1, 0xff
-; GFX906-NEXT:    v_cmp_gt_u32_e32 vcc, 15, v0
+; GFX906-NEXT:    v_cmp_ge_u32_e32 vcc, 15, v0
 ; GFX906-NEXT:    s_waitcnt vmcnt(0)
 ; GFX906-NEXT:    v_and_b32_e32 v6, 0xff, v3
 ; GFX906-NEXT:    v_lshlrev_b32_sdwa v7, v4, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
@@ -64,7 +64,7 @@ define amdgpu_kernel void @v4i8_liveout(ptr addrspace(1) %src1, ptr addrspace(1)
 ; GFX906-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
 ; GFX906-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
 ; GFX906-NEXT:    v_lshlrev_b32_e32 v2, 2, v0
-; GFX906-NEXT:    v_cmp_gt_u32_e32 vcc, 15, v0
+; GFX906-NEXT:    v_cmp_ge_u32_e32 vcc, 15, v0
 ; GFX906-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX906-NEXT:    global_load_dword v1, v2, s[0:1]
 ; GFX906-NEXT:    s_and_saveexec_b64 s[0:1], vcc
@@ -100,7 +100,7 @@ define amdgpu_kernel void @v5i8_liveout(ptr addrspace(1) %src1, ptr addrspace(1)
 ; GFX906-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
 ; GFX906-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
 ; GFX906-NEXT:    v_lshlrev_b32_e32 v3, 3, v0
-; GFX906-NEXT:    v_cmp_gt_u32_e32 vcc, 15, v0
+; GFX906-NEXT:    v_cmp_ge_u32_e32 vcc, 15, v0
 ; GFX906-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX906-NEXT:    global_load_dwordx2 v[1:2], v3, s[0:1]
 ; GFX906-NEXT:    s_waitcnt vmcnt(0)
@@ -145,7 +145,7 @@ define amdgpu_kernel void @v8i8_liveout(ptr addrspace(1) %src1, ptr addrspace(1)
 ; GFX906-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
 ; GFX906-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
 ; GFX906-NEXT:    v_lshlrev_b32_e32 v3, 3, v0
-; GFX906-NEXT:    v_cmp_gt_u32_e32 vcc, 15, v0
+; GFX906-NEXT:    v_cmp_ge_u32_e32 vcc, 15, v0
 ; GFX906-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX906-NEXT:    global_load_dwordx2 v[1:2], v3, s[0:1]
 ; GFX906-NEXT:    s_and_saveexec_b64 s[0:1], vcc
@@ -181,7 +181,7 @@ define amdgpu_kernel void @v16i8_liveout(ptr addrspace(1) %src1, ptr addrspace(1
 ; GFX906-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
 ; GFX906-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
 ; GFX906-NEXT:    v_lshlrev_b32_e32 v5, 4, v0
-; GFX906-NEXT:    v_cmp_gt_u32_e32 vcc, 15, v0
+; GFX906-NEXT:    v_cmp_ge_u32_e32 vcc, 15, v0
 ; GFX906-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX906-NEXT:    global_load_dwordx4 v[1:4], v5, s[0:1]
 ; GFX906-NEXT:    s_and_saveexec_b64 s[0:1], vcc
@@ -217,7 +217,7 @@ define amdgpu_kernel void @v32i8_liveout(ptr addrspace(1) %src1, ptr addrspace(1
 ; GFX906-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
 ; GFX906-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
 ; GFX906-NEXT:    v_lshlrev_b32_e32 v9, 5, v0
-; GFX906-NEXT:    v_cmp_gt_u32_e32 vcc, 15, v0
+; GFX906-NEXT:    v_cmp_ge_u32_e32 vcc, 15, v0
 ; GFX906-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX906-NEXT:    global_load_dwordx4 v[1:4], v9, s[0:1]
 ; GFX906-NEXT:    global_load_dwordx4 v[5:8], v9, s[0:1] offset:16
@@ -265,7 +265,7 @@ define amdgpu_kernel void @v256i8_liveout(ptr addrspace(1) %src1, ptr addrspace(
 ; GFX906-NEXT:    s_mov_b32 s15, 0xe00000
 ; GFX906-NEXT:    s_add_u32 s12, s12, s11
 ; GFX906-NEXT:    s_addc_u32 s13, s13, 0
-; GFX906-NEXT:    v_cmp_gt_u32_e32 vcc, 15, v0
+; GFX906-NEXT:    v_cmp_ge_u32_e32 vcc, 15, v0
 ; GFX906-NEXT:    s_waitcnt vmcnt(0)
 ; GFX906-NEXT:    buffer_store_dword v5, off, s[12:15], 0 ; 4-byte Folded Spill
 ; GFX906-NEXT:    s_nop 0
@@ -481,7 +481,7 @@ define amdgpu_kernel void @v8i8_phi_chain(ptr addrspace(1) %src1, ptr addrspace(
 ; GFX906:       ; %bb.0: ; %entry
 ; GFX906-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
 ; GFX906-NEXT:    v_lshlrev_b32_e32 v3, 3, v0
-; GFX906-NEXT:    v_cmp_gt_u32_e32 vcc, 15, v0
+; GFX906-NEXT:    v_cmp_ge_u32_e32 vcc, 15, v0
 ; GFX906-NEXT:    s_xor_b64 s[0:1], vcc, -1
 ; GFX906-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX906-NEXT:    global_load_dwordx2 v[1:2], v3, s[8:9]
@@ -489,7 +489,7 @@ define amdgpu_kernel void @v8i8_phi_chain(ptr addrspace(1) %src1, ptr addrspace(
 ; GFX906-NEXT:    s_cbranch_execz .LBB8_2
 ; GFX906-NEXT:  ; %bb.1: ; %bb.1
 ; GFX906-NEXT:    global_load_dwordx2 v[1:2], v3, s[10:11]
-; GFX906-NEXT:    v_cmp_gt_u32_e32 vcc, 7, v0
+; GFX906-NEXT:    v_cmp_ge_u32_e32 vcc, 7, v0
 ; GFX906-NEXT:    s_andn2_b64 s[0:1], s[0:1], exec
 ; GFX906-NEXT:    s_and_b64 s[4:5], exec, vcc
 ; GFX906-NEXT:    s_or_b64 s[0:1], s[0:1], s[4:5]
@@ -535,7 +535,7 @@ define amdgpu_kernel void @v8i8_multi_block(ptr addrspace(1) %src1, ptr addrspac
 ; GFX906:       ; %bb.0: ; %entry
 ; GFX906-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
 ; GFX906-NEXT:    v_lshlrev_b32_e32 v5, 3, v0
-; GFX906-NEXT:    v_cmp_gt_u32_e32 vcc, 15, v0
+; GFX906-NEXT:    v_cmp_ge_u32_e32 vcc, 15, v0
 ; GFX906-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX906-NEXT:    global_load_dwordx2 v[3:4], v5, s[8:9]
 ; GFX906-NEXT:    s_waitcnt vmcnt(0)
@@ -545,7 +545,7 @@ define amdgpu_kernel void @v8i8_multi_block(ptr addrspace(1) %src1, ptr addrspac
 ; GFX906-NEXT:    s_cbranch_execz .LBB9_4
 ; GFX906-NEXT:  ; %bb.1: ; %bb.1
 ; GFX906-NEXT:    global_load_dwordx2 v[1:2], v5, s[10:11]
-; GFX906-NEXT:    v_cmp_gt_u32_e32 vcc, 7, v0
+; GFX906-NEXT:    v_cmp_ge_u32_e32 vcc, 7, v0
 ; GFX906-NEXT:    s_and_saveexec_b64 s[2:3], vcc
 ; GFX906-NEXT:    s_cbranch_execz .LBB9_3
 ; GFX906-NEXT:  ; %bb.2: ; %bb.2
@@ -588,7 +588,7 @@ define amdgpu_kernel void @v32i8_loop_carried(ptr addrspace(1) %src1, ptr addrsp
 ; GFX906-NEXT:    v_lshlrev_b32_e32 v1, 5, v0
 ; GFX906-NEXT:    v_mov_b32_e32 v3, 8
 ; GFX906-NEXT:    v_mov_b32_e32 v2, 0xff
-; GFX906-NEXT:    v_cmp_le_u32_e32 vcc, 15, v0
+; GFX906-NEXT:    v_cmp_lt_u32_e32 vcc, 15, v0
 ; GFX906-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX906-NEXT:    global_load_dword v1, v1, s[0:1]
 ; GFX906-NEXT:    s_mov_b64 s[0:1], 0
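
Aside, not from the patch: in the vni8 tests the compare result feeds s_and_saveexec, so strictness decides whether the boundary lane sits inside the guarded region; with unsigned lane ids, 15 > id covers ids 0..14 while 15 >= id also covers id 15. A small illustrative C++ lane-count sketch, assuming a hypothetical 32-lane wave:

  #include <cassert>
  #include <cstdint>

  int main() {
    uint32_t strict = 0, nonstrict = 0;
    for (uint32_t id = 0; id < 32; ++id) { // invented 32-lane wavefront
      if (15 > id)  ++strict;              // gt form: lanes 0..14 active
      if (15 >= id) ++nonstrict;           // ge form: lanes 0..15 active
    }
    assert(strict == 15 && nonstrict == 16); // differ by the boundary lane
    return 0;
  }
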
diff --git a/llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll b/llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll
index 3160e38df5e3f..98b7e365ff229 100644
--- a/llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll
+++ b/llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll
@@ -603,7 +603,7 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg
 ; GFX908-NEXT:    v_add_co_u32_sdwa v2, vcc, v2, v16 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
 ; GFX908-NEXT:    v_addc_co_u32_e32 v3, vcc, 0, v3, vcc
 ; GFX908-NEXT:    s_add_u32 s20, s20, s4
-; GFX908-NEXT:    v_cmp_lt_i64_e64 s[24:25], -1, v[2:3]
+; GFX908-NEXT:    v_cmp_le_i64_e64 s[24:25], -1, v[2:3]
 ; GFX908-NEXT:    s_addc_u32 s21, s21, s5
 ; GFX908-NEXT:    s_mov_b64 s[22:23], 0
 ; GFX908-NEXT:    s_andn2_b64 vcc, exec, s[24:25]
@@ -765,7 +765,7 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg
 ; GFX90A-NEXT:    v_addc_co_u32_e32 v5, vcc, 0, v5, vcc
 ; GFX90A-NEXT:    s_add_u32 s20, s20, s4
 ; GFX90A-NEXT:    s_addc_u32 s21, s21, s5
-; GFX90A-NEXT:    v_cmp_lt_i64_e64 s[24:25], -1, v[4:5]
+; GFX90A-NEXT:    v_cmp_le_i64_e64 s[24:25], -1, v[4:5]
 ; GFX90A-NEXT:    s_mov_b64 s[22:23], 0
 ; GFX90A-NEXT:    s_andn2_b64 vcc, exec, s[24:25]
 ; GFX90A-NEXT:    s_cbranch_vccz .LBB3_9
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-branch-weight-metadata.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-branch-weight-metadata.ll
index 1da8cd6646d53..1378093923642 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-branch-weight-metadata.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-branch-weight-metadata.ll
@@ -148,7 +148,7 @@ define void @divergent_br_no_metadata(i32 noundef inreg %value, ptr addrspace(8)
 ; GFX9-LABEL: divergent_br_no_metadata:
 ; GFX9:       ; %bb.0: ; %entry
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v0
+; GFX9-NEXT:    v_cmp_le_i32_e32 vcc, 0, v0
 ; GFX9-NEXT:    s_and_saveexec_b64 s[8:9], vcc
 ; GFX9-NEXT:    s_cbranch_execz .LBB3_2
 ; GFX9-NEXT:  ; %bb.1: ; %if.then
@@ -167,7 +167,7 @@ define void @divergent_br_no_metadata(i32 noundef inreg %value, ptr addrspace(8)
 ; GFX1010-LABEL: divergent_br_no_metadata:
 ; GFX1010:       ; %bb.0: ; %entry
 ; GFX1010-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1010-NEXT:    v_cmp_lt_i32_e32 vcc_lo, 0, v0
+; GFX1010-NEXT:    v_cmp_le_i32_e32 vcc_lo, 0, v0
 ; GFX1010-NEXT:    s_and_saveexec_b32 s8, vcc_lo
 ; GFX1010-NEXT:    s_cbranch_execz .LBB3_2
 ; GFX1010-NEXT:  ; %bb.1: ; %if.then
@@ -188,7 +188,7 @@ define void @divergent_br_no_metadata(i32 noundef inreg %value, ptr addrspace(8)
 ; GFX1030:       ; %bb.0: ; %entry
 ; GFX1030-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1030-NEXT:    s_mov_b32 s8, exec_lo
-; GFX1030-NEXT:    v_cmpx_lt_i32_e32 0, v0
+; GFX1030-NEXT:    v_cmpx_le_i32_e32 0, v0
 ; GFX1030-NEXT:    s_cbranch_execz .LBB3_2
 ; GFX1030-NEXT:  ; %bb.1: ; %if.then
 ; GFX1030-NEXT:    v_mov_b32_e32 v0, s16
@@ -219,7 +219,7 @@ define void @divergent_br_same_weight(i32 noundef inreg %value, ptr addrspace(8)
 ; GFX9-LABEL: divergent_br_same_weight:
 ; GFX9:       ; %bb.0: ; %entry
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v0
+; GFX9-NEXT:    v_cmp_le_i32_e32 vcc, 0, v0
 ; GFX9-NEXT:    s_and_saveexec_b64 s[8:9], vcc
 ; GFX9-NEXT:    s_cbranch_execz .LBB4_2
 ; GFX9-NEXT:  ; %bb.1: ; %if.then
@@ -238,7 +238,7 @@ define void @divergent_br_same_weight(i32 noundef inreg %value, ptr addrspace(8)
 ; GFX1010-LABEL: divergent_br_same_weight:
 ; GFX1010:       ; %bb.0: ; %entry
 ; GFX1010-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1010-NEXT:    v_cmp_lt_i32_e32 vcc_lo, 0, v0
+; GFX1010-NEXT:    v_cmp_le_i32_e32 vcc_lo, 0, v0
 ; GFX1010-NEXT:    s_and_saveexec_b32 s8, vcc_lo
 ; GFX1010-NEXT:    s_cbranch_execz .LBB4_2
 ; GFX1010-NEXT:  ; %bb.1: ; %if.then
@@ -259,7 +259,7 @@ define void @divergent_br_same_weight(i32 noundef inreg %value, ptr addrspace(8)
 ; GFX1030:       ; %bb.0: ; %entry
 ; GFX1030-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1030-NEXT:    s_mov_b32 s8, exec_lo
-; GFX1030-NEXT:    v_cmpx_lt_i32_e32 0, v0
+; GFX1030-NEXT:    v_cmpx_le_i32_e32 0, v0
 ; GFX1030-NEXT:    s_cbranch_execz .LBB4_2
 ; GFX1030-NEXT:  ; %bb.1: ; %if.then
 ; GFX1030-NEXT:    v_mov_b32_e32 v0, s16
@@ -290,7 +290,7 @@ define void @divergent_br_then_likely(i32 noundef inreg %value, ptr addrspace(8)
 ; GFX9-LABEL: divergent_br_then_likely:
 ; GFX9:       ; %bb.0: ; %entry
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v0
+; GFX9-NEXT:    v_cmp_le_i32_e32 vcc, 0, v0
 ; GFX9-NEXT:    s_and_saveexec_b64 s[8:9], vcc
 ; GFX9-NEXT:  ; %bb.1: ; %if.then
 ; GFX9-NEXT:    s_mov_b32 s7, s20
@@ -308,7 +308,7 @@ define void @divergent_br_then_likely(i32 noundef inreg %value, ptr addrspace(8)
 ; GFX1010-LABEL: divergent_br_then_likely:
 ; GFX1010:       ; %bb.0: ; %entry
 ; GFX1010-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1010-NEXT:    v_cmp_lt_i32_e32 vcc_lo, 0, v0
+; GFX1010-NEXT:    v_cmp_le_i32_e32 vcc_lo, 0, v0
 ; GFX1010-NEXT:    s_and_saveexec_b32 s8, vcc_lo
 ; GFX1010-NEXT:  ; %bb.1: ; %if.then
 ; GFX1010-NEXT:    v_mov_b32_e32 v0, s16
@@ -328,7 +328,7 @@ define void @divergent_br_then_likely(i32 noundef inreg %value, ptr addrspace(8)
 ; GFX1030:       ; %bb.0: ; %entry
 ; GFX1030-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1030-NEXT:    s_mov_b32 s8, exec_lo
-; GFX1030-NEXT:    v_cmpx_lt_i32_e32 0, v0
+; GFX1030-NEXT:    v_cmpx_le_i32_e32 0, v0
 ; GFX1030-NEXT:  ; %bb.1: ; %if.then
 ; GFX1030-NEXT:    v_mov_b32_e32 v0, s16
 ; GFX1030-NEXT:    v_mov_b32_e32 v1, s21
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll
index b7097a9557b75..e3713efc8fef5 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll
@@ -7892,9 +7892,9 @@ define amdgpu_kernel void @sdiv_i64_pow2_shl_denom(ptr addrspace(1) %out, i64 %x
 ; GFX6-NEXT:    v_subb_u32_e64 v4, s[0:1], v4, v5, vcc
 ; GFX6-NEXT:    v_subrev_i32_e64 v5, s[0:1], s10, v3
 ; GFX6-NEXT:    v_subbrev_u32_e64 v4, s[0:1], 0, v4, s[0:1]
-; GFX6-NEXT:    v_cmp_le_u32_e64 s[0:1], s11, v4
+; GFX6-NEXT:    v_cmp_lt_u32_e64 s[0:1], s11, v4
 ; GFX6-NEXT:    v_cndmask_b32_e64 v6, 0, -1, s[0:1]
-; GFX6-NEXT:    v_cmp_le_u32_e64 s[0:1], s10, v5
+; GFX6-NEXT:    v_cmp_lt_u32_e64 s[0:1], s10, v5
 ; GFX6-NEXT:    v_cndmask_b32_e64 v5, 0, -1, s[0:1]
 ; GFX6-NEXT:    v_cmp_eq_u32_e64 s[0:1], s11, v4
 ; GFX6-NEXT:    v_cndmask_b32_e64 v4, v6, v5, s[0:1]
@@ -7907,9 +7907,9 @@ define amdgpu_kernel void @sdiv_i64_pow2_shl_denom(ptr addrspace(1) %out, i64 %x
 ; GFX6-NEXT:    v_cndmask_b32_e64 v5, v6, v8, s[0:1]
 ; GFX6-NEXT:    v_mov_b32_e32 v6, s3
 ; GFX6-NEXT:    v_subb_u32_e32 v2, vcc, v6, v2, vcc
-; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, s11, v2
+; GFX6-NEXT:    v_cmp_lt_u32_e32 vcc, s11, v2
 ; GFX6-NEXT:    v_cndmask_b32_e64 v6, 0, -1, vcc
-; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, s10, v3
+; GFX6-NEXT:    v_cmp_lt_u32_e32 vcc, s10, v3
 ; GFX6-NEXT:    v_cndmask_b32_e64 v3, 0, -1, vcc
 ; GFX6-NEXT:    v_cmp_eq_u32_e32 vcc, s11, v2
 ; GFX6-NEXT:    v_cndmask_b32_e32 v2, v6, v3, vcc
@@ -8034,7 +8034,7 @@ define amdgpu_kernel void @sdiv_i64_pow2_shl_denom(ptr addrspace(1) %out, i64 %x
 ; GFX9-NEXT:    s_subb_u32 s10, s10, 0
 ; GFX9-NEXT:    s_cmp_ge_u32 s10, s7
 ; GFX9-NEXT:    s_cselect_b32 s15, -1, 0
-; GFX9-NEXT:    v_cmp_le_u32_e64 s[0:1], s6, v2
+; GFX9-NEXT:    v_cmp_lt_u32_e64 s[0:1], s6, v2
 ; GFX9-NEXT:    s_cmp_eq_u32 s10, s7
 ; GFX9-NEXT:    v_cndmask_b32_e64 v2, 0, -1, s[0:1]
 ; GFX9-NEXT:    v_mov_b32_e32 v3, s15
@@ -8055,7 +8055,7 @@ define amdgpu_kernel void @sdiv_i64_pow2_shl_denom(ptr addrspace(1) %out, i64 %x
 ; GFX9-NEXT:    s_subb_u32 s0, s11, s14
 ; GFX9-NEXT:    s_cmp_ge_u32 s0, s7
 ; GFX9-NEXT:    s_cselect_b32 s1, -1, 0
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s6, v1
+; GFX9-NEXT:    v_cmp_lt_u32_e32 vcc, s6, v1
 ; GFX9-NEXT:    s_cmp_eq_u32 s0, s7
 ; GFX9-NEXT:    v_cndmask_b32_e64 v1, 0, -1, vcc
 ; GFX9-NEXT:    v_mov_b32_e32 v4, s1
@@ -8374,9 +8374,9 @@ define amdgpu_kernel void @sdiv_v2i64_pow2_shl_denom(ptr addrspace(1) %out, <2 x
 ; GFX6-NEXT:    v_subb_u32_e64 v4, s[0:1], v4, v5, vcc
 ; GFX6-NEXT:    v_subrev_i32_e64 v5, s[0:1], s2, v3
 ; GFX6-NEXT:    v_subbrev_u32_e64 v4, s[0:1], 0, v4, s[0:1]
-; GFX6-NEXT:    v_cmp_le_u32_e64 s[0:1], s3, v4
+; GFX6-NEXT:    v_cmp_lt_u32_e64 s[0:1], s3, v4
 ; GFX6-NEXT:    v_cndmask_b32_e64 v6, 0, -1, s[0:1]
-; GFX6-NEXT:    v_cmp_le_u32_e64 s[0:1], s2, v5
+; GFX6-NEXT:    v_cmp_lt_u32_e64 s[0:1], s2, v5
 ; GFX6-NEXT:    v_cndmask_b32_e64 v5, 0, -1, s[0:1]
 ; GFX6-NEXT:    v_cmp_eq_u32_e64 s[0:1], s3, v4
 ; GFX6-NEXT:    v_cndmask_b32_e64 v4, v6, v5, s[0:1]
@@ -8397,9 +8397,9 @@ define amdgpu_kernel void @sdiv_v2i64_pow2_shl_denom(ptr addrspace(1) %out, <2 x
 ; GFX6-NEXT:    v_subb_u32_e32 v2, vcc, v6, v2, vcc
 ; GFX6-NEXT:    v_cvt_f32_u32_e32 v6, s12
 ; GFX6-NEXT:    v_cvt_f32_u32_e32 v7, s13
-; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, s3, v2
+; GFX6-NEXT:    v_cmp_lt_u32_e32 vcc, s3, v2
 ; GFX6-NEXT:    v_cndmask_b32_e64 v8, 0, -1, vcc
-; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, s2, v3
+; GFX6-NEXT:    v_cmp_lt_u32_e32 vcc, s2, v3
 ; GFX6-NEXT:    v_mac_f32_e32 v6, 0x4f800000, v7
 ; GFX6-NEXT:    v_rcp_f32_e32 v6, v6
 ; GFX6-NEXT:    v_cndmask_b32_e64 v3, 0, -1, vcc
@@ -8496,9 +8496,9 @@ define amdgpu_kernel void @sdiv_v2i64_pow2_shl_denom(ptr addrspace(1) %out, <2 x
 ; GFX6-NEXT:    v_subb_u32_e64 v6, s[0:1], v6, v7, vcc
 ; GFX6-NEXT:    v_subrev_i32_e64 v7, s[0:1], s12, v5
 ; GFX6-NEXT:    v_subbrev_u32_e64 v6, s[0:1], 0, v6, s[0:1]
-; GFX6-NEXT:    v_cmp_le_u32_e64 s[0:1], s13, v6
+; GFX6-NEXT:    v_cmp_lt_u32_e64 s[0:1], s13, v6
 ; GFX6-NEXT:    v_cndmask_b32_e64 v8, 0, -1, s[0:1]
-; GFX6-NEXT:    v_cmp_le_u32_e64 s[0:1], s12, v7
+; GFX6-NEXT:    v_cmp_lt_u32_e64 s[0:1], s12, v7
 ; GFX6-NEXT:    v_cndmask_b32_e64 v7, 0, -1, s[0:1]
 ; GFX6-NEXT:    v_cmp_eq_u32_e64 s[0:1], s13, v6
 ; GFX6-NEXT:    v_cndmask_b32_e64 v6, v8, v7, s[0:1]
@@ -8511,9 +8511,9 @@ define amdgpu_kernel void @sdiv_v2i64_pow2_shl_denom(ptr addrspace(1) %out, <2 x
 ; GFX6-NEXT:    v_cndmask_b32_e64 v7, v8, v10, s[0:1]
 ; GFX6-NEXT:    v_mov_b32_e32 v8, s11
 ; GFX6-NEXT:    v_subb_u32_e32 v4, vcc, v8, v4, vcc
-; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, s13, v4
+; GFX6-NEXT:    v_cmp_lt_u32_e32 vcc, s13, v4
 ; GFX6-NEXT:    v_cndmask_b32_e64 v8, 0, -1, vcc
-; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, s12, v5
+; GFX6-NEXT:    v_cmp_lt_u32_e32 vcc, s12, v5
 ; GFX6-NEXT:    v_cndmask_b32_e64 v5, 0, -1, vcc
 ; GFX6-NEXT:    v_cmp_eq_u32_e32 vcc, s13, v4
 ; GFX6-NEXT:    v_cndmask_b32_e32 v4, v8, v5, vcc
@@ -8639,7 +8639,7 @@ define amdgpu_kernel void @sdiv_v2i64_pow2_shl_denom(ptr addrspace(1) %out, <2 x
 ; GFX9-NEXT:    s_subb_u32 s8, s8, 0
 ; GFX9-NEXT:    s_cmp_ge_u32 s8, s15
 ; GFX9-NEXT:    s_cselect_b32 s19, -1, 0
-; GFX9-NEXT:    v_cmp_le_u32_e64 s[0:1], s14, v1
+; GFX9-NEXT:    v_cmp_lt_u32_e64 s[0:1], s14, v1
 ; GFX9-NEXT:    s_cmp_eq_u32 s8, s15
 ; GFX9-NEXT:    v_cndmask_b32_e64 v1, 0, -1, s[0:1]
 ; GFX9-NEXT:    v_mov_b32_e32 v2, s19
@@ -8660,7 +8660,7 @@ define amdgpu_kernel void @sdiv_v2i64_pow2_shl_denom(ptr addrspace(1) %out, <2 x
 ; GFX9-NEXT:    s_subb_u32 s0, s9, s18
 ; GFX9-NEXT:    s_cmp_ge_u32 s0, s15
 ; GFX9-NEXT:    s_cselect_b32 s1, -1, 0
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s14, v0
+; GFX9-NEXT:    v_cmp_lt_u32_e32 vcc, s14, v0
 ; GFX9-NEXT:    s_cmp_eq_u32 s0, s15
 ; GFX9-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc
 ; GFX9-NEXT:    v_mov_b32_e32 v3, s1
@@ -8779,7 +8779,7 @@ define amdgpu_kernel void @sdiv_v2i64_pow2_shl_denom(ptr addrspace(1) %out, <2 x
 ; GFX9-NEXT:    s_subb_u32 s10, s10, 0
 ; GFX9-NEXT:    s_cmp_ge_u32 s10, s7
 ; GFX9-NEXT:    s_cselect_b32 s15, -1, 0
-; GFX9-NEXT:    v_cmp_le_u32_e64 s[0:1], s6, v3
+; GFX9-NEXT:    v_cmp_lt_u32_e64 s[0:1], s6, v3
 ; GFX9-NEXT:    s_cmp_eq_u32 s10, s7
 ; GFX9-NEXT:    v_cndmask_b32_e64 v3, 0, -1, s[0:1]
 ; GFX9-NEXT:    v_mov_b32_e32 v5, s15
@@ -8800,7 +8800,7 @@ define amdgpu_kernel void @sdiv_v2i64_pow2_shl_denom(ptr addrspace(1) %out, <2 x
 ; GFX9-NEXT:    s_subb_u32 s0, s11, s14
 ; GFX9-NEXT:    s_cmp_ge_u32 s0, s7
 ; GFX9-NEXT:    s_cselect_b32 s1, -1, 0
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s6, v2
+; GFX9-NEXT:    v_cmp_lt_u32_e32 vcc, s6, v2
 ; GFX9-NEXT:    s_cmp_eq_u32 s0, s7
 ; GFX9-NEXT:    v_cndmask_b32_e64 v2, 0, -1, vcc
 ; GFX9-NEXT:    v_mov_b32_e32 v6, s1
@@ -9083,9 +9083,9 @@ define amdgpu_kernel void @srem_i64_pow2_shl_denom(ptr addrspace(1) %out, i64 %x
 ; GFX6-NEXT:    v_subb_u32_e64 v2, s[0:1], v2, v3, vcc
 ; GFX6-NEXT:    v_subrev_i32_e64 v4, s[0:1], s8, v0
 ; GFX6-NEXT:    v_subbrev_u32_e64 v5, s[2:3], 0, v2, s[0:1]
-; GFX6-NEXT:    v_cmp_le_u32_e64 s[2:3], s9, v5
+; GFX6-NEXT:    v_cmp_lt_u32_e64 s[2:3], s9, v5
 ; GFX6-NEXT:    v_cndmask_b32_e64 v6, 0, -1, s[2:3]
-; GFX6-NEXT:    v_cmp_le_u32_e64 s[2:3], s8, v4
+; GFX6-NEXT:    v_cmp_lt_u32_e64 s[2:3], s8, v4
 ; GFX6-NEXT:    v_subb_u32_e64 v2, s[0:1], v2, v3, s[0:1]
 ; GFX6-NEXT:    v_cndmask_b32_e64 v7, 0, -1, s[2:3]
 ; GFX6-NEXT:    v_cmp_eq_u32_e64 s[2:3], s9, v5
@@ -9096,9 +9096,9 @@ define amdgpu_kernel void @srem_i64_pow2_shl_denom(ptr addrspace(1) %out, i64 %x
 ; GFX6-NEXT:    v_cndmask_b32_e64 v3, v4, v3, s[0:1]
 ; GFX6-NEXT:    v_mov_b32_e32 v4, s13
 ; GFX6-NEXT:    v_subb_u32_e32 v1, vcc, v4, v1, vcc
-; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, s9, v1
+; GFX6-NEXT:    v_cmp_lt_u32_e32 vcc, s9, v1
 ; GFX6-NEXT:    v_cndmask_b32_e64 v4, 0, -1, vcc
-; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, s8, v0
+; GFX6-NEXT:    v_cmp_lt_u32_e32 vcc, s8, v0
 ; GFX6-NEXT:    v_cndmask_b32_e64 v2, v5, v2, s[0:1]
 ; GFX6-NEXT:    v_cndmask_b32_e64 v5, 0, -1, vcc
 ; GFX6-NEXT:    v_cmp_eq_u32_e32 vcc, s9, v1
@@ -9223,7 +9223,7 @@ define amdgpu_kernel void @srem_i64_pow2_shl_denom(ptr addrspace(1) %out, i64 %x
 ; GFX9-NEXT:    s_subb_u32 s12, s10, 0
 ; GFX9-NEXT:    s_cmp_ge_u32 s12, s7
 ; GFX9-NEXT:    s_cselect_b32 s13, -1, 0
-; GFX9-NEXT:    v_cmp_le_u32_e64 s[2:3], s6, v2
+; GFX9-NEXT:    v_cmp_lt_u32_e64 s[2:3], s6, v2
 ; GFX9-NEXT:    s_cmp_eq_u32 s12, s7
 ; GFX9-NEXT:    v_cndmask_b32_e64 v3, 0, -1, s[2:3]
 ; GFX9-NEXT:    v_mov_b32_e32 v4, s13
@@ -9243,7 +9243,7 @@ define amdgpu_kernel void @srem_i64_pow2_shl_denom(ptr addrspace(1) %out, i64 %x
 ; GFX9-NEXT:    s_subb_u32 s0, s11, s5
 ; GFX9-NEXT:    s_cmp_ge_u32 s0, s7
 ; GFX9-NEXT:    s_cselect_b32 s1, -1, 0
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s6, v1
+; GFX9-NEXT:    v_cmp_lt_u32_e32 vcc, s6, v1
 ; GFX9-NEXT:    s_cmp_eq_u32 s0, s7
 ; GFX9-NEXT:    v_cndmask_b32_e64 v4, 0, -1, vcc
 ; GFX9-NEXT:    v_mov_b32_e32 v5, s1
@@ -9451,9 +9451,9 @@ define amdgpu_kernel void @srem_v2i64_pow2_shl_denom(ptr addrspace(1) %out, <2 x
 ; GFX6-NEXT:    v_subb_u32_e64 v2, s[0:1], v2, v3, vcc
 ; GFX6-NEXT:    v_subrev_i32_e64 v4, s[0:1], s14, v0
 ; GFX6-NEXT:    v_subbrev_u32_e64 v5, s[2:3], 0, v2, s[0:1]
-; GFX6-NEXT:    v_cmp_le_u32_e64 s[2:3], s15, v5
+; GFX6-NEXT:    v_cmp_lt_u32_e64 s[2:3], s15, v5
 ; GFX6-NEXT:    v_cndmask_b32_e64 v6, 0, -1, s[2:3]
-; GFX6-NEXT:    v_cmp_le_u32_e64 s[2:3], s14, v4
+; GFX6-NEXT:    v_cmp_lt_u32_e64 s[2:3], s14, v4
 ; GFX6-NEXT:    v_subb_u32_e64 v2, s[0:1], v2, v3, s[0:1]
 ; GFX6-NEXT:    v_cndmask_b32_e64 v7, 0, -1, s[2:3]
 ; GFX6-NEXT:    v_cmp_eq_u32_e64 s[2:3], s15, v5
@@ -9472,9 +9472,9 @@ define amdgpu_kernel void @srem_v2i64_pow2_shl_denom(ptr addrspace(1) %out, <2 x
 ; GFX6-NEXT:    v_subb_u32_e32 v1, vcc, v4, v1, vcc
 ; GFX6-NEXT:    v_cvt_f32_u32_e32 v4, s8
 ; GFX6-NEXT:    v_cvt_f32_u32_e32 v5, s9
-; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, s15, v1
+; GFX6-NEXT:    v_cmp_lt_u32_e32 vcc, s15, v1
 ; GFX6-NEXT:    v_cndmask_b32_e64 v6, 0, -1, vcc
-; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, s14, v0
+; GFX6-NEXT:    v_cmp_lt_u32_e32 vcc, s14, v0
 ; GFX6-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v5
 ; GFX6-NEXT:    v_rcp_f32_e32 v4, v4
 ; GFX6-NEXT:    v_cndmask_b32_e64 v7, 0, -1, vcc
@@ -9571,9 +9571,9 @@ define amdgpu_kernel void @srem_v2i64_pow2_shl_denom(ptr addrspace(1) %out, <2 x
 ; GFX6-NEXT:    v_subb_u32_e64 v4, s[0:1], v4, v5, vcc
 ; GFX6-NEXT:    v_subrev_i32_e64 v6, s[0:1], s8, v2
 ; GFX6-NEXT:    v_subbrev_u32_e64 v7, s[2:3], 0, v4, s[0:1]
-; GFX6-NEXT:    v_cmp_le_u32_e64 s[2:3], s9, v7
+; GFX6-NEXT:    v_cmp_lt_u32_e64 s[2:3], s9, v7
 ; GFX6-NEXT:    v_cndmask_b32_e64 v8, 0, -1, s[2:3]
-; GFX6-NEXT:    v_cmp_le_u32_e64 s[2:3], s8, v6
+; GFX6-NEXT:    v_cmp_lt_u32_e64 s[2:3], s8, v6
 ; GFX6-NEXT:    v_subb_u32_e64 v4, s[0:1], v4, v5, s[0:1]
 ; GFX6-NEXT:    v_cndmask_b32_e64 v9, 0, -1, s[2:3]
 ; GFX6-NEXT:    v_cmp_eq_u32_e64 s[2:3], s9, v7
@@ -9584,9 +9584,9 @@ define amdgpu_kernel void @srem_v2i64_pow2_shl_denom(ptr addrspace(1) %out, <2 x
 ; GFX6-NEXT:    v_cndmask_b32_e64 v5, v6, v5, s[0:1]
 ; GFX6-NEXT:    v_mov_b32_e32 v6, s11
 ; GFX6-NEXT:    v_subb_u32_e32 v3, vcc, v6, v3, vcc
-; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, s9, v3
+; GFX6-NEXT:    v_cmp_lt_u32_e32 vcc, s9, v3
 ; GFX6-NEXT:    v_cndmask_b32_e64 v6, 0, -1, vcc
-; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, s8, v2
+; GFX6-NEXT:    v_cmp_lt_u32_e32 vcc, s8, v2
 ; GFX6-NEXT:    v_cndmask_b32_e64 v4, v7, v4, s[0:1]
 ; GFX6-NEXT:    v_cndmask_b32_e64 v7, 0, -1, vcc
 ; GFX6-NEXT:    v_cmp_eq_u32_e32 vcc, s9, v3
@@ -9712,7 +9712,7 @@ define amdgpu_kernel void @srem_v2i64_pow2_shl_denom(ptr addrspace(1) %out, <2 x
 ; GFX9-NEXT:    s_subb_u32 s9, s4, 0
 ; GFX9-NEXT:    s_cmp_ge_u32 s9, s13
 ; GFX9-NEXT:    s_cselect_b32 s17, -1, 0
-; GFX9-NEXT:    v_cmp_le_u32_e64 s[2:3], s12, v1
+; GFX9-NEXT:    v_cmp_lt_u32_e64 s[2:3], s12, v1
 ; GFX9-NEXT:    s_cmp_eq_u32 s9, s13
 ; GFX9-NEXT:    v_cndmask_b32_e64 v2, 0, -1, s[2:3]
 ; GFX9-NEXT:    v_mov_b32_e32 v3, s17
@@ -9732,7 +9732,7 @@ define amdgpu_kernel void @srem_v2i64_pow2_shl_denom(ptr addrspace(1) %out, <2 x
 ; GFX9-NEXT:    s_subb_u32 s0, s5, s8
 ; GFX9-NEXT:    s_cmp_ge_u32 s0, s13
 ; GFX9-NEXT:    s_cselect_b32 s1, -1, 0
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s12, v0
+; GFX9-NEXT:    v_cmp_lt_u32_e32 vcc, s12, v0
 ; GFX9-NEXT:    s_cmp_eq_u32 s0, s13
 ; GFX9-NEXT:    v_cndmask_b32_e64 v3, 0, -1, vcc
 ; GFX9-NEXT:    v_mov_b32_e32 v5, s1
@@ -9849,7 +9849,7 @@ define amdgpu_kernel void @srem_v2i64_pow2_shl_denom(ptr addrspace(1) %out, <2 x
 ; GFX9-NEXT:    s_subb_u32 s12, s10, 0
 ; GFX9-NEXT:    s_cmp_ge_u32 s12, s5
 ; GFX9-NEXT:    s_cselect_b32 s13, -1, 0
-; GFX9-NEXT:    v_cmp_le_u32_e64 s[2:3], s4, v3
+; GFX9-NEXT:    v_cmp_lt_u32_e64 s[2:3], s4, v3
 ; GFX9-NEXT:    s_cmp_eq_u32 s12, s5
 ; GFX9-NEXT:    v_cndmask_b32_e64 v5, 0, -1, s[2:3]
 ; GFX9-NEXT:    v_mov_b32_e32 v6, s13
@@ -9869,7 +9869,7 @@ define amdgpu_kernel void @srem_v2i64_pow2_shl_denom(ptr addrspace(1) %out, <2 x
 ; GFX9-NEXT:    s_subb_u32 s0, s11, s9
 ; GFX9-NEXT:    s_cmp_ge_u32 s0, s5
 ; GFX9-NEXT:    s_cselect_b32 s1, -1, 0
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s4, v2
+; GFX9-NEXT:    v_cmp_lt_u32_e32 vcc, s4, v2
 ; GFX9-NEXT:    s_cmp_eq_u32 s0, s5
 ; GFX9-NEXT:    v_cndmask_b32_e64 v6, 0, -1, vcc
 ; GFX9-NEXT:    v_mov_b32_e32 v7, s1
@@ -10099,6 +10099,15 @@ define <2 x i64> @srem_zero_zero() {
 ; GCN-LABEL: kernel:
 ; GCN:       ; %bb.0: ; %entry
 ; GCN-NEXT:    s_endpgm
+; GFX6-LABEL: srem_zero_zero:
+; GFX6:       ; %bb.0: ; %entry
+; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: srem_zero_zero:
+; GFX9:       ; %bb.0: ; %entry
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
 entry:
   %B = srem <2 x i64> zeroinitializer, zeroinitializer
   ret <2 x i64> %B
diff --git a/llvm/test/CodeGen/AMDGPU/ashr64_reduce.ll b/llvm/test/CodeGen/AMDGPU/ashr64_reduce.ll
index 78942bfc68d63..fa1e40535a252 100644
--- a/llvm/test/CodeGen/AMDGPU/ashr64_reduce.ll
+++ b/llvm/test/CodeGen/AMDGPU/ashr64_reduce.ll
@@ -537,10 +537,10 @@ define i64 @ashr_maxmin(i64 %arg0, i64 noundef %arg1) {
 ; CHECK-LABEL: ashr_maxmin:
 ; CHECK:       ; %bb.0:
 ; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT:    v_cmp_lt_u64_e32 vcc, 32, v[2:3]
+; CHECK-NEXT:    v_cmp_le_u64_e32 vcc, 32, v[2:3]
 ; CHECK-NEXT:    v_cndmask_b32_e32 v3, 0, v3, vcc
 ; CHECK-NEXT:    v_cndmask_b32_e32 v2, 32, v2, vcc
-; CHECK-NEXT:    v_cmp_gt_u64_e32 vcc, 63, v[2:3]
+; CHECK-NEXT:    v_cmp_ge_u64_e32 vcc, 63, v[2:3]
 ; CHECK-NEXT:    v_cndmask_b32_e32 v2, 63, v2, vcc
 ; CHECK-NEXT:    v_ashrrev_i64 v[0:1], v2, v[0:1]
 ; CHECK-NEXT:    s_setpc_b64 s[30:31]
@@ -554,15 +554,15 @@ define <2 x i64> @ashr_v2_maxmin(<2 x i64> %arg0, <2 x i64> noundef %arg1) {
 ; CHECK-LABEL: ashr_v2_maxmin:
 ; CHECK:       ; %bb.0:
 ; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT:    v_cmp_lt_u64_e32 vcc, 32, v[4:5]
+; CHECK-NEXT:    v_cmp_le_u64_e32 vcc, 32, v[4:5]
 ; CHECK-NEXT:    v_cndmask_b32_e32 v5, 0, v5, vcc
 ; CHECK-NEXT:    v_cndmask_b32_e32 v4, 32, v4, vcc
-; CHECK-NEXT:    v_cmp_lt_u64_e32 vcc, 32, v[6:7]
+; CHECK-NEXT:    v_cmp_le_u64_e32 vcc, 32, v[6:7]
 ; CHECK-NEXT:    v_cndmask_b32_e32 v7, 0, v7, vcc
 ; CHECK-NEXT:    v_cndmask_b32_e32 v6, 32, v6, vcc
-; CHECK-NEXT:    v_cmp_gt_u64_e32 vcc, 63, v[6:7]
+; CHECK-NEXT:    v_cmp_ge_u64_e32 vcc, 63, v[6:7]
 ; CHECK-NEXT:    v_cndmask_b32_e32 v6, 63, v6, vcc
-; CHECK-NEXT:    v_cmp_gt_u64_e32 vcc, 63, v[4:5]
+; CHECK-NEXT:    v_cmp_ge_u64_e32 vcc, 63, v[4:5]
 ; CHECK-NEXT:    v_ashrrev_i64 v[2:3], v6, v[2:3]
 ; CHECK-NEXT:    v_cndmask_b32_e32 v4, 63, v4, vcc
 ; CHECK-NEXT:    v_ashrrev_i64 v[0:1], v4, v[0:1]
@@ -577,21 +577,21 @@ define <3 x i64> @ashr_v3_maxmin(<3 x i64> %arg0, <3 x i64> noundef %arg1) {
 ; CHECK-LABEL: ashr_v3_maxmin:
 ; CHECK:       ; %bb.0:
 ; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT:    v_cmp_lt_u64_e32 vcc, 32, v[6:7]
+; CHECK-NEXT:    v_cmp_le_u64_e32 vcc, 32, v[6:7]
 ; CHECK-NEXT:    v_cndmask_b32_e32 v7, 0, v7, vcc
 ; CHECK-NEXT:    v_cndmask_b32_e32 v6, 32, v6, vcc
-; CHECK-NEXT:    v_cmp_lt_u64_e32 vcc, 32, v[8:9]
+; CHECK-NEXT:    v_cmp_le_u64_e32 vcc, 32, v[8:9]
 ; CHECK-NEXT:    v_cndmask_b32_e32 v9, 0, v9, vcc
 ; CHECK-NEXT:    v_cndmask_b32_e32 v8, 32, v8, vcc
-; CHECK-NEXT:    v_cmp_lt_u64_e32 vcc, 32, v[10:11]
+; CHECK-NEXT:    v_cmp_le_u64_e32 vcc, 32, v[10:11]
 ; CHECK-NEXT:    v_cndmask_b32_e32 v11, 0, v11, vcc
 ; CHECK-NEXT:    v_cndmask_b32_e32 v10, 32, v10, vcc
-; CHECK-NEXT:    v_cmp_gt_u64_e32 vcc, 63, v[10:11]
+; CHECK-NEXT:    v_cmp_ge_u64_e32 vcc, 63, v[10:11]
 ; CHECK-NEXT:    v_cndmask_b32_e32 v10, 63, v10, vcc
-; CHECK-NEXT:    v_cmp_gt_u64_e32 vcc, 63, v[8:9]
+; CHECK-NEXT:    v_cmp_ge_u64_e32 vcc, 63, v[8:9]
 ; CHECK-NEXT:    v_ashrrev_i64 v[4:5], v10, v[4:5]
 ; CHECK-NEXT:    v_cndmask_b32_e32 v8, 63, v8, vcc
-; CHECK-NEXT:    v_cmp_gt_u64_e32 vcc, 63, v[6:7]
+; CHECK-NEXT:    v_cmp_ge_u64_e32 vcc, 63, v[6:7]
 ; CHECK-NEXT:    v_ashrrev_i64 v[2:3], v8, v[2:3]
 ; CHECK-NEXT:    v_cndmask_b32_e32 v6, 63, v6, vcc
 ; CHECK-NEXT:    v_ashrrev_i64 v[0:1], v6, v[0:1]
@@ -606,27 +606,27 @@ define <4 x i64> @ashr_v4_maxmin(<4 x i64> %arg0, <4 x i64> noundef %arg1) {
 ; CHECK-LABEL: ashr_v4_maxmin:
 ; CHECK:       ; %bb.0:
 ; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT:    v_cmp_lt_u64_e32 vcc, 32, v[8:9]
+; CHECK-NEXT:    v_cmp_le_u64_e32 vcc, 32, v[8:9]
 ; CHECK-NEXT:    v_cndmask_b32_e32 v9, 0, v9, vcc
 ; CHECK-NEXT:    v_cndmask_b32_e32 v8, 32, v8, vcc
-; CHECK-NEXT:    v_cmp_lt_u64_e32 vcc, 32, v[10:11]
+; CHECK-NEXT:    v_cmp_le_u64_e32 vcc, 32, v[10:11]
 ; CHECK-NEXT:    v_cndmask_b32_e32 v11, 0, v11, vcc
 ; CHECK-NEXT:    v_cndmask_b32_e32 v10, 32, v10, vcc
-; CHECK-NEXT:    v_cmp_lt_u64_e32 vcc, 32, v[12:13]
+; CHECK-NEXT:    v_cmp_le_u64_e32 vcc, 32, v[12:13]
 ; CHECK-NEXT:    v_cndmask_b32_e32 v13, 0, v13, vcc
 ; CHECK-NEXT:    v_cndmask_b32_e32 v12, 32, v12, vcc
-; CHECK-NEXT:    v_cmp_lt_u64_e32 vcc, 32, v[14:15]
+; CHECK-NEXT:    v_cmp_le_u64_e32 vcc, 32, v[14:15]
 ; CHECK-NEXT:    v_cndmask_b32_e32 v15, 0, v15, vcc
 ; CHECK-NEXT:    v_cndmask_b32_e32 v14, 32, v14, vcc
-; CHECK-NEXT:    v_cmp_gt_u64_e32 vcc, 63, v[14:15]
+; CHECK-NEXT:    v_cmp_ge_u64_e32 vcc, 63, v[14:15]
 ; CHECK-NEXT:    v_cndmask_b32_e32 v14, 63, v14, vcc
-; CHECK-NEXT:    v_cmp_gt_u64_e32 vcc, 63, v[12:13]
+; CHECK-NEXT:    v_cmp_ge_u64_e32 vcc, 63, v[12:13]
 ; CHECK-NEXT:    v_ashrrev_i64 v[6:7], v14, v[6:7]
 ; CHECK-NEXT:    v_cndmask_b32_e32 v12, 63, v12, vcc
-; CHECK-NEXT:    v_cmp_gt_u64_e32 vcc, 63, v[10:11]
+; CHECK-NEXT:    v_cmp_ge_u64_e32 vcc, 63, v[10:11]
 ; CHECK-NEXT:    v_ashrrev_i64 v[4:5], v12, v[4:5]
 ; CHECK-NEXT:    v_cndmask_b32_e32 v10, 63, v10, vcc
-; CHECK-NEXT:    v_cmp_gt_u64_e32 vcc, 63, v[8:9]
+; CHECK-NEXT:    v_cmp_ge_u64_e32 vcc, 63, v[8:9]
 ; CHECK-NEXT:    v_ashrrev_i64 v[2:3], v10, v[2:3]
 ; CHECK-NEXT:    v_cndmask_b32_e32 v8, 63, v8, vcc
 ; CHECK-NEXT:    v_ashrrev_i64 v[0:1], v8, v[0:1]
diff --git a/llvm/test/CodeGen/AMDGPU/branch-relaxation.ll b/llvm/test/CodeGen/AMDGPU/branch-relaxation.ll
index b03ade4b527e6..fb9d982483b0c 100644
--- a/llvm/test/CodeGen/AMDGPU/branch-relaxation.ll
+++ b/llvm/test/CodeGen/AMDGPU/branch-relaxation.ll
@@ -986,7 +986,7 @@ bb3:
 define amdgpu_kernel void @uniform_inside_divergent(ptr addrspace(1) %out, i32 %cond) #0 {
 ; GCN-LABEL: uniform_inside_divergent:
 ; GCN:       ; %bb.0: ; %entry
-; GCN-NEXT:    v_cmp_gt_u32_e32 vcc, 16, v0
+; GCN-NEXT:    v_cmp_ge_u32_e32 vcc, 16, v0
 ; GCN-NEXT:    s_and_saveexec_b64 s[6:7], vcc
 ; GCN-NEXT:    s_cbranch_execnz .LBB8_1
 ; GCN-NEXT:  ; %bb.4: ; %entry
@@ -1019,7 +1019,7 @@ define amdgpu_kernel void @uniform_inside_divergent(ptr addrspace(1) %out, i32 %
 ; GFX11-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
 ; GFX11-NEXT:    s_mov_b64 s[0:1], exec
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_cmpx_gt_u32_e32 16, v0
+; GFX11-NEXT:    v_cmpx_ge_u32_e32 16, v0
 ; GFX11-NEXT:    s_cbranch_execz .LBB8_3
 ; GFX11-NEXT:  ; %bb.1: ; %if
 ; GFX11-NEXT:    s_clause 0x1
@@ -1043,7 +1043,7 @@ define amdgpu_kernel void @uniform_inside_divergent(ptr addrspace(1) %out, i32 %
 ; GFX12-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
 ; GFX12-NEXT:    s_mov_b32 s3, exec_lo
 ; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX12-NEXT:    v_cmpx_gt_u32_e32 16, v0
+; GFX12-NEXT:    v_cmpx_ge_u32_e32 16, v0
 ; GFX12-NEXT:    s_cbranch_execz .LBB8_3
 ; GFX12-NEXT:  ; %bb.1: ; %if
 ; GFX12-NEXT:    s_load_b96 s[0:2], s[4:5], 0x24
diff --git a/llvm/test/CodeGen/AMDGPU/buffer-fat-pointers-memcpy.ll b/llvm/test/CodeGen/AMDGPU/buffer-fat-pointers-memcpy.ll
index c69e12731e10d..0d727a8891e3b 100644
--- a/llvm/test/CodeGen/AMDGPU/buffer-fat-pointers-memcpy.ll
+++ b/llvm/test/CodeGen/AMDGPU/buffer-fat-pointers-memcpy.ll
@@ -579,7 +579,7 @@ define amdgpu_kernel void @memcpy_known(ptr addrspace(7) %src, ptr addrspace(7)
 ; GISEL-GFX1100-NEXT:    buffer_store_b128 v[57:60], v65, s[12:15], 0 offen offset:224
 ; GISEL-GFX1100-NEXT:    s_waitcnt vmcnt(0)
 ; GISEL-GFX1100-NEXT:    buffer_store_b128 v[61:64], v65, s[12:15], 0 offen offset:240
-; GISEL-GFX1100-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 0x2000, v0
+; GISEL-GFX1100-NEXT:    v_cmp_ge_u32_e32 vcc_lo, 0x2000, v0
 ; GISEL-GFX1100-NEXT:    s_cbranch_vccnz .LBB0_1
 ; GISEL-GFX1100-NEXT:  ; %bb.2: ; %memcpy-split
 ; GISEL-GFX1100-NEXT:    s_endpgm
diff --git a/llvm/test/CodeGen/AMDGPU/bug-sdag-emitcopyfromreg.ll b/llvm/test/CodeGen/AMDGPU/bug-sdag-emitcopyfromreg.ll
index 07816f1ed6a65..d3266ec8696f5 100644
--- a/llvm/test/CodeGen/AMDGPU/bug-sdag-emitcopyfromreg.ll
+++ b/llvm/test/CodeGen/AMDGPU/bug-sdag-emitcopyfromreg.ll
@@ -7,7 +7,7 @@ define void @f(i32 %arg, ptr %ptr) {
 ; ISA:       ; %bb.0: ; %bb
 ; ISA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; ISA-NEXT:    s_mov_b64 s[4:5], 0
-; ISA-NEXT:    v_cmp_gt_i32_e32 vcc_lo, 1, v0
+; ISA-NEXT:    v_cmp_ge_i32_e32 vcc_lo, 1, v0
 ; ISA-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
 ; ISA-NEXT:    v_mov_b32_e32 v6, 0
 ; ISA-NEXT:    s_waitcnt lgkmcnt(0)
diff --git a/llvm/test/CodeGen/AMDGPU/carryout-selection.ll b/llvm/test/CodeGen/AMDGPU/carryout-selection.ll
index d0ae30f813a72..0adb6c9b96d58 100644
--- a/llvm/test/CodeGen/AMDGPU/carryout-selection.ll
+++ b/llvm/test/CodeGen/AMDGPU/carryout-selection.ll
@@ -766,7 +766,7 @@ define amdgpu_kernel void @vuaddo64(ptr addrspace(1) %out, ptr addrspace(1) %car
 ; CISI-NEXT:    v_mov_b32_e32 v1, s9
 ; CISI-NEXT:    v_add_i32_e32 v0, vcc, s8, v0
 ; CISI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
-; CISI-NEXT:    v_cmp_gt_u64_e32 vcc, s[8:9], v[0:1]
+; CISI-NEXT:    v_cmp_ge_u64_e32 vcc, s[8:9], v[0:1]
 ; CISI-NEXT:    s_mov_b32 s5, s1
 ; CISI-NEXT:    s_mov_b32 s0, s2
 ; CISI-NEXT:    s_mov_b32 s1, s3
@@ -787,7 +787,7 @@ define amdgpu_kernel void @vuaddo64(ptr addrspace(1) %out, ptr addrspace(1) %car
 ; VI-NEXT:    v_mov_b32_e32 v6, s5
 ; VI-NEXT:    v_add_u32_e32 v5, vcc, s4, v0
 ; VI-NEXT:    v_addc_u32_e32 v6, vcc, 0, v6, vcc
-; VI-NEXT:    v_cmp_gt_u64_e32 vcc, s[4:5], v[5:6]
+; VI-NEXT:    v_cmp_ge_u64_e32 vcc, s[4:5], v[5:6]
 ; VI-NEXT:    v_mov_b32_e32 v2, s1
 ; VI-NEXT:    v_mov_b32_e32 v3, s2
 ; VI-NEXT:    v_mov_b32_e32 v4, s3
@@ -805,7 +805,7 @@ define amdgpu_kernel void @vuaddo64(ptr addrspace(1) %out, ptr addrspace(1) %car
 ; GFX9-NEXT:    v_mov_b32_e32 v1, s7
 ; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, s6, v0
 ; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
-; GFX9-NEXT:    v_cmp_gt_u64_e32 vcc, s[6:7], v[0:1]
+; GFX9-NEXT:    v_cmp_ge_u64_e32 vcc, s[6:7], v[0:1]
 ; GFX9-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
 ; GFX9-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
 ; GFX9-NEXT:    global_store_byte v2, v0, s[2:3]
@@ -820,7 +820,7 @@ define amdgpu_kernel void @vuaddo64(ptr addrspace(1) %out, ptr addrspace(1) %car
 ; GFX1010-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1010-NEXT:    v_add_co_u32 v0, s4, s6, v0
 ; GFX1010-NEXT:    v_add_co_ci_u32_e64 v1, s4, s7, 0, s4
-; GFX1010-NEXT:    v_cmp_gt_u64_e32 vcc_lo, s[6:7], v[0:1]
+; GFX1010-NEXT:    v_cmp_ge_u64_e32 vcc_lo, s[6:7], v[0:1]
 ; GFX1010-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc_lo
 ; GFX1010-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
 ; GFX1010-NEXT:    global_store_byte v2, v3, s[2:3]
@@ -835,7 +835,7 @@ define amdgpu_kernel void @vuaddo64(ptr addrspace(1) %out, ptr addrspace(1) %car
 ; GFX1030W32-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1030W32-NEXT:    v_add_co_u32 v0, s4, s6, v0
 ; GFX1030W32-NEXT:    v_add_co_ci_u32_e64 v1, null, s7, 0, s4
-; GFX1030W32-NEXT:    v_cmp_gt_u64_e32 vcc_lo, s[6:7], v[0:1]
+; GFX1030W32-NEXT:    v_cmp_ge_u64_e32 vcc_lo, s[6:7], v[0:1]
 ; GFX1030W32-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc_lo
 ; GFX1030W32-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
 ; GFX1030W32-NEXT:    global_store_byte v2, v3, s[2:3]
@@ -850,7 +850,7 @@ define amdgpu_kernel void @vuaddo64(ptr addrspace(1) %out, ptr addrspace(1) %car
 ; GFX1030W64-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1030W64-NEXT:    v_add_co_u32 v0, s[4:5], s6, v0
 ; GFX1030W64-NEXT:    v_add_co_ci_u32_e64 v1, null, s7, 0, s[4:5]
-; GFX1030W64-NEXT:    v_cmp_gt_u64_e32 vcc, s[6:7], v[0:1]
+; GFX1030W64-NEXT:    v_cmp_ge_u64_e32 vcc, s[6:7], v[0:1]
 ; GFX1030W64-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
 ; GFX1030W64-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
 ; GFX1030W64-NEXT:    global_store_byte v2, v3, s[2:3]
@@ -868,7 +868,7 @@ define amdgpu_kernel void @vuaddo64(ptr addrspace(1) %out, ptr addrspace(1) %car
 ; GFX11-NEXT:    v_add_co_u32 v0, s4, s6, v0
 ; GFX11-NEXT:    v_add_co_ci_u32_e64 v1, null, s7, 0, s4
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_cmp_gt_u64_e32 vcc_lo, s[6:7], v[0:1]
+; GFX11-NEXT:    v_cmp_ge_u64_e32 vcc_lo, s[6:7], v[0:1]
 ; GFX11-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc_lo
 ; GFX11-NEXT:    s_clause 0x1
 ; GFX11-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
@@ -1641,7 +1641,7 @@ define amdgpu_kernel void @vusubo64(ptr addrspace(1) %out, ptr addrspace(1) %car
 ; CISI-NEXT:    v_mov_b32_e32 v1, s9
 ; CISI-NEXT:    v_sub_i32_e32 v0, vcc, s8, v0
 ; CISI-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
-; CISI-NEXT:    v_cmp_lt_u64_e32 vcc, s[8:9], v[0:1]
+; CISI-NEXT:    v_cmp_le_u64_e32 vcc, s[8:9], v[0:1]
 ; CISI-NEXT:    s_mov_b32 s5, s1
 ; CISI-NEXT:    s_mov_b32 s0, s2
 ; CISI-NEXT:    s_mov_b32 s1, s3
@@ -1662,7 +1662,7 @@ define amdgpu_kernel void @vusubo64(ptr addrspace(1) %out, ptr addrspace(1) %car
 ; VI-NEXT:    v_mov_b32_e32 v6, s5
 ; VI-NEXT:    v_sub_u32_e32 v5, vcc, s4, v0
 ; VI-NEXT:    v_subbrev_u32_e32 v6, vcc, 0, v6, vcc
-; VI-NEXT:    v_cmp_lt_u64_e32 vcc, s[4:5], v[5:6]
+; VI-NEXT:    v_cmp_le_u64_e32 vcc, s[4:5], v[5:6]
 ; VI-NEXT:    v_mov_b32_e32 v2, s1
 ; VI-NEXT:    v_mov_b32_e32 v3, s2
 ; VI-NEXT:    v_mov_b32_e32 v4, s3
@@ -1680,7 +1680,7 @@ define amdgpu_kernel void @vusubo64(ptr addrspace(1) %out, ptr addrspace(1) %car
 ; GFX9-NEXT:    v_mov_b32_e32 v1, s7
 ; GFX9-NEXT:    v_sub_co_u32_e32 v0, vcc, s6, v0
 ; GFX9-NEXT:    v_subbrev_co_u32_e32 v1, vcc, 0, v1, vcc
-; GFX9-NEXT:    v_cmp_lt_u64_e32 vcc, s[6:7], v[0:1]
+; GFX9-NEXT:    v_cmp_le_u64_e32 vcc, s[6:7], v[0:1]
 ; GFX9-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
 ; GFX9-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
 ; GFX9-NEXT:    global_store_byte v2, v0, s[2:3]
@@ -1695,7 +1695,7 @@ define amdgpu_kernel void @vusubo64(ptr addrspace(1) %out, ptr addrspace(1) %car
 ; GFX1010-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1010-NEXT:    v_sub_co_u32 v0, s4, s6, v0
 ; GFX1010-NEXT:    v_sub_co_ci_u32_e64 v1, s4, s7, 0, s4
-; GFX1010-NEXT:    v_cmp_lt_u64_e32 vcc_lo, s[6:7], v[0:1]
+; GFX1010-NEXT:    v_cmp_le_u64_e32 vcc_lo, s[6:7], v[0:1]
 ; GFX1010-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc_lo
 ; GFX1010-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
 ; GFX1010-NEXT:    global_store_byte v2, v3, s[2:3]
@@ -1710,7 +1710,7 @@ define amdgpu_kernel void @vusubo64(ptr addrspace(1) %out, ptr addrspace(1) %car
 ; GFX1030W32-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1030W32-NEXT:    v_sub_co_u32 v0, s4, s6, v0
 ; GFX1030W32-NEXT:    v_sub_co_ci_u32_e64 v1, null, s7, 0, s4
-; GFX1030W32-NEXT:    v_cmp_lt_u64_e32 vcc_lo, s[6:7], v[0:1]
+; GFX1030W32-NEXT:    v_cmp_le_u64_e32 vcc_lo, s[6:7], v[0:1]
 ; GFX1030W32-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc_lo
 ; GFX1030W32-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
 ; GFX1030W32-NEXT:    global_store_byte v2, v3, s[2:3]
@@ -1725,7 +1725,7 @@ define amdgpu_kernel void @vusubo64(ptr addrspace(1) %out, ptr addrspace(1) %car
 ; GFX1030W64-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1030W64-NEXT:    v_sub_co_u32 v0, s[4:5], s6, v0
 ; GFX1030W64-NEXT:    v_sub_co_ci_u32_e64 v1, null, s7, 0, s[4:5]
-; GFX1030W64-NEXT:    v_cmp_lt_u64_e32 vcc, s[6:7], v[0:1]
+; GFX1030W64-NEXT:    v_cmp_le_u64_e32 vcc, s[6:7], v[0:1]
 ; GFX1030W64-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
 ; GFX1030W64-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
 ; GFX1030W64-NEXT:    global_store_byte v2, v3, s[2:3]
@@ -1743,7 +1743,7 @@ define amdgpu_kernel void @vusubo64(ptr addrspace(1) %out, ptr addrspace(1) %car
 ; GFX11-NEXT:    v_sub_co_u32 v0, s4, s6, v0
 ; GFX11-NEXT:    v_sub_co_ci_u32_e64 v1, null, s7, 0, s4
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_cmp_lt_u64_e32 vcc_lo, s[6:7], v[0:1]
+; GFX11-NEXT:    v_cmp_le_u64_e32 vcc_lo, s[6:7], v[0:1]
 ; GFX11-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc_lo
 ; GFX11-NEXT:    s_clause 0x1
 ; GFX11-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
@@ -1861,9 +1861,9 @@ define amdgpu_kernel void @sudiv64(ptr addrspace(1) %out, i64 %x, i64 %y) {
 ; CISI-NEXT:    v_subb_u32_e64 v4, s[0:1], v4, v5, vcc
 ; CISI-NEXT:    v_subrev_i32_e64 v5, s[0:1], s2, v3
 ; CISI-NEXT:    v_subbrev_u32_e64 v4, s[0:1], 0, v4, s[0:1]
-; CISI-NEXT:    v_cmp_le_u32_e64 s[0:1], s3, v4
+; CISI-NEXT:    v_cmp_lt_u32_e64 s[0:1], s3, v4
 ; CISI-NEXT:    v_cndmask_b32_e64 v6, 0, -1, s[0:1]
-; CISI-NEXT:    v_cmp_le_u32_e64 s[0:1], s2, v5
+; CISI-NEXT:    v_cmp_lt_u32_e64 s[0:1], s2, v5
 ; CISI-NEXT:    v_cndmask_b32_e64 v5, 0, -1, s[0:1]
 ; CISI-NEXT:    v_cmp_eq_u32_e64 s[0:1], s3, v4
 ; CISI-NEXT:    v_cndmask_b32_e64 v4, v6, v5, s[0:1]
@@ -1876,9 +1876,9 @@ define amdgpu_kernel void @sudiv64(ptr addrspace(1) %out, i64 %x, i64 %y) {
 ; CISI-NEXT:    v_cndmask_b32_e64 v5, v6, v8, s[0:1]
 ; CISI-NEXT:    v_mov_b32_e32 v6, s11
 ; CISI-NEXT:    v_subb_u32_e32 v2, vcc, v6, v2, vcc
-; CISI-NEXT:    v_cmp_le_u32_e32 vcc, s3, v2
+; CISI-NEXT:    v_cmp_lt_u32_e32 vcc, s3, v2
 ; CISI-NEXT:    v_cndmask_b32_e64 v6, 0, -1, vcc
-; CISI-NEXT:    v_cmp_le_u32_e32 vcc, s2, v3
+; CISI-NEXT:    v_cmp_lt_u32_e32 vcc, s2, v3
 ; CISI-NEXT:    v_cndmask_b32_e64 v3, 0, -1, vcc
 ; CISI-NEXT:    v_cmp_eq_u32_e32 vcc, s3, v2
 ; CISI-NEXT:    v_cndmask_b32_e32 v2, v6, v3, vcc
@@ -2011,7 +2011,7 @@ define amdgpu_kernel void @sudiv64(ptr addrspace(1) %out, i64 %x, i64 %y) {
 ; VI-NEXT:    s_subb_u32 s13, s13, 0
 ; VI-NEXT:    s_cmp_ge_u32 s13, s3
 ; VI-NEXT:    s_cselect_b32 s14, -1, 0
-; VI-NEXT:    v_cmp_le_u32_e64 s[0:1], s2, v1
+; VI-NEXT:    v_cmp_lt_u32_e64 s[0:1], s2, v1
 ; VI-NEXT:    s_cmp_eq_u32 s13, s3
 ; VI-NEXT:    v_cndmask_b32_e64 v1, 0, -1, s[0:1]
 ; VI-NEXT:    v_mov_b32_e32 v3, s14
@@ -2032,7 +2032,7 @@ define amdgpu_kernel void @sudiv64(ptr addrspace(1) %out, i64 %x, i64 %y) {
 ; VI-NEXT:    s_subb_u32 s0, s11, s12
 ; VI-NEXT:    s_cmp_ge_u32 s0, s3
 ; VI-NEXT:    s_cselect_b32 s1, -1, 0
-; VI-NEXT:    v_cmp_le_u32_e32 vcc, s2, v0
+; VI-NEXT:    v_cmp_lt_u32_e32 vcc, s2, v0
 ; VI-NEXT:    s_cmp_eq_u32 s0, s3
 ; VI-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc
 ; VI-NEXT:    v_mov_b32_e32 v4, s1
@@ -2178,7 +2178,7 @@ define amdgpu_kernel void @sudiv64(ptr addrspace(1) %out, i64 %x, i64 %y) {
 ; GFX9-NEXT:    s_subb_u32 s13, s13, 0
 ; GFX9-NEXT:    s_cmp_ge_u32 s13, s3
 ; GFX9-NEXT:    s_cselect_b32 s14, -1, 0
-; GFX9-NEXT:    v_cmp_le_u32_e64 s[0:1], s2, v1
+; GFX9-NEXT:    v_cmp_lt_u32_e64 s[0:1], s2, v1
 ; GFX9-NEXT:    s_cmp_eq_u32 s13, s3
 ; GFX9-NEXT:    v_cndmask_b32_e64 v1, 0, -1, s[0:1]
 ; GFX9-NEXT:    v_mov_b32_e32 v2, s14
@@ -2199,7 +2199,7 @@ define amdgpu_kernel void @sudiv64(ptr addrspace(1) %out, i64 %x, i64 %y) {
 ; GFX9-NEXT:    s_subb_u32 s0, s11, s12
 ; GFX9-NEXT:    s_cmp_ge_u32 s0, s3
 ; GFX9-NEXT:    s_cselect_b32 s1, -1, 0
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s2, v0
+; GFX9-NEXT:    v_cmp_lt_u32_e32 vcc, s2, v0
 ; GFX9-NEXT:    s_cmp_eq_u32 s0, s3
 ; GFX9-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc
 ; GFX9-NEXT:    v_mov_b32_e32 v3, s1
@@ -2342,7 +2342,7 @@ define amdgpu_kernel void @sudiv64(ptr addrspace(1) %out, i64 %x, i64 %y) {
 ; GFX1010-NEXT:    s_cmp_lg_u32 s7, 0
 ; GFX1010-NEXT:    s_subb_u32 s6, s6, s3
 ; GFX1010-NEXT:    s_cmp_lg_u32 s12, 0
-; GFX1010-NEXT:    v_cmp_le_u32_e32 vcc_lo, s2, v1
+; GFX1010-NEXT:    v_cmp_lt_u32_e32 vcc_lo, s2, v1
 ; GFX1010-NEXT:    s_subb_u32 s6, s6, 0
 ; GFX1010-NEXT:    s_cmp_ge_u32 s6, s3
 ; GFX1010-NEXT:    v_cndmask_b32_e64 v1, 0, -1, vcc_lo
@@ -2355,7 +2355,7 @@ define amdgpu_kernel void @sudiv64(ptr addrspace(1) %out, i64 %x, i64 %y) {
 ; GFX1010-NEXT:    s_add_u32 s13, s1, 2
 ; GFX1010-NEXT:    s_addc_u32 s14, s5, 0
 ; GFX1010-NEXT:    s_cmp_lg_u32 s7, 0
-; GFX1010-NEXT:    v_cmp_le_u32_e32 vcc_lo, s2, v0
+; GFX1010-NEXT:    v_cmp_lt_u32_e32 vcc_lo, s2, v0
 ; GFX1010-NEXT:    s_subb_u32 s0, s11, s0
 ; GFX1010-NEXT:    v_mov_b32_e32 v2, s13
 ; GFX1010-NEXT:    s_cmp_ge_u32 s0, s3
@@ -2504,7 +2504,7 @@ define amdgpu_kernel void @sudiv64(ptr addrspace(1) %out, i64 %x, i64 %y) {
 ; GFX1030W32-NEXT:    s_cmp_lg_u32 s7, 0
 ; GFX1030W32-NEXT:    s_subb_u32 s6, s6, s3
 ; GFX1030W32-NEXT:    s_cmp_lg_u32 s12, 0
-; GFX1030W32-NEXT:    v_cmp_le_u32_e32 vcc_lo, s2, v1
+; GFX1030W32-NEXT:    v_cmp_lt_u32_e32 vcc_lo, s2, v1
 ; GFX1030W32-NEXT:    s_subb_u32 s6, s6, 0
 ; GFX1030W32-NEXT:    s_cmp_ge_u32 s6, s3
 ; GFX1030W32-NEXT:    v_cndmask_b32_e64 v1, 0, -1, vcc_lo
@@ -2517,7 +2517,7 @@ define amdgpu_kernel void @sudiv64(ptr addrspace(1) %out, i64 %x, i64 %y) {
 ; GFX1030W32-NEXT:    s_add_u32 s13, s1, 2
 ; GFX1030W32-NEXT:    s_addc_u32 s14, s5, 0
 ; GFX1030W32-NEXT:    s_cmp_lg_u32 s7, 0
-; GFX1030W32-NEXT:    v_cmp_le_u32_e32 vcc_lo, s2, v0
+; GFX1030W32-NEXT:    v_cmp_lt_u32_e32 vcc_lo, s2, v0
 ; GFX1030W32-NEXT:    s_subb_u32 s0, s11, s0
 ; GFX1030W32-NEXT:    v_mov_b32_e32 v2, s13
 ; GFX1030W32-NEXT:    s_cmp_ge_u32 s0, s3
@@ -2666,7 +2666,7 @@ define amdgpu_kernel void @sudiv64(ptr addrspace(1) %out, i64 %x, i64 %y) {
 ; GFX1030W64-NEXT:    s_cmp_lg_u64 s[0:1], 0
 ; GFX1030W64-NEXT:    s_subb_u32 s13, s13, s3
 ; GFX1030W64-NEXT:    s_cmp_lg_u64 s[4:5], 0
-; GFX1030W64-NEXT:    v_cmp_le_u32_e32 vcc, s2, v1
+; GFX1030W64-NEXT:    v_cmp_lt_u32_e32 vcc, s2, v1
 ; GFX1030W64-NEXT:    s_subb_u32 s4, s13, 0
 ; GFX1030W64-NEXT:    s_cmp_ge_u32 s4, s3
 ; GFX1030W64-NEXT:    v_cndmask_b32_e64 v1, 0, -1, vcc
@@ -2679,7 +2679,7 @@ define amdgpu_kernel void @sudiv64(ptr addrspace(1) %out, i64 %x, i64 %y) {
 ; GFX1030W64-NEXT:    s_add_u32 s13, s6, 2
 ; GFX1030W64-NEXT:    s_addc_u32 s14, s7, 0
 ; GFX1030W64-NEXT:    s_cmp_lg_u64 s[0:1], 0
-; GFX1030W64-NEXT:    v_cmp_le_u32_e32 vcc, s2, v0
+; GFX1030W64-NEXT:    v_cmp_lt_u32_e32 vcc, s2, v0
 ; GFX1030W64-NEXT:    s_subb_u32 s0, s11, s12
 ; GFX1030W64-NEXT:    v_mov_b32_e32 v2, s13
 ; GFX1030W64-NEXT:    s_cmp_ge_u32 s0, s3
@@ -2834,7 +2834,7 @@ define amdgpu_kernel void @sudiv64(ptr addrspace(1) %out, i64 %x, i64 %y) {
 ; GFX11-NEXT:    s_cmp_lg_u32 s7, 0
 ; GFX11-NEXT:    s_subb_u32 s6, s6, s3
 ; GFX11-NEXT:    s_cmp_lg_u32 s12, 0
-; GFX11-NEXT:    v_cmp_le_u32_e32 vcc_lo, s2, v1
+; GFX11-NEXT:    v_cmp_lt_u32_e32 vcc_lo, s2, v1
 ; GFX11-NEXT:    s_subb_u32 s6, s6, 0
 ; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
 ; GFX11-NEXT:    s_cmp_ge_u32 s6, s3
@@ -2848,7 +2848,7 @@ define amdgpu_kernel void @sudiv64(ptr addrspace(1) %out, i64 %x, i64 %y) {
 ; GFX11-NEXT:    s_add_u32 s13, s1, 2
 ; GFX11-NEXT:    s_addc_u32 s14, s5, 0
 ; GFX11-NEXT:    s_cmp_lg_u32 s7, 0
-; GFX11-NEXT:    v_cmp_le_u32_e32 vcc_lo, s2, v0
+; GFX11-NEXT:    v_cmp_lt_u32_e32 vcc_lo, s2, v0
 ; GFX11-NEXT:    s_subb_u32 s0, s11, s0
 ; GFX11-NEXT:    v_mov_b32_e32 v2, s13
 ; GFX11-NEXT:    s_cmp_ge_u32 s0, s3
diff --git a/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll b/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll
index b93750114e191..3b652aebcee61 100644
--- a/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll
+++ b/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll
@@ -9,7 +9,7 @@
 define amdgpu_kernel void @simple_nested_if(ptr addrspace(1) nocapture %arg) {
 ; GCN-LABEL: simple_nested_if:
 ; GCN:       ; %bb.0: ; %bb
-; GCN-NEXT:    v_cmp_lt_u32_e32 vcc, 1, v0
+; GCN-NEXT:    v_cmp_le_u32_e32 vcc, 1, v0
 ; GCN-NEXT:    s_and_saveexec_b64 s[6:7], vcc
 ; GCN-NEXT:    s_cbranch_execz .LBB0_3
 ; GCN-NEXT:  ; %bb.1: ; %bb.outer.then
@@ -171,7 +171,7 @@ bb.outer.end:                                     ; preds = %bb.outer.then, %bb.
 define amdgpu_kernel void @uncollapsable_nested_if(ptr addrspace(1) nocapture %arg) {
 ; GCN-LABEL: uncollapsable_nested_if:
 ; GCN:       ; %bb.0: ; %bb
-; GCN-NEXT:    v_cmp_lt_u32_e32 vcc, 1, v0
+; GCN-NEXT:    v_cmp_le_u32_e32 vcc, 1, v0
 ; GCN-NEXT:    s_and_saveexec_b64 s[6:7], vcc
 ; GCN-NEXT:    s_cbranch_execz .LBB1_4
 ; GCN-NEXT:  ; %bb.1: ; %bb.outer.then
@@ -372,7 +372,7 @@ define amdgpu_kernel void @nested_if_if_else(ptr addrspace(1) nocapture %arg) {
 ; GCN-NEXT:    v_mov_b32_e32 v2, 0
 ; GCN-NEXT:    s_mov_b32 s3, 0xf000
 ; GCN-NEXT:    s_mov_b32 s2, 0
-; GCN-NEXT:    v_cmp_lt_u32_e32 vcc, 1, v0
+; GCN-NEXT:    v_cmp_le_u32_e32 vcc, 1, v0
 ; GCN-NEXT:    s_waitcnt lgkmcnt(0)
 ; GCN-NEXT:    buffer_store_dword v2, v[1:2], s[0:3], 0 addr64
 ; GCN-NEXT:    s_and_saveexec_b64 s[2:3], vcc
@@ -604,7 +604,7 @@ define amdgpu_kernel void @nested_if_else_if(ptr addrspace(1) nocapture %arg) {
 ; GCN-NEXT:    v_mov_b32_e32 v2, s1
 ; GCN-NEXT:    v_add_i32_e32 v1, vcc, s0, v3
 ; GCN-NEXT:    v_addc_u32_e32 v2, vcc, 0, v2, vcc
-; GCN-NEXT:    v_cmp_gt_u32_e32 vcc, 2, v0
+; GCN-NEXT:    v_cmp_ge_u32_e32 vcc, 2, v0
 ; GCN-NEXT:    buffer_store_dword v4, v[3:4], s[0:3], 0 addr64
 ; GCN-NEXT:    s_and_saveexec_b64 s[0:1], vcc
 ; GCN-NEXT:    s_xor_b64 s[4:5], exec, s[0:1]
@@ -883,7 +883,7 @@ bb.outer.end:
 define amdgpu_kernel void @s_endpgm_unsafe_barrier(ptr addrspace(1) nocapture %arg) {
 ; GCN-LABEL: s_endpgm_unsafe_barrier:
 ; GCN:       ; %bb.0: ; %bb
-; GCN-NEXT:    v_cmp_lt_u32_e32 vcc, 1, v0
+; GCN-NEXT:    v_cmp_le_u32_e32 vcc, 1, v0
 ; GCN-NEXT:    s_and_saveexec_b64 s[0:1], vcc
 ; GCN-NEXT:    s_cbranch_execz .LBB4_2
 ; GCN-NEXT:  ; %bb.1: ; %bb.then
@@ -980,7 +980,7 @@ define void @scc_liveness(i32 %arg) local_unnamed_addr #0 {
 ; GCN:       ; %bb.0: ; %bb
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GCN-NEXT:    s_movk_i32 s4, 0x207
-; GCN-NEXT:    v_cmp_gt_i32_e32 vcc, s4, v0
+; GCN-NEXT:    v_cmp_ge_i32_e32 vcc, s4, v0
 ; GCN-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v0
 ; GCN-NEXT:    s_mov_b64 s[8:9], 0
 ; GCN-NEXT:    v_mov_b32_e32 v7, 0
diff --git a/llvm/test/CodeGen/AMDGPU/combine-add-zext-xor.ll b/llvm/test/CodeGen/AMDGPU/combine-add-zext-xor.ll
index ce4db2f84774b..cea6ae3efcf72 100644
--- a/llvm/test/CodeGen/AMDGPU/combine-add-zext-xor.ll
+++ b/llvm/test/CodeGen/AMDGPU/combine-add-zext-xor.ll
@@ -17,7 +17,7 @@ define i32 @combine_add_zext_xor(i32 inreg %cond) {
 ; GFX1010-NEXT:  .LBB0_1: ; %bb9
 ; GFX1010-NEXT:    ; in Loop: Header=BB0_2 Depth=1
 ; GFX1010-NEXT:    s_xor_b32 s5, s5, -1
-; GFX1010-NEXT:    v_cmp_lt_i32_e32 vcc_lo, 0xfffffbe6, v1
+; GFX1010-NEXT:    v_cmp_le_i32_e32 vcc_lo, 0xfffffbe6, v1
 ; GFX1010-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s5
 ; GFX1010-NEXT:    v_add_nc_u32_e32 v2, v1, v0
 ; GFX1010-NEXT:    v_mov_b32_e32 v1, v2
@@ -50,7 +50,7 @@ define i32 @combine_add_zext_xor(i32 inreg %cond) {
 ; GFX1100-NEXT:    ; in Loop: Header=BB0_2 Depth=1
 ; GFX1100-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
 ; GFX1100-NEXT:    s_xor_b32 s1, s1, -1
-; GFX1100-NEXT:    v_cmp_lt_i32_e32 vcc_lo, 0xfffffbe6, v1
+; GFX1100-NEXT:    v_cmp_le_i32_e32 vcc_lo, 0xfffffbe6, v1
 ; GFX1100-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s1
 ; GFX1100-NEXT:    v_add_nc_u32_e32 v2, v1, v0
 ; GFX1100-NEXT:    s_delay_alu instid0(VALU_DEP_1)
@@ -110,7 +110,7 @@ define i32 @combine_sub_zext_xor(i32 inreg %cond) {
 ; GFX1010-NEXT:  .LBB1_1: ; %bb9
 ; GFX1010-NEXT:    ; in Loop: Header=BB1_2 Depth=1
 ; GFX1010-NEXT:    s_xor_b32 s5, s5, -1
-; GFX1010-NEXT:    v_cmp_lt_i32_e32 vcc_lo, 0xfffffbe6, v1
+; GFX1010-NEXT:    v_cmp_le_i32_e32 vcc_lo, 0xfffffbe6, v1
 ; GFX1010-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s5
 ; GFX1010-NEXT:    v_sub_nc_u32_e32 v2, v1, v0
 ; GFX1010-NEXT:    v_mov_b32_e32 v1, v2
@@ -143,7 +143,7 @@ define i32 @combine_sub_zext_xor(i32 inreg %cond) {
 ; GFX1100-NEXT:    ; in Loop: Header=BB1_2 Depth=1
 ; GFX1100-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
 ; GFX1100-NEXT:    s_xor_b32 s1, s1, -1
-; GFX1100-NEXT:    v_cmp_lt_i32_e32 vcc_lo, 0xfffffbe6, v1
+; GFX1100-NEXT:    v_cmp_le_i32_e32 vcc_lo, 0xfffffbe6, v1
 ; GFX1100-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s1
 ; GFX1100-NEXT:    v_sub_nc_u32_e32 v2, v1, v0
 ; GFX1100-NEXT:    s_delay_alu instid0(VALU_DEP_1)
@@ -396,7 +396,7 @@ define i32 @combine_add_zext_and(i32 inreg %cond) {
 ; GFX1010-NEXT:    s_branch .LBB4_2
 ; GFX1010-NEXT:  .LBB4_1: ; %bb9
 ; GFX1010-NEXT:    ; in Loop: Header=BB4_2 Depth=1
-; GFX1010-NEXT:    v_cmp_lt_i32_e32 vcc_lo, 0xfffffbe6, v1
+; GFX1010-NEXT:    v_cmp_le_i32_e32 vcc_lo, 0xfffffbe6, v1
 ; GFX1010-NEXT:    s_and_b32 s5, s5, vcc_lo
 ; GFX1010-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s5
 ; GFX1010-NEXT:    v_add_nc_u32_e32 v1, v1, v0
@@ -428,7 +428,7 @@ define i32 @combine_add_zext_and(i32 inreg %cond) {
 ; GFX1100-NEXT:  .LBB4_1: ; %bb9
 ; GFX1100-NEXT:    ; in Loop: Header=BB4_2 Depth=1
 ; GFX1100-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1100-NEXT:    v_cmp_lt_i32_e32 vcc_lo, 0xfffffbe6, v1
+; GFX1100-NEXT:    v_cmp_le_i32_e32 vcc_lo, 0xfffffbe6, v1
 ; GFX1100-NEXT:    s_and_b32 s1, s1, vcc_lo
 ; GFX1100-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1100-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s1
@@ -488,7 +488,7 @@ define i32 @combine_sub_zext_and(i32 inreg %cond) {
 ; GFX1010-NEXT:    s_branch .LBB5_2
 ; GFX1010-NEXT:  .LBB5_1: ; %bb9
 ; GFX1010-NEXT:    ; in Loop: Header=BB5_2 Depth=1
-; GFX1010-NEXT:    v_cmp_lt_i32_e32 vcc_lo, 0xfffffbe6, v1
+; GFX1010-NEXT:    v_cmp_le_i32_e32 vcc_lo, 0xfffffbe6, v1
 ; GFX1010-NEXT:    s_and_b32 s5, s5, vcc_lo
 ; GFX1010-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s5
 ; GFX1010-NEXT:    v_sub_nc_u32_e32 v1, v1, v0
@@ -520,7 +520,7 @@ define i32 @combine_sub_zext_and(i32 inreg %cond) {
 ; GFX1100-NEXT:  .LBB5_1: ; %bb9
 ; GFX1100-NEXT:    ; in Loop: Header=BB5_2 Depth=1
 ; GFX1100-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1100-NEXT:    v_cmp_lt_i32_e32 vcc_lo, 0xfffffbe6, v1
+; GFX1100-NEXT:    v_cmp_le_i32_e32 vcc_lo, 0xfffffbe6, v1
 ; GFX1100-NEXT:    s_and_b32 s1, s1, vcc_lo
 ; GFX1100-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1100-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s1
diff --git a/llvm/test/CodeGen/AMDGPU/combine_andor_with_cmps.ll b/llvm/test/CodeGen/AMDGPU/combine_andor_with_cmps.ll
index 1d20218440f6a..499682ff3c028 100644
--- a/llvm/test/CodeGen/AMDGPU/combine_andor_with_cmps.ll
+++ b/llvm/test/CodeGen/AMDGPU/combine_andor_with_cmps.ll
@@ -13,7 +13,7 @@ define i1 @test1(i32 %arg1, i32 %arg2) {
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GCN-NEXT:    v_min_i32_e32 v0, v0, v1
-; GCN-NEXT:    v_cmp_gt_i32_e32 vcc_lo, 0x3e8, v0
+; GCN-NEXT:    v_cmp_ge_i32_e32 vcc_lo, 0x3e8, v0
 ; GCN-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
   %cmp1 = icmp slt i32 %arg1, 1000
@@ -27,7 +27,7 @@ define i1 @test2(i32 %arg1, i32 %arg2) {
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GCN-NEXT:    v_min_u32_e32 v0, v0, v1
-; GCN-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 0x3e8, v0
+; GCN-NEXT:    v_cmp_ge_u32_e32 vcc_lo, 0x3e8, v0
 ; GCN-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
   %cmp1 = icmp ult i32 %arg1, 1000
@@ -41,7 +41,7 @@ define i1 @test3(i32 %arg1, i32 %arg2) {
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GCN-NEXT:    v_min_i32_e32 v0, v0, v1
-; GCN-NEXT:    v_cmp_gt_i32_e32 vcc_lo, 0x3e9, v0
+; GCN-NEXT:    v_cmp_ge_i32_e32 vcc_lo, 0x3e9, v0
 ; GCN-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
   %cmp1 = icmp sle i32 %arg1, 1000
@@ -55,7 +55,7 @@ define i1 @test4(i32 %arg1, i32 %arg2) {
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GCN-NEXT:    v_min_u32_e32 v0, v0, v1
-; GCN-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 0x3e9, v0
+; GCN-NEXT:    v_cmp_ge_u32_e32 vcc_lo, 0x3e9, v0
 ; GCN-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
   %cmp1 = icmp ule i32 %arg1, 1000
@@ -69,7 +69,7 @@ define i1 @test5(i32 %arg1, i32 %arg2) {
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GCN-NEXT:    v_max_i32_e32 v0, v0, v1
-; GCN-NEXT:    v_cmp_lt_i32_e32 vcc_lo, 0x3e8, v0
+; GCN-NEXT:    v_cmp_le_i32_e32 vcc_lo, 0x3e8, v0
 ; GCN-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
   %cmp1 = icmp sgt i32 %arg1, 1000
@@ -83,7 +83,7 @@ define i1 @test6(i32 %arg1, i32 %arg2) {
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GCN-NEXT:    v_max_u32_e32 v0, v0, v1
-; GCN-NEXT:    v_cmp_lt_u32_e32 vcc_lo, 0x3e8, v0
+; GCN-NEXT:    v_cmp_le_u32_e32 vcc_lo, 0x3e8, v0
 ; GCN-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
   %cmp1 = icmp ugt i32 %arg1, 1000
@@ -97,7 +97,7 @@ define i1 @test7(i32 %arg1, i32 %arg2) {
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GCN-NEXT:    v_max_i32_e32 v0, v0, v1
-; GCN-NEXT:    v_cmp_lt_i32_e32 vcc_lo, 0x3e7, v0
+; GCN-NEXT:    v_cmp_le_i32_e32 vcc_lo, 0x3e7, v0
 ; GCN-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
   %cmp1 = icmp sge i32 %arg1, 1000
@@ -111,7 +111,7 @@ define i1 @test8(i32 %arg1, i32 %arg2) {
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GCN-NEXT:    v_max_u32_e32 v0, v0, v1
-; GCN-NEXT:    v_cmp_lt_u32_e32 vcc_lo, 0x3e7, v0
+; GCN-NEXT:    v_cmp_le_u32_e32 vcc_lo, 0x3e7, v0
 ; GCN-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
   %cmp1 = icmp uge i32 %arg1, 1000
@@ -237,7 +237,7 @@ define i1 @test17(i32 %arg1, i32 %arg2) {
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GCN-NEXT:    v_max_i32_e32 v0, v0, v1
-; GCN-NEXT:    v_cmp_gt_i32_e32 vcc_lo, 0x3e8, v0
+; GCN-NEXT:    v_cmp_ge_i32_e32 vcc_lo, 0x3e8, v0
 ; GCN-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
   %cmp1 = icmp slt i32 %arg1, 1000
@@ -251,7 +251,7 @@ define i1 @test18(i32 %arg1, i32 %arg2) {
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GCN-NEXT:    v_max_u32_e32 v0, v0, v1
-; GCN-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 0x3e8, v0
+; GCN-NEXT:    v_cmp_ge_u32_e32 vcc_lo, 0x3e8, v0
 ; GCN-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
   %cmp1 = icmp ult i32 %arg1, 1000
@@ -265,7 +265,7 @@ define i1 @test19(i32 %arg1, i32 %arg2) {
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GCN-NEXT:    v_max_i32_e32 v0, v0, v1
-; GCN-NEXT:    v_cmp_gt_i32_e32 vcc_lo, 0x3e9, v0
+; GCN-NEXT:    v_cmp_ge_i32_e32 vcc_lo, 0x3e9, v0
 ; GCN-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
   %cmp1 = icmp sle i32 %arg1, 1000
@@ -279,7 +279,7 @@ define i1 @test20(i32 %arg1, i32 %arg2) {
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GCN-NEXT:    v_max_u32_e32 v0, v0, v1
-; GCN-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 0x3e9, v0
+; GCN-NEXT:    v_cmp_ge_u32_e32 vcc_lo, 0x3e9, v0
 ; GCN-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
   %cmp1 = icmp ule i32 %arg1, 1000
@@ -293,7 +293,7 @@ define i1 @test21(i32 %arg1, i32 %arg2) {
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GCN-NEXT:    v_min_i32_e32 v0, v0, v1
-; GCN-NEXT:    v_cmp_lt_i32_e32 vcc_lo, 0x3e8, v0
+; GCN-NEXT:    v_cmp_le_i32_e32 vcc_lo, 0x3e8, v0
 ; GCN-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
   %cmp1 = icmp sgt i32 %arg1, 1000
@@ -307,7 +307,7 @@ define i1 @test22(i32 %arg1, i32 %arg2) {
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GCN-NEXT:    v_min_u32_e32 v0, v0, v1
-; GCN-NEXT:    v_cmp_lt_u32_e32 vcc_lo, 0x3e8, v0
+; GCN-NEXT:    v_cmp_le_u32_e32 vcc_lo, 0x3e8, v0
 ; GCN-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
   %cmp1 = icmp ugt i32 %arg1, 1000
@@ -321,7 +321,7 @@ define i1 @test23(i32 %arg1, i32 %arg2) {
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GCN-NEXT:    v_min_i32_e32 v0, v0, v1
-; GCN-NEXT:    v_cmp_lt_i32_e32 vcc_lo, 0x3e7, v0
+; GCN-NEXT:    v_cmp_le_i32_e32 vcc_lo, 0x3e7, v0
 ; GCN-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
   %cmp1 = icmp sge i32 %arg1, 1000
@@ -335,7 +335,7 @@ define i1 @test24(i32 %arg1, i32 %arg2) {
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GCN-NEXT:    v_min_u32_e32 v0, v0, v1
-; GCN-NEXT:    v_cmp_lt_u32_e32 vcc_lo, 0x3e7, v0
+; GCN-NEXT:    v_cmp_le_u32_e32 vcc_lo, 0x3e7, v0
 ; GCN-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
   %cmp1 = icmp uge i32 %arg1, 1000
@@ -2695,8 +2695,8 @@ define i1 @test124(i32 %arg1, i64 %arg2) {
 ; GCN-LABEL: test124:
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    v_cmp_gt_i64_e32 vcc_lo, 0x3e8, v[1:2]
-; GCN-NEXT:    v_cmp_gt_i32_e64 s0, 0x3e8, v0
+; GCN-NEXT:    v_cmp_ge_i64_e32 vcc_lo, 0x3e8, v[1:2]
+; GCN-NEXT:    v_cmp_ge_i32_e64 s0, 0x3e8, v0
 ; GCN-NEXT:    s_or_b32 s0, s0, vcc_lo
 ; GCN-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
@@ -2800,7 +2800,7 @@ define i1 @test131(i16 %arg1, i32 %arg2) {
 ; GFX11-TRUE16-LABEL: test131:
 ; GFX11-TRUE16:       ; %bb.0:
 ; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-TRUE16-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 10, v1
+; GFX11-TRUE16-NEXT:    v_cmp_ge_u32_e32 vcc_lo, 10, v1
 ; GFX11-TRUE16-NEXT:    v_cmp_gt_u16_e64 s0, 10, v0.l
 ; GFX11-TRUE16-NEXT:    s_or_b32 s0, s0, vcc_lo
 ; GFX11-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
@@ -2810,7 +2810,7 @@ define i1 @test131(i16 %arg1, i32 %arg2) {
 ; GFX11-FAKE16:       ; %bb.0:
 ; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-FAKE16-NEXT:    v_cmp_gt_u16_e32 vcc_lo, 10, v0
-; GFX11-FAKE16-NEXT:    v_cmp_gt_u32_e64 s0, 10, v1
+; GFX11-FAKE16-NEXT:    v_cmp_ge_u32_e64 s0, 10, v1
 ; GFX11-FAKE16-NEXT:    s_or_b32 s0, vcc_lo, s0
 ; GFX11-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
 ; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
@@ -2818,7 +2818,7 @@ define i1 @test131(i16 %arg1, i32 %arg2) {
 ; GCN-TRUE16-LABEL: test131:
 ; GCN-TRUE16:       ; %bb.0:
 ; GCN-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-TRUE16-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 10, v1
+; GCN-TRUE16-NEXT:    v_cmp_ge_u32_e32 vcc_lo, 10, v1
 ; GCN-TRUE16-NEXT:    v_cmp_gt_u16_e64 s0, 10, v0.l
 ; GCN-TRUE16-NEXT:    s_or_b32 s0, s0, vcc_lo
 ; GCN-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
@@ -2828,7 +2828,7 @@ define i1 @test131(i16 %arg1, i32 %arg2) {
 ; GCN-FAKE16:       ; %bb.0:
 ; GCN-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GCN-FAKE16-NEXT:    v_cmp_gt_u16_e32 vcc_lo, 10, v0
-; GCN-FAKE16-NEXT:    v_cmp_gt_u32_e64 s0, 10, v1
+; GCN-FAKE16-NEXT:    v_cmp_ge_u32_e64 s0, 10, v1
 ; GCN-FAKE16-NEXT:    s_or_b32 s0, vcc_lo, s0
 ; GCN-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
 ; GCN-FAKE16-NEXT:    s_setpc_b64 s[30:31]
@@ -2863,8 +2863,8 @@ define i1 @test133(i32 %arg1, i32 %arg2) {
 ; GCN-LABEL: test133:
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 0x64, v0
-; GCN-NEXT:    v_cmp_gt_u32_e64 s0, 0x3e8, v1
+; GCN-NEXT:    v_cmp_ge_u32_e32 vcc_lo, 0x64, v0
+; GCN-NEXT:    v_cmp_ge_u32_e64 s0, 0x3e8, v1
 ; GCN-NEXT:    s_or_b32 s0, vcc_lo, s0
 ; GCN-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
diff --git a/llvm/test/CodeGen/AMDGPU/commute-compares.ll b/llvm/test/CodeGen/AMDGPU/commute-compares.ll
index ae8080cf9f06a..ce00534912fb6 100644
--- a/llvm/test/CodeGen/AMDGPU/commute-compares.ll
+++ b/llvm/test/CodeGen/AMDGPU/commute-compares.ll
@@ -103,7 +103,7 @@ define amdgpu_kernel void @commute_ugt_64_i32(ptr addrspace(1) %out, ptr addrspa
 ; GCN-NEXT:    buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
 ; GCN-NEXT:    s_mov_b64 s[2:3], s[6:7]
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    v_cmp_lt_u32_e32 vcc, 64, v2
+; GCN-NEXT:    v_cmp_le_u32_e32 vcc, 64, v2
 ; GCN-NEXT:    v_cndmask_b32_e64 v2, 0, -1, vcc
 ; GCN-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
 ; GCN-NEXT:    s_endpgm
@@ -130,7 +130,7 @@ define amdgpu_kernel void @commute_uge_64_i32(ptr addrspace(1) %out, ptr addrspa
 ; GCN-NEXT:    buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
 ; GCN-NEXT:    s_mov_b64 s[2:3], s[6:7]
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    v_cmp_lt_u32_e32 vcc, 63, v2
+; GCN-NEXT:    v_cmp_le_u32_e32 vcc, 63, v2
 ; GCN-NEXT:    v_cndmask_b32_e64 v2, 0, -1, vcc
 ; GCN-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
 ; GCN-NEXT:    s_endpgm
@@ -157,7 +157,7 @@ define amdgpu_kernel void @commute_ult_64_i32(ptr addrspace(1) %out, ptr addrspa
 ; GCN-NEXT:    buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
 ; GCN-NEXT:    s_mov_b64 s[2:3], s[6:7]
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v2
+; GCN-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v2
 ; GCN-NEXT:    v_cndmask_b32_e64 v2, 0, -1, vcc
 ; GCN-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
 ; GCN-NEXT:    s_endpgm
@@ -184,7 +184,7 @@ define amdgpu_kernel void @commute_ule_63_i32(ptr addrspace(1) %out, ptr addrspa
 ; GCN-NEXT:    buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
 ; GCN-NEXT:    s_mov_b64 s[2:3], s[6:7]
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v2
+; GCN-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v2
 ; GCN-NEXT:    v_cndmask_b32_e64 v2, 0, -1, vcc
 ; GCN-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
 ; GCN-NEXT:    s_endpgm
@@ -212,7 +212,7 @@ define amdgpu_kernel void @commute_ule_64_i32(ptr addrspace(1) %out, ptr addrspa
 ; GCN-NEXT:    s_movk_i32 s4, 0x41
 ; GCN-NEXT:    s_mov_b64 s[2:3], s[6:7]
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    v_cmp_gt_u32_e32 vcc, s4, v2
+; GCN-NEXT:    v_cmp_ge_u32_e32 vcc, s4, v2
 ; GCN-NEXT:    v_cndmask_b32_e64 v2, 0, -1, vcc
 ; GCN-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
 ; GCN-NEXT:    s_endpgm
@@ -266,7 +266,7 @@ define amdgpu_kernel void @commute_sge_neg2_i32(ptr addrspace(1) %out, ptr addrs
 ; GCN-NEXT:    buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
 ; GCN-NEXT:    s_mov_b64 s[2:3], s[6:7]
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    v_cmp_lt_i32_e32 vcc, -3, v2
+; GCN-NEXT:    v_cmp_le_i32_e32 vcc, -3, v2
 ; GCN-NEXT:    v_cndmask_b32_e64 v2, 0, -1, vcc
 ; GCN-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
 ; GCN-NEXT:    s_endpgm
@@ -293,7 +293,7 @@ define amdgpu_kernel void @commute_slt_neg16_i32(ptr addrspace(1) %out, ptr addr
 ; GCN-NEXT:    buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
 ; GCN-NEXT:    s_mov_b64 s[2:3], s[6:7]
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    v_cmp_gt_i32_e32 vcc, -16, v2
+; GCN-NEXT:    v_cmp_ge_i32_e32 vcc, -16, v2
 ; GCN-NEXT:    v_cndmask_b32_e64 v2, 0, -1, vcc
 ; GCN-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
 ; GCN-NEXT:    s_endpgm
@@ -320,7 +320,7 @@ define amdgpu_kernel void @commute_sle_5_i32(ptr addrspace(1) %out, ptr addrspac
 ; GCN-NEXT:    buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
 ; GCN-NEXT:    s_mov_b64 s[2:3], s[6:7]
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    v_cmp_gt_i32_e32 vcc, 6, v2
+; GCN-NEXT:    v_cmp_ge_i32_e32 vcc, 6, v2
 ; GCN-NEXT:    v_cndmask_b32_e64 v2, 0, -1, vcc
 ; GCN-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
 ; GCN-NEXT:    s_endpgm
@@ -408,7 +408,7 @@ define amdgpu_kernel void @commute_ugt_64_i64(ptr addrspace(1) %out, ptr addrspa
 ; GCN-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
 ; GCN-NEXT:    s_mov_b64 s[2:3], s[6:7]
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    v_cmp_lt_u64_e32 vcc, 64, v[3:4]
+; GCN-NEXT:    v_cmp_le_u64_e32 vcc, 64, v[3:4]
 ; GCN-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc
 ; GCN-NEXT:    buffer_store_dword v0, v[1:2], s[0:3], 0 addr64
 ; GCN-NEXT:    s_endpgm
@@ -436,7 +436,7 @@ define amdgpu_kernel void @commute_uge_64_i64(ptr addrspace(1) %out, ptr addrspa
 ; GCN-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
 ; GCN-NEXT:    s_mov_b64 s[2:3], s[6:7]
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    v_cmp_lt_u64_e32 vcc, 63, v[3:4]
+; GCN-NEXT:    v_cmp_le_u64_e32 vcc, 63, v[3:4]
 ; GCN-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc
 ; GCN-NEXT:    buffer_store_dword v0, v[1:2], s[0:3], 0 addr64
 ; GCN-NEXT:    s_endpgm
@@ -464,7 +464,7 @@ define amdgpu_kernel void @commute_ult_64_i64(ptr addrspace(1) %out, ptr addrspa
 ; GCN-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
 ; GCN-NEXT:    s_mov_b64 s[2:3], s[6:7]
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    v_cmp_gt_u64_e32 vcc, 64, v[3:4]
+; GCN-NEXT:    v_cmp_ge_u64_e32 vcc, 64, v[3:4]
 ; GCN-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc
 ; GCN-NEXT:    buffer_store_dword v0, v[1:2], s[0:3], 0 addr64
 ; GCN-NEXT:    s_endpgm
@@ -492,7 +492,7 @@ define amdgpu_kernel void @commute_ule_63_i64(ptr addrspace(1) %out, ptr addrspa
 ; GCN-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
 ; GCN-NEXT:    s_mov_b64 s[2:3], s[6:7]
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    v_cmp_gt_u64_e32 vcc, 64, v[3:4]
+; GCN-NEXT:    v_cmp_ge_u64_e32 vcc, 64, v[3:4]
 ; GCN-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc
 ; GCN-NEXT:    buffer_store_dword v0, v[1:2], s[0:3], 0 addr64
 ; GCN-NEXT:    s_endpgm
@@ -523,7 +523,7 @@ define amdgpu_kernel void @commute_ule_64_i64(ptr addrspace(1) %out, ptr addrspa
 ; GCN-NEXT:    s_mov_b64 s[4:5], 0x41
 ; GCN-NEXT:    s_mov_b64 s[2:3], s[6:7]
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    v_cmp_gt_u64_e32 vcc, s[4:5], v[3:4]
+; GCN-NEXT:    v_cmp_ge_u64_e32 vcc, s[4:5], v[3:4]
 ; GCN-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc
 ; GCN-NEXT:    buffer_store_dword v0, v[1:2], s[0:3], 0 addr64
 ; GCN-NEXT:    s_endpgm
@@ -551,7 +551,7 @@ define amdgpu_kernel void @commute_sgt_neg1_i64(ptr addrspace(1) %out, ptr addrs
 ; GCN-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
 ; GCN-NEXT:    s_mov_b64 s[2:3], s[6:7]
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    v_cmp_lt_i64_e32 vcc, -1, v[3:4]
+; GCN-NEXT:    v_cmp_le_i64_e32 vcc, -1, v[3:4]
 ; GCN-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc
 ; GCN-NEXT:    buffer_store_dword v0, v[1:2], s[0:3], 0 addr64
 ; GCN-NEXT:    s_endpgm
@@ -579,7 +579,7 @@ define amdgpu_kernel void @commute_sge_neg2_i64(ptr addrspace(1) %out, ptr addrs
 ; GCN-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
 ; GCN-NEXT:    s_mov_b64 s[2:3], s[6:7]
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    v_cmp_lt_i64_e32 vcc, -3, v[3:4]
+; GCN-NEXT:    v_cmp_le_i64_e32 vcc, -3, v[3:4]
 ; GCN-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc
 ; GCN-NEXT:    buffer_store_dword v0, v[1:2], s[0:3], 0 addr64
 ; GCN-NEXT:    s_endpgm
@@ -607,7 +607,7 @@ define amdgpu_kernel void @commute_slt_neg16_i64(ptr addrspace(1) %out, ptr addr
 ; GCN-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
 ; GCN-NEXT:    s_mov_b64 s[2:3], s[6:7]
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    v_cmp_gt_i64_e32 vcc, -16, v[3:4]
+; GCN-NEXT:    v_cmp_ge_i64_e32 vcc, -16, v[3:4]
 ; GCN-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc
 ; GCN-NEXT:    buffer_store_dword v0, v[1:2], s[0:3], 0 addr64
 ; GCN-NEXT:    s_endpgm
@@ -635,7 +635,7 @@ define amdgpu_kernel void @commute_sle_5_i64(ptr addrspace(1) %out, ptr addrspac
 ; GCN-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
 ; GCN-NEXT:    s_mov_b64 s[2:3], s[6:7]
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    v_cmp_gt_i64_e32 vcc, 6, v[3:4]
+; GCN-NEXT:    v_cmp_ge_i64_e32 vcc, 6, v[3:4]
 ; GCN-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc
 ; GCN-NEXT:    buffer_store_dword v0, v[1:2], s[0:3], 0 addr64
 ; GCN-NEXT:    s_endpgm
diff --git a/llvm/test/CodeGen/AMDGPU/copy-to-reg-frameindex.ll b/llvm/test/CodeGen/AMDGPU/copy-to-reg-frameindex.ll
index b5616501900dd..595d6706d4509 100644
--- a/llvm/test/CodeGen/AMDGPU/copy-to-reg-frameindex.ll
+++ b/llvm/test/CodeGen/AMDGPU/copy-to-reg-frameindex.ll
@@ -53,7 +53,7 @@ define void @phi_with_alloca_and_divergent_copy_to_reg(ptr addrspace(5) %diverge
 ; CHECK-NEXT:    v_lshl_add_u32 v2, v3, 2, v1
 ; CHECK-NEXT:    buffer_store_dword v3, v2, s[0:3], 0 offen
 ; CHECK-NEXT:    v_add_u32_e32 v2, 1, v3
-; CHECK-NEXT:    v_cmp_lt_u32_e32 vcc, 15, v2
+; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, 15, v2
 ; CHECK-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
 ; CHECK-NEXT:    v_mov_b32_e32 v3, v4
 ; CHECK-NEXT:    v_mov_b32_e32 v2, v0
diff --git a/llvm/test/CodeGen/AMDGPU/div_i128.ll b/llvm/test/CodeGen/AMDGPU/div_i128.ll
index 1396099dbfa6a..31d7058186853 100644
--- a/llvm/test/CodeGen/AMDGPU/div_i128.ll
+++ b/llvm/test/CodeGen/AMDGPU/div_i128.ll
@@ -13,7 +13,7 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-NEXT:    v_subb_co_u32_e32 v9, vcc, 0, v1, vcc
 ; GFX9-NEXT:    v_subb_co_u32_e32 v10, vcc, 0, v2, vcc
 ; GFX9-NEXT:    v_subb_co_u32_e32 v11, vcc, 0, v3, vcc
-; GFX9-NEXT:    v_cmp_gt_i64_e32 vcc, 0, v[2:3]
+; GFX9-NEXT:    v_cmp_ge_i64_e32 vcc, 0, v[2:3]
 ; GFX9-NEXT:    v_ashrrev_i32_e32 v16, 31, v3
 ; GFX9-NEXT:    v_cndmask_b32_e32 v9, v1, v9, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v8, v0, v8, vcc
@@ -23,7 +23,7 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-NEXT:    v_subb_co_u32_e32 v1, vcc, 0, v5, vcc
 ; GFX9-NEXT:    v_subb_co_u32_e32 v2, vcc, 0, v6, vcc
 ; GFX9-NEXT:    v_subb_co_u32_e32 v3, vcc, 0, v7, vcc
-; GFX9-NEXT:    v_cmp_gt_i64_e32 vcc, 0, v[6:7]
+; GFX9-NEXT:    v_cmp_ge_i64_e32 vcc, 0, v[6:7]
 ; GFX9-NEXT:    v_ashrrev_i32_e32 v17, 31, v7
 ; GFX9-NEXT:    v_cndmask_b32_e32 v20, v5, v1, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v21, v4, v0, vcc
@@ -68,7 +68,7 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-NEXT:    v_subb_co_u32_e32 v4, vcc, 0, v5, vcc
 ; GFX9-NEXT:    v_subb_co_u32_e32 v5, vcc, 0, v5, vcc
 ; GFX9-NEXT:    s_mov_b64 s[6:7], 0x7f
-; GFX9-NEXT:    v_cmp_lt_u64_e32 vcc, s[6:7], v[2:3]
+; GFX9-NEXT:    v_cmp_le_u64_e32 vcc, s[6:7], v[2:3]
 ; GFX9-NEXT:    v_mov_b32_e32 v18, v16
 ; GFX9-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
 ; GFX9-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[4:5]
@@ -107,7 +107,7 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-NEXT:    v_lshlrev_b64 v[2:3], v2, v[8:9]
 ; GFX9-NEXT:    v_or_b32_e32 v4, v6, v13
 ; GFX9-NEXT:    v_or_b32_e32 v5, v5, v12
-; GFX9-NEXT:    v_cmp_gt_u32_e64 s[4:5], 64, v7
+; GFX9-NEXT:    v_cmp_ge_u32_e64 s[4:5], 64, v7
 ; GFX9-NEXT:    v_cndmask_b32_e64 v3, v3, v4, s[4:5]
 ; GFX9-NEXT:    v_cndmask_b32_e64 v2, v2, v5, s[4:5]
 ; GFX9-NEXT:    v_lshlrev_b64 v[4:5], v7, v[8:9]
@@ -127,7 +127,7 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-NEXT:    v_sub_u32_e32 v12, 64, v22
 ; GFX9-NEXT:    v_lshrrev_b64 v[6:7], v22, v[8:9]
 ; GFX9-NEXT:    v_lshlrev_b64 v[12:13], v12, v[10:11]
-; GFX9-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v22
+; GFX9-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v22
 ; GFX9-NEXT:    v_or_b32_e32 v12, v6, v12
 ; GFX9-NEXT:    v_subrev_u32_e32 v6, 64, v22
 ; GFX9-NEXT:    v_or_b32_e32 v13, v7, v13
@@ -1277,7 +1277,7 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-G-NEXT:    v_cmp_gt_u64_e64 s[6:7], v[0:1], v[6:7]
 ; GFX9-G-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
 ; GFX9-G-NEXT:    v_cndmask_b32_e64 v6, 0, 1, s[6:7]
-; GFX9-G-NEXT:    v_cmp_lt_u64_e64 s[6:7], 0, v[2:3]
+; GFX9-G-NEXT:    v_cmp_le_u64_e64 s[6:7], 0, v[2:3]
 ; GFX9-G-NEXT:    v_or_b32_e32 v15, v1, v3
 ; GFX9-G-NEXT:    v_cndmask_b32_e64 v7, 0, 1, s[6:7]
 ; GFX9-G-NEXT:    v_cmp_eq_u64_e64 s[6:7], 0, v[2:3]
@@ -1316,7 +1316,7 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-G-NEXT:    v_or_b32_e32 v2, v0, v2
 ; GFX9-G-NEXT:    v_or_b32_e32 v3, v1, v3
 ; GFX9-G-NEXT:    v_lshlrev_b64 v[0:1], v13, v[8:9]
-; GFX9-G-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v12
+; GFX9-G-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v12
 ; GFX9-G-NEXT:    v_cndmask_b32_e32 v6, 0, v6, vcc
 ; GFX9-G-NEXT:    v_cndmask_b32_e32 v7, 0, v7, vcc
 ; GFX9-G-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
@@ -1341,7 +1341,7 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-G-NEXT:    v_or_b32_e32 v2, v0, v2
 ; GFX9-G-NEXT:    v_or_b32_e32 v3, v1, v3
 ; GFX9-G-NEXT:    v_lshrrev_b64 v[0:1], v24, v[10:11]
-; GFX9-G-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v20
+; GFX9-G-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v20
 ; GFX9-G-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; GFX9-G-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
 ; GFX9-G-NEXT:    v_cndmask_b32_e32 v14, 0, v14, vcc
@@ -2344,7 +2344,7 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-NEXT:    v_mov_b32_e32 v8, 0
 ; GFX9-NEXT:    v_subb_co_u32_e32 v14, vcc, 0, v8, vcc
 ; GFX9-NEXT:    v_subb_co_u32_e32 v15, vcc, 0, v8, vcc
-; GFX9-NEXT:    v_cmp_lt_u64_e32 vcc, s[6:7], v[12:13]
+; GFX9-NEXT:    v_cmp_le_u64_e32 vcc, s[6:7], v[12:13]
 ; GFX9-NEXT:    v_or_b32_e32 v10, v13, v15
 ; GFX9-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
 ; GFX9-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[14:15]
@@ -2381,7 +2381,7 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-NEXT:    v_lshlrev_b64 v[8:9], v8, v[0:1]
 ; GFX9-NEXT:    v_or_b32_e32 v11, v11, v14
 ; GFX9-NEXT:    v_or_b32_e32 v10, v10, v13
-; GFX9-NEXT:    v_cmp_gt_u32_e64 s[4:5], 64, v15
+; GFX9-NEXT:    v_cmp_ge_u32_e64 s[4:5], 64, v15
 ; GFX9-NEXT:    v_cndmask_b32_e64 v9, v9, v11, s[4:5]
 ; GFX9-NEXT:    v_cndmask_b32_e64 v8, v8, v10, s[4:5]
 ; GFX9-NEXT:    v_lshlrev_b64 v[10:11], v15, v[0:1]
@@ -2401,7 +2401,7 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-NEXT:    v_sub_u32_e32 v14, 64, v18
 ; GFX9-NEXT:    v_lshrrev_b64 v[12:13], v18, v[0:1]
 ; GFX9-NEXT:    v_lshlrev_b64 v[14:15], v14, v[2:3]
-; GFX9-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v18
+; GFX9-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v18
 ; GFX9-NEXT:    v_or_b32_e32 v14, v12, v14
 ; GFX9-NEXT:    v_subrev_u32_e32 v12, 64, v18
 ; GFX9-NEXT:    v_or_b32_e32 v15, v13, v15
@@ -3398,7 +3398,7 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-G-NEXT:    v_cmp_gt_u64_e64 s[6:7], v[12:13], v[8:9]
 ; GFX9-G-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
 ; GFX9-G-NEXT:    v_cndmask_b32_e64 v8, 0, 1, s[6:7]
-; GFX9-G-NEXT:    v_cmp_lt_u64_e64 s[6:7], 0, v[14:15]
+; GFX9-G-NEXT:    v_cmp_le_u64_e64 s[6:7], 0, v[14:15]
 ; GFX9-G-NEXT:    v_or_b32_e32 v17, v13, v15
 ; GFX9-G-NEXT:    v_cndmask_b32_e64 v9, 0, 1, s[6:7]
 ; GFX9-G-NEXT:    v_cmp_eq_u64_e64 s[6:7], 0, v[14:15]
@@ -3437,7 +3437,7 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-G-NEXT:    v_or_b32_e32 v10, v8, v10
 ; GFX9-G-NEXT:    v_or_b32_e32 v11, v9, v11
 ; GFX9-G-NEXT:    v_lshlrev_b64 v[8:9], v14, v[0:1]
-; GFX9-G-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v16
+; GFX9-G-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v16
 ; GFX9-G-NEXT:    s_mov_b64 s[10:11], s[8:9]
 ; GFX9-G-NEXT:    v_cndmask_b32_e32 v14, 0, v12, vcc
 ; GFX9-G-NEXT:    v_cndmask_b32_e32 v15, 0, v13, vcc
@@ -3462,7 +3462,7 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-G-NEXT:    v_lshrrev_b64 v[2:3], v22, v[2:3]
 ; GFX9-G-NEXT:    v_or_b32_e32 v10, v10, v12
 ; GFX9-G-NEXT:    v_or_b32_e32 v11, v11, v13
-; GFX9-G-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v18
+; GFX9-G-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v18
 ; GFX9-G-NEXT:    s_mov_b64 s[8:9], 0
 ; GFX9-G-NEXT:    v_cndmask_b32_e32 v2, v2, v10, vcc
 ; GFX9-G-NEXT:    v_cndmask_b32_e32 v3, v3, v11, vcc
diff --git a/llvm/test/CodeGen/AMDGPU/div_v2i128.ll b/llvm/test/CodeGen/AMDGPU/div_v2i128.ll
index de439c6f46c6e..251efd7d60a61 100644
--- a/llvm/test/CodeGen/AMDGPU/div_v2i128.ll
+++ b/llvm/test/CodeGen/AMDGPU/div_v2i128.ll
@@ -15,7 +15,7 @@ define <2 x i128> @v_sdiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; SDAG-NEXT:    v_mov_b32_e32 v26, v24
 ; SDAG-NEXT:    v_mov_b32_e32 v27, v25
 ; SDAG-NEXT:    v_subb_u32_e32 v21, vcc, 0, v2, vcc
-; SDAG-NEXT:    v_cmp_gt_i64_e64 s[4:5], 0, v[2:3]
+; SDAG-NEXT:    v_cmp_ge_i64_e64 s[4:5], 0, v[2:3]
 ; SDAG-NEXT:    v_cndmask_b32_e64 v19, v1, v17, s[4:5]
 ; SDAG-NEXT:    v_cndmask_b32_e64 v18, v0, v16, s[4:5]
 ; SDAG-NEXT:    v_subb_u32_e32 v0, vcc, 0, v3, vcc
@@ -33,7 +33,7 @@ define <2 x i128> @v_sdiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; SDAG-NEXT:    v_add_i32_e64 v21, s[4:5], 32, v22
 ; SDAG-NEXT:    v_ffbh_u32_e32 v22, v17
 ; SDAG-NEXT:    v_cmp_eq_u64_e64 s[4:5], 0, v[0:1]
-; SDAG-NEXT:    v_cmp_gt_i64_e64 s[6:7], 0, v[10:11]
+; SDAG-NEXT:    v_cmp_ge_i64_e64 s[6:7], 0, v[10:11]
 ; SDAG-NEXT:    v_cndmask_b32_e64 v28, v9, v23, s[6:7]
 ; SDAG-NEXT:    v_subb_u32_e32 v0, vcc, 0, v10, vcc
 ; SDAG-NEXT:    v_cndmask_b32_e64 v29, v8, v3, s[6:7]
@@ -67,7 +67,7 @@ define <2 x i128> @v_sdiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; SDAG-NEXT:    v_subb_u32_e32 v3, vcc, v8, v9, vcc
 ; SDAG-NEXT:    v_xor_b32_e32 v8, 0x7f, v2
 ; SDAG-NEXT:    v_subb_u32_e32 v10, vcc, 0, v20, vcc
-; SDAG-NEXT:    v_cmp_lt_u64_e64 s[4:5], s[10:11], v[2:3]
+; SDAG-NEXT:    v_cmp_le_u64_e64 s[4:5], s[10:11], v[2:3]
 ; SDAG-NEXT:    v_cndmask_b32_e64 v21, 0, 1, s[4:5]
 ; SDAG-NEXT:    v_subb_u32_e32 v11, vcc, 0, v20, vcc
 ; SDAG-NEXT:    v_or_b32_e32 v8, v8, v10
@@ -107,7 +107,7 @@ define <2 x i128> @v_sdiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; SDAG-NEXT:    v_lshr_b64 v[10:11], v[18:19], v35
 ; SDAG-NEXT:    v_or_b32_e32 v3, v3, v11
 ; SDAG-NEXT:    v_or_b32_e32 v2, v2, v10
-; SDAG-NEXT:    v_cmp_gt_u32_e64 s[4:5], 64, v34
+; SDAG-NEXT:    v_cmp_ge_u32_e64 s[4:5], 64, v34
 ; SDAG-NEXT:    v_cndmask_b32_e64 v3, v21, v3, s[4:5]
 ; SDAG-NEXT:    v_cndmask_b32_e64 v2, v20, v2, s[4:5]
 ; SDAG-NEXT:    v_cndmask_b32_e64 v21, 0, v23, s[4:5]
@@ -126,7 +126,7 @@ define <2 x i128> @v_sdiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; SDAG-NEXT:    v_lshl_b64 v[10:11], v[16:17], v10
 ; SDAG-NEXT:    v_or_b32_e32 v11, v9, v11
 ; SDAG-NEXT:    v_or_b32_e32 v10, v8, v10
-; SDAG-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v30
+; SDAG-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v30
 ; SDAG-NEXT:    v_subrev_i32_e64 v8, s[4:5], 64, v30
 ; SDAG-NEXT:    v_lshr_b64 v[8:9], v[16:17], v8
 ; SDAG-NEXT:    v_cndmask_b32_e32 v9, v9, v11, vcc
@@ -212,7 +212,7 @@ define <2 x i128> @v_sdiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; SDAG-NEXT:    v_mov_b32_e32 v19, v17
 ; SDAG-NEXT:    v_subb_u32_e32 v1, vcc, 0, v5, vcc
 ; SDAG-NEXT:    v_subb_u32_e32 v9, vcc, 0, v6, vcc
-; SDAG-NEXT:    v_cmp_gt_i64_e64 s[4:5], 0, v[6:7]
+; SDAG-NEXT:    v_cmp_ge_i64_e64 s[4:5], 0, v[6:7]
 ; SDAG-NEXT:    v_cndmask_b32_e64 v3, v5, v1, s[4:5]
 ; SDAG-NEXT:    v_cndmask_b32_e64 v2, v4, v0, s[4:5]
 ; SDAG-NEXT:    v_subb_u32_e32 v0, vcc, 0, v7, vcc
@@ -230,7 +230,7 @@ define <2 x i128> @v_sdiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; SDAG-NEXT:    v_ffbh_u32_e32 v30, v5
 ; SDAG-NEXT:    v_min_u32_e32 v6, v10, v6
 ; SDAG-NEXT:    v_subb_u32_e32 v10, vcc, 0, v14, vcc
-; SDAG-NEXT:    v_cmp_gt_i64_e64 s[4:5], 0, v[14:15]
+; SDAG-NEXT:    v_cmp_ge_i64_e64 s[4:5], 0, v[14:15]
 ; SDAG-NEXT:    v_cndmask_b32_e64 v28, v13, v11, s[4:5]
 ; SDAG-NEXT:    v_cndmask_b32_e64 v29, v12, v7, s[4:5]
 ; SDAG-NEXT:    v_cmp_eq_u64_e64 s[6:7], 0, v[0:1]
@@ -264,7 +264,7 @@ define <2 x i128> @v_sdiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; SDAG-NEXT:    v_subb_u32_e32 v7, vcc, v9, v12, vcc
 ; SDAG-NEXT:    v_xor_b32_e32 v9, 0x7f, v6
 ; SDAG-NEXT:    v_subb_u32_e32 v10, vcc, 0, v8, vcc
-; SDAG-NEXT:    v_cmp_lt_u64_e64 s[4:5], s[10:11], v[6:7]
+; SDAG-NEXT:    v_cmp_le_u64_e64 s[4:5], s[10:11], v[6:7]
 ; SDAG-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
 ; SDAG-NEXT:    v_subb_u32_e32 v11, vcc, 0, v8, vcc
 ; SDAG-NEXT:    v_or_b32_e32 v8, v9, v10
@@ -304,7 +304,7 @@ define <2 x i128> @v_sdiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; SDAG-NEXT:    v_lshr_b64 v[10:11], v[2:3], v35
 ; SDAG-NEXT:    v_or_b32_e32 v7, v7, v11
 ; SDAG-NEXT:    v_or_b32_e32 v6, v6, v10
-; SDAG-NEXT:    v_cmp_gt_u32_e64 s[4:5], 64, v34
+; SDAG-NEXT:    v_cmp_ge_u32_e64 s[4:5], 64, v34
 ; SDAG-NEXT:    v_cndmask_b32_e64 v7, v13, v7, s[4:5]
 ; SDAG-NEXT:    v_cndmask_b32_e64 v6, v12, v6, s[4:5]
 ; SDAG-NEXT:    v_cndmask_b32_e64 v11, 0, v15, s[4:5]
@@ -334,7 +334,7 @@ define <2 x i128> @v_sdiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; SDAG-NEXT:    v_or_b32_e32 v9, v9, v49
 ; SDAG-NEXT:    v_or_b32_e32 v8, v8, v48
 ; SDAG-NEXT:    v_addc_u32_e32 v36, vcc, -1, v0, vcc
-; SDAG-NEXT:    v_cmp_gt_u32_e64 s[4:5], 64, v30
+; SDAG-NEXT:    v_cmp_ge_u32_e64 s[4:5], 64, v30
 ; SDAG-NEXT:    v_cndmask_b32_e64 v9, v5, v9, s[4:5]
 ; SDAG-NEXT:    v_cndmask_b32_e64 v8, v4, v8, s[4:5]
 ; SDAG-NEXT:    v_cndmask_b32_e64 v5, 0, v38, s[4:5]
@@ -485,7 +485,7 @@ define <2 x i128> @v_sdiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
 ; GISEL-NEXT:    v_or_b32_e32 v8, v8, v0
 ; GISEL-NEXT:    v_or_b32_e32 v9, v3, v1
-; GISEL-NEXT:    v_cmp_lt_u64_e32 vcc, 0, v[0:1]
+; GISEL-NEXT:    v_cmp_le_u64_e32 vcc, 0, v[0:1]
 ; GISEL-NEXT:    v_cndmask_b32_e64 v17, 0, 1, vcc
 ; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[0:1]
 ; GISEL-NEXT:    v_cndmask_b32_e32 v16, v17, v16, vcc
@@ -518,7 +518,7 @@ define <2 x i128> @v_sdiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GISEL-NEXT:    s_xor_b64 s[4:5], vcc, -1
 ; GISEL-NEXT:    v_lshr_b64 v[8:9], v[18:19], v8
 ; GISEL-NEXT:    v_lshl_b64 v[22:23], v[18:19], v16
-; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v32
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v32
 ; GISEL-NEXT:    v_cndmask_b32_e32 v16, 0, v0, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e32 v17, 0, v1, vcc
 ; GISEL-NEXT:    v_or_b32_e32 v0, v8, v2
@@ -539,7 +539,7 @@ define <2 x i128> @v_sdiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GISEL-NEXT:  ; %bb.2: ; %udiv-preheader4
 ; GISEL-NEXT:    v_add_i32_e32 v32, vcc, 0xffffffc0, v28
 ; GISEL-NEXT:    v_sub_i32_e32 v22, vcc, 64, v28
-; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v28
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v28
 ; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v28
 ; GISEL-NEXT:    v_lshr_b64 v[0:1], v[20:21], v28
 ; GISEL-NEXT:    v_lshr_b64 v[2:3], v[18:19], v28
@@ -674,7 +674,7 @@ define <2 x i128> @v_sdiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GISEL-NEXT:    v_cmp_gt_u64_e32 vcc, v[2:3], v[10:11]
 ; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
 ; GISEL-NEXT:    v_xor_b32_e32 v10, 0x7f, v2
-; GISEL-NEXT:    v_cmp_lt_u64_e32 vcc, 0, v[0:1]
+; GISEL-NEXT:    v_cmp_le_u64_e32 vcc, 0, v[0:1]
 ; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
 ; GISEL-NEXT:    v_or_b32_e32 v10, v10, v0
 ; GISEL-NEXT:    v_or_b32_e32 v11, v3, v1
@@ -709,7 +709,7 @@ define <2 x i128> @v_sdiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GISEL-NEXT:    s_xor_b64 s[4:5], vcc, -1
 ; GISEL-NEXT:    v_lshr_b64 v[10:11], v[6:7], v10
 ; GISEL-NEXT:    v_lshl_b64 v[16:17], v[6:7], v14
-; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v30
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v30
 ; GISEL-NEXT:    v_cndmask_b32_e32 v14, 0, v0, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e32 v15, 0, v1, vcc
 ; GISEL-NEXT:    v_or_b32_e32 v0, v10, v2
@@ -742,7 +742,7 @@ define <2 x i128> @v_sdiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GISEL-NEXT:    s_mov_b64 s[6:7], s[4:5]
 ; GISEL-NEXT:    v_or_b32_e32 v2, v2, v16
 ; GISEL-NEXT:    v_or_b32_e32 v3, v3, v17
-; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v26
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v26
 ; GISEL-NEXT:    v_cndmask_b32_e32 v2, v12, v2, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e32 v3, v13, v3, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e32 v16, 0, v0, vcc
@@ -873,7 +873,7 @@ define <2 x i128> @v_udiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; SDAG-NEXT:    v_subb_u32_e32 v23, vcc, v20, v17, vcc
 ; SDAG-NEXT:    v_xor_b32_e32 v16, 0x7f, v22
 ; SDAG-NEXT:    v_subb_u32_e32 v24, vcc, 0, v28, vcc
-; SDAG-NEXT:    v_cmp_lt_u64_e64 s[4:5], s[8:9], v[22:23]
+; SDAG-NEXT:    v_cmp_le_u64_e64 s[4:5], s[8:9], v[22:23]
 ; SDAG-NEXT:    v_cndmask_b32_e64 v18, 0, 1, s[4:5]
 ; SDAG-NEXT:    v_subb_u32_e32 v25, vcc, 0, v28, vcc
 ; SDAG-NEXT:    v_or_b32_e32 v16, v16, v24
@@ -913,7 +913,7 @@ define <2 x i128> @v_udiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; SDAG-NEXT:    v_lshr_b64 v[18:19], v[0:1], v31
 ; SDAG-NEXT:    v_or_b32_e32 v19, v23, v19
 ; SDAG-NEXT:    v_or_b32_e32 v18, v22, v18
-; SDAG-NEXT:    v_cmp_gt_u32_e64 s[4:5], 64, v30
+; SDAG-NEXT:    v_cmp_ge_u32_e64 s[4:5], 64, v30
 ; SDAG-NEXT:    v_cndmask_b32_e64 v19, v17, v19, s[4:5]
 ; SDAG-NEXT:    v_cndmask_b32_e64 v18, v16, v18, s[4:5]
 ; SDAG-NEXT:    v_cndmask_b32_e64 v17, 0, v25, s[4:5]
@@ -932,7 +932,7 @@ define <2 x i128> @v_udiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; SDAG-NEXT:    v_lshl_b64 v[22:23], v[2:3], v22
 ; SDAG-NEXT:    v_or_b32_e32 v23, v21, v23
 ; SDAG-NEXT:    v_or_b32_e32 v22, v20, v22
-; SDAG-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v26
+; SDAG-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v26
 ; SDAG-NEXT:    v_subrev_i32_e64 v20, s[4:5], 64, v26
 ; SDAG-NEXT:    v_lshr_b64 v[20:21], v[2:3], v20
 ; SDAG-NEXT:    v_cndmask_b32_e32 v21, v21, v23, vcc
@@ -1048,7 +1048,7 @@ define <2 x i128> @v_udiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; SDAG-NEXT:    v_subb_u32_e32 v1, vcc, v8, v1, vcc
 ; SDAG-NEXT:    v_xor_b32_e32 v2, 0x7f, v0
 ; SDAG-NEXT:    v_subb_u32_e32 v20, vcc, 0, v24, vcc
-; SDAG-NEXT:    v_cmp_lt_u64_e64 s[4:5], s[8:9], v[0:1]
+; SDAG-NEXT:    v_cmp_le_u64_e64 s[4:5], s[8:9], v[0:1]
 ; SDAG-NEXT:    v_cndmask_b32_e64 v8, 0, 1, s[4:5]
 ; SDAG-NEXT:    v_subb_u32_e32 v21, vcc, 0, v24, vcc
 ; SDAG-NEXT:    v_or_b32_e32 v2, v2, v20
@@ -1088,7 +1088,7 @@ define <2 x i128> @v_udiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; SDAG-NEXT:    v_lshr_b64 v[10:11], v[4:5], v27
 ; SDAG-NEXT:    v_or_b32_e32 v1, v1, v11
 ; SDAG-NEXT:    v_or_b32_e32 v0, v0, v10
-; SDAG-NEXT:    v_cmp_gt_u32_e64 s[4:5], 64, v26
+; SDAG-NEXT:    v_cmp_ge_u32_e64 s[4:5], 64, v26
 ; SDAG-NEXT:    v_cndmask_b32_e64 v1, v9, v1, s[4:5]
 ; SDAG-NEXT:    v_cndmask_b32_e64 v0, v8, v0, s[4:5]
 ; SDAG-NEXT:    v_cndmask_b32_e64 v9, 0, v21, s[4:5]
@@ -1118,7 +1118,7 @@ define <2 x i128> @v_udiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; SDAG-NEXT:    v_or_b32_e32 v3, v3, v32
 ; SDAG-NEXT:    v_or_b32_e32 v2, v2, v31
 ; SDAG-NEXT:    v_addc_u32_e32 v28, vcc, -1, v14, vcc
-; SDAG-NEXT:    v_cmp_gt_u32_e64 s[4:5], 64, v22
+; SDAG-NEXT:    v_cmp_ge_u32_e64 s[4:5], 64, v22
 ; SDAG-NEXT:    v_cndmask_b32_e64 v3, v7, v3, s[4:5]
 ; SDAG-NEXT:    v_cndmask_b32_e64 v2, v6, v2, s[4:5]
 ; SDAG-NEXT:    v_cndmask_b32_e64 v7, 0, v30, s[4:5]
@@ -1241,7 +1241,7 @@ define <2 x i128> @v_udiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, vcc
 ; GISEL-NEXT:    v_or_b32_e32 v2, v2, v20
 ; GISEL-NEXT:    v_or_b32_e32 v3, v23, v21
-; GISEL-NEXT:    v_cmp_lt_u64_e32 vcc, 0, v[20:21]
+; GISEL-NEXT:    v_cmp_le_u64_e32 vcc, 0, v[20:21]
 ; GISEL-NEXT:    v_cndmask_b32_e64 v19, 0, 1, vcc
 ; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[20:21]
 ; GISEL-NEXT:    v_cndmask_b32_e32 v18, v19, v18, vcc
@@ -1274,7 +1274,7 @@ define <2 x i128> @v_udiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GISEL-NEXT:    s_xor_b64 s[4:5], vcc, -1
 ; GISEL-NEXT:    v_lshr_b64 v[20:21], v[0:1], v20
 ; GISEL-NEXT:    v_lshl_b64 v[24:25], v[0:1], v22
-; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v30
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v30
 ; GISEL-NEXT:    v_cndmask_b32_e32 v22, 0, v2, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e32 v23, 0, v3, vcc
 ; GISEL-NEXT:    v_or_b32_e32 v2, v20, v18
@@ -1307,7 +1307,7 @@ define <2 x i128> @v_udiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GISEL-NEXT:    s_mov_b64 s[6:7], s[4:5]
 ; GISEL-NEXT:    v_or_b32_e32 v20, v20, v24
 ; GISEL-NEXT:    v_or_b32_e32 v21, v21, v25
-; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v26
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v26
 ; GISEL-NEXT:    v_cndmask_b32_e32 v20, v16, v20, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e32 v21, v17, v21, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e32 v16, 0, v18, vcc
@@ -1412,7 +1412,7 @@ define <2 x i128> @v_udiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GISEL-NEXT:    v_cmp_gt_u64_e32 vcc, v[16:17], v[10:11]
 ; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
 ; GISEL-NEXT:    v_xor_b32_e32 v8, 0x7f, v16
-; GISEL-NEXT:    v_cmp_lt_u64_e32 vcc, 0, v[0:1]
+; GISEL-NEXT:    v_cmp_le_u64_e32 vcc, 0, v[0:1]
 ; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
 ; GISEL-NEXT:    v_or_b32_e32 v8, v8, v0
 ; GISEL-NEXT:    v_or_b32_e32 v9, v17, v1
@@ -1447,7 +1447,7 @@ define <2 x i128> @v_udiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GISEL-NEXT:    s_xor_b64 s[4:5], vcc, -1
 ; GISEL-NEXT:    v_lshr_b64 v[20:21], v[4:5], v10
 ; GISEL-NEXT:    v_lshl_b64 v[22:23], v[4:5], v9
-; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v26
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v26
 ; GISEL-NEXT:    v_cndmask_b32_e32 v9, 0, v0, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e32 v10, 0, v1, vcc
 ; GISEL-NEXT:    v_or_b32_e32 v0, v20, v16
@@ -1480,7 +1480,7 @@ define <2 x i128> @v_udiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GISEL-NEXT:    s_mov_b64 s[6:7], s[4:5]
 ; GISEL-NEXT:    v_or_b32_e32 v20, v20, v22
 ; GISEL-NEXT:    v_or_b32_e32 v21, v21, v23
-; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v8
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v8
 ; GISEL-NEXT:    v_cndmask_b32_e32 v6, v6, v20, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e32 v7, v7, v21, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e32 v16, 0, v16, vcc
@@ -1568,7 +1568,7 @@ define <2 x i128> @v_srem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; SDAG-NEXT:    v_subb_u32_e32 v17, vcc, 0, v1, vcc
 ; SDAG-NEXT:    v_mov_b32_e32 v29, v28
 ; SDAG-NEXT:    v_subb_u32_e32 v18, vcc, 0, v2, vcc
-; SDAG-NEXT:    v_cmp_gt_i64_e64 s[4:5], 0, v[2:3]
+; SDAG-NEXT:    v_cmp_ge_i64_e64 s[4:5], 0, v[2:3]
 ; SDAG-NEXT:    v_cndmask_b32_e64 v17, v1, v17, s[4:5]
 ; SDAG-NEXT:    v_cndmask_b32_e64 v16, v0, v16, s[4:5]
 ; SDAG-NEXT:    v_subb_u32_e32 v1, vcc, 0, v3, vcc
@@ -1585,7 +1585,7 @@ define <2 x i128> @v_srem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; SDAG-NEXT:    v_min_u32_e32 v18, v18, v20
 ; SDAG-NEXT:    v_add_i32_e64 v20, s[4:5], 32, v22
 ; SDAG-NEXT:    v_ffbh_u32_e32 v22, v1
-; SDAG-NEXT:    v_cmp_gt_i64_e64 s[4:5], 0, v[10:11]
+; SDAG-NEXT:    v_cmp_ge_i64_e64 s[4:5], 0, v[10:11]
 ; SDAG-NEXT:    v_cndmask_b32_e64 v30, v9, v23, s[4:5]
 ; SDAG-NEXT:    v_subb_u32_e32 v9, vcc, 0, v10, vcc
 ; SDAG-NEXT:    v_cndmask_b32_e64 v31, v8, v21, s[4:5]
@@ -1620,7 +1620,7 @@ define <2 x i128> @v_srem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; SDAG-NEXT:    v_subb_u32_e32 v11, vcc, v11, v18, vcc
 ; SDAG-NEXT:    v_xor_b32_e32 v8, 0x7f, v10
 ; SDAG-NEXT:    v_subb_u32_e32 v18, vcc, 0, v19, vcc
-; SDAG-NEXT:    v_cmp_lt_u64_e64 s[4:5], s[10:11], v[10:11]
+; SDAG-NEXT:    v_cmp_le_u64_e64 s[4:5], s[10:11], v[10:11]
 ; SDAG-NEXT:    v_cndmask_b32_e64 v20, 0, 1, s[4:5]
 ; SDAG-NEXT:    v_subb_u32_e32 v19, vcc, 0, v19, vcc
 ; SDAG-NEXT:    v_or_b32_e32 v8, v8, v18
@@ -1660,7 +1660,7 @@ define <2 x i128> @v_srem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; SDAG-NEXT:    v_lshr_b64 v[18:19], v[16:17], v25
 ; SDAG-NEXT:    v_or_b32_e32 v11, v11, v19
 ; SDAG-NEXT:    v_or_b32_e32 v10, v10, v18
-; SDAG-NEXT:    v_cmp_gt_u32_e64 s[4:5], 64, v24
+; SDAG-NEXT:    v_cmp_ge_u32_e64 s[4:5], 64, v24
 ; SDAG-NEXT:    v_cndmask_b32_e64 v11, v21, v11, s[4:5]
 ; SDAG-NEXT:    v_cndmask_b32_e64 v10, v20, v10, s[4:5]
 ; SDAG-NEXT:    v_cndmask_b32_e64 v21, 0, v23, s[4:5]
@@ -1690,7 +1690,7 @@ define <2 x i128> @v_srem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; SDAG-NEXT:    v_or_b32_e32 v9, v9, v27
 ; SDAG-NEXT:    v_or_b32_e32 v8, v8, v26
 ; SDAG-NEXT:    v_addc_u32_e32 v38, vcc, -1, v2, vcc
-; SDAG-NEXT:    v_cmp_gt_u32_e64 s[4:5], 64, v32
+; SDAG-NEXT:    v_cmp_ge_u32_e64 s[4:5], 64, v32
 ; SDAG-NEXT:    v_cndmask_b32_e64 v9, v49, v9, s[4:5]
 ; SDAG-NEXT:    v_cndmask_b32_e64 v8, v48, v8, s[4:5]
 ; SDAG-NEXT:    v_cndmask_b32_e64 v27, 0, v25, s[4:5]
@@ -1763,7 +1763,7 @@ define <2 x i128> @v_srem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; SDAG-NEXT:    v_mov_b32_e32 v35, v26
 ; SDAG-NEXT:    v_subb_u32_e32 v9, vcc, 0, v5, vcc
 ; SDAG-NEXT:    v_subb_u32_e32 v10, vcc, 0, v6, vcc
-; SDAG-NEXT:    v_cmp_gt_i64_e64 s[4:5], 0, v[6:7]
+; SDAG-NEXT:    v_cmp_ge_i64_e64 s[4:5], 0, v[6:7]
 ; SDAG-NEXT:    v_cndmask_b32_e64 v9, v5, v9, s[4:5]
 ; SDAG-NEXT:    v_cndmask_b32_e64 v8, v4, v8, s[4:5]
 ; SDAG-NEXT:    v_subb_u32_e32 v5, vcc, 0, v7, vcc
@@ -1781,7 +1781,7 @@ define <2 x i128> @v_srem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; SDAG-NEXT:    v_ffbh_u32_e32 v22, v5
 ; SDAG-NEXT:    v_min_u32_e32 v10, v10, v11
 ; SDAG-NEXT:    v_subb_u32_e32 v11, vcc, 0, v14, vcc
-; SDAG-NEXT:    v_cmp_gt_i64_e64 s[4:5], 0, v[14:15]
+; SDAG-NEXT:    v_cmp_ge_i64_e64 s[4:5], 0, v[14:15]
 ; SDAG-NEXT:    v_cndmask_b32_e64 v36, v13, v21, s[4:5]
 ; SDAG-NEXT:    v_cndmask_b32_e64 v37, v12, v19, s[4:5]
 ; SDAG-NEXT:    v_cmp_eq_u64_e64 s[6:7], 0, v[6:7]
@@ -1815,7 +1815,7 @@ define <2 x i128> @v_srem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; SDAG-NEXT:    v_subb_u32_e32 v11, vcc, v13, v12, vcc
 ; SDAG-NEXT:    v_xor_b32_e32 v14, 0x7f, v10
 ; SDAG-NEXT:    v_subb_u32_e32 v12, vcc, 0, v18, vcc
-; SDAG-NEXT:    v_cmp_lt_u64_e64 s[4:5], s[10:11], v[10:11]
+; SDAG-NEXT:    v_cmp_le_u64_e64 s[4:5], s[10:11], v[10:11]
 ; SDAG-NEXT:    v_cndmask_b32_e64 v19, 0, 1, s[4:5]
 ; SDAG-NEXT:    v_subb_u32_e32 v13, vcc, 0, v18, vcc
 ; SDAG-NEXT:    v_or_b32_e32 v14, v14, v12
@@ -1855,7 +1855,7 @@ define <2 x i128> @v_srem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; SDAG-NEXT:    v_lshr_b64 v[10:11], v[8:9], v10
 ; SDAG-NEXT:    v_or_b32_e32 v11, v21, v11
 ; SDAG-NEXT:    v_or_b32_e32 v10, v20, v10
-; SDAG-NEXT:    v_cmp_gt_u32_e64 s[4:5], 64, v13
+; SDAG-NEXT:    v_cmp_ge_u32_e64 s[4:5], 64, v13
 ; SDAG-NEXT:    v_cndmask_b32_e64 v12, v19, v11, s[4:5]
 ; SDAG-NEXT:    v_cndmask_b32_e64 v18, v18, v10, s[4:5]
 ; SDAG-NEXT:    v_cndmask_b32_e64 v11, 0, v23, s[4:5]
@@ -1885,7 +1885,7 @@ define <2 x i128> @v_srem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; SDAG-NEXT:    v_or_b32_e32 v15, v15, v25
 ; SDAG-NEXT:    v_or_b32_e32 v14, v14, v24
 ; SDAG-NEXT:    v_addc_u32_e32 v52, vcc, -1, v6, vcc
-; SDAG-NEXT:    v_cmp_gt_u32_e64 s[4:5], 64, v38
+; SDAG-NEXT:    v_cmp_ge_u32_e64 s[4:5], 64, v38
 ; SDAG-NEXT:    v_cndmask_b32_e64 v15, v54, v15, s[4:5]
 ; SDAG-NEXT:    v_cndmask_b32_e64 v14, v53, v14, s[4:5]
 ; SDAG-NEXT:    v_cndmask_b32_e64 v25, 0, v23, s[4:5]
@@ -2083,7 +2083,7 @@ define <2 x i128> @v_srem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GISEL-NEXT:    v_cndmask_b32_e64 v20, 0, 1, vcc
 ; GISEL-NEXT:    v_or_b32_e32 v18, v18, v0
 ; GISEL-NEXT:    v_or_b32_e32 v19, v3, v1
-; GISEL-NEXT:    v_cmp_lt_u64_e32 vcc, 0, v[0:1]
+; GISEL-NEXT:    v_cmp_le_u64_e32 vcc, 0, v[0:1]
 ; GISEL-NEXT:    v_cndmask_b32_e64 v22, 0, 1, vcc
 ; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[0:1]
 ; GISEL-NEXT:    v_cndmask_b32_e32 v20, v22, v20, vcc
@@ -2116,7 +2116,7 @@ define <2 x i128> @v_srem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GISEL-NEXT:    s_xor_b64 s[4:5], vcc, -1
 ; GISEL-NEXT:    v_lshr_b64 v[18:19], v[16:17], v18
 ; GISEL-NEXT:    v_lshl_b64 v[22:23], v[16:17], v20
-; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v24
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v24
 ; GISEL-NEXT:    v_cndmask_b32_e32 v20, 0, v0, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e32 v21, 0, v1, vcc
 ; GISEL-NEXT:    v_or_b32_e32 v0, v18, v2
@@ -2149,7 +2149,7 @@ define <2 x i128> @v_srem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GISEL-NEXT:    s_mov_b64 s[6:7], s[4:5]
 ; GISEL-NEXT:    v_or_b32_e32 v2, v2, v22
 ; GISEL-NEXT:    v_or_b32_e32 v3, v3, v23
-; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v31
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v31
 ; GISEL-NEXT:    v_cndmask_b32_e32 v2, v24, v2, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e32 v3, v25, v3, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e32 v24, 0, v0, vcc
@@ -2272,7 +2272,7 @@ define <2 x i128> @v_srem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GISEL-NEXT:    v_cmp_gt_u64_e32 vcc, v[14:15], v[2:3]
 ; GISEL-NEXT:    v_cndmask_b32_e64 v21, 0, 1, vcc
 ; GISEL-NEXT:    v_xor_b32_e32 v2, 0x7f, v14
-; GISEL-NEXT:    v_cmp_lt_u64_e32 vcc, 0, v[0:1]
+; GISEL-NEXT:    v_cmp_le_u64_e32 vcc, 0, v[0:1]
 ; GISEL-NEXT:    v_cndmask_b32_e64 v22, 0, 1, vcc
 ; GISEL-NEXT:    v_or_b32_e32 v2, v2, v0
 ; GISEL-NEXT:    v_or_b32_e32 v3, v15, v1
@@ -2307,7 +2307,7 @@ define <2 x i128> @v_srem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GISEL-NEXT:    s_xor_b64 s[4:5], vcc, -1
 ; GISEL-NEXT:    v_lshr_b64 v[14:15], v[12:13], v14
 ; GISEL-NEXT:    v_lshl_b64 v[22:23], v[12:13], v20
-; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v24
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v24
 ; GISEL-NEXT:    v_cndmask_b32_e32 v20, 0, v0, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e32 v21, 0, v1, vcc
 ; GISEL-NEXT:    v_or_b32_e32 v0, v14, v2
@@ -2340,7 +2340,7 @@ define <2 x i128> @v_srem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GISEL-NEXT:    s_mov_b64 s[6:7], s[4:5]
 ; GISEL-NEXT:    v_or_b32_e32 v2, v2, v22
 ; GISEL-NEXT:    v_or_b32_e32 v3, v3, v23
-; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v36
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v36
 ; GISEL-NEXT:    v_cndmask_b32_e32 v2, v24, v2, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e32 v3, v25, v3, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e32 v26, 0, v0, vcc
@@ -2503,7 +2503,7 @@ define <2 x i128> @v_urem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; SDAG-NEXT:    v_subb_u32_e32 v19, vcc, v20, v17, vcc
 ; SDAG-NEXT:    v_xor_b32_e32 v16, 0x7f, v18
 ; SDAG-NEXT:    v_subb_u32_e32 v20, vcc, 0, v28, vcc
-; SDAG-NEXT:    v_cmp_lt_u64_e64 s[4:5], s[8:9], v[18:19]
+; SDAG-NEXT:    v_cmp_le_u64_e64 s[4:5], s[8:9], v[18:19]
 ; SDAG-NEXT:    v_cndmask_b32_e64 v22, 0, 1, s[4:5]
 ; SDAG-NEXT:    v_subb_u32_e32 v21, vcc, 0, v28, vcc
 ; SDAG-NEXT:    v_or_b32_e32 v16, v16, v20
@@ -2543,7 +2543,7 @@ define <2 x i128> @v_urem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; SDAG-NEXT:    v_lshr_b64 v[18:19], v[0:1], v18
 ; SDAG-NEXT:    v_or_b32_e32 v19, v25, v19
 ; SDAG-NEXT:    v_or_b32_e32 v18, v24, v18
-; SDAG-NEXT:    v_cmp_gt_u32_e64 s[4:5], 64, v21
+; SDAG-NEXT:    v_cmp_ge_u32_e64 s[4:5], 64, v21
 ; SDAG-NEXT:    v_cndmask_b32_e64 v19, v23, v19, s[4:5]
 ; SDAG-NEXT:    v_cndmask_b32_e64 v18, v22, v18, s[4:5]
 ; SDAG-NEXT:    v_cndmask_b32_e64 v23, 0, v27, s[4:5]
@@ -2573,7 +2573,7 @@ define <2 x i128> @v_urem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; SDAG-NEXT:    v_or_b32_e32 v17, v17, v29
 ; SDAG-NEXT:    v_or_b32_e32 v16, v16, v28
 ; SDAG-NEXT:    v_addc_u32_e32 v36, vcc, -1, v10, vcc
-; SDAG-NEXT:    v_cmp_gt_u32_e64 s[4:5], 64, v30
+; SDAG-NEXT:    v_cmp_ge_u32_e64 s[4:5], 64, v30
 ; SDAG-NEXT:    v_cndmask_b32_e64 v17, v38, v17, s[4:5]
 ; SDAG-NEXT:    v_cndmask_b32_e64 v16, v37, v16, s[4:5]
 ; SDAG-NEXT:    v_cndmask_b32_e64 v29, 0, v27, s[4:5]
@@ -2678,7 +2678,7 @@ define <2 x i128> @v_urem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; SDAG-NEXT:    v_subb_u32_e32 v17, vcc, v20, v17, vcc
 ; SDAG-NEXT:    v_xor_b32_e32 v18, 0x7f, v16
 ; SDAG-NEXT:    v_subb_u32_e32 v20, vcc, 0, v28, vcc
-; SDAG-NEXT:    v_cmp_lt_u64_e64 s[4:5], s[8:9], v[16:17]
+; SDAG-NEXT:    v_cmp_le_u64_e64 s[4:5], s[8:9], v[16:17]
 ; SDAG-NEXT:    v_cndmask_b32_e64 v22, 0, 1, s[4:5]
 ; SDAG-NEXT:    v_subb_u32_e32 v21, vcc, 0, v28, vcc
 ; SDAG-NEXT:    v_or_b32_e32 v18, v18, v20
@@ -2718,7 +2718,7 @@ define <2 x i128> @v_urem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; SDAG-NEXT:    v_lshr_b64 v[20:21], v[4:5], v27
 ; SDAG-NEXT:    v_or_b32_e32 v17, v17, v21
 ; SDAG-NEXT:    v_or_b32_e32 v16, v16, v20
-; SDAG-NEXT:    v_cmp_gt_u32_e64 s[4:5], 64, v26
+; SDAG-NEXT:    v_cmp_ge_u32_e64 s[4:5], 64, v26
 ; SDAG-NEXT:    v_cndmask_b32_e64 v17, v23, v17, s[4:5]
 ; SDAG-NEXT:    v_cndmask_b32_e64 v16, v22, v16, s[4:5]
 ; SDAG-NEXT:    v_cndmask_b32_e64 v21, 0, v25, s[4:5]
@@ -2748,7 +2748,7 @@ define <2 x i128> @v_urem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; SDAG-NEXT:    v_or_b32_e32 v19, v19, v29
 ; SDAG-NEXT:    v_or_b32_e32 v18, v18, v28
 ; SDAG-NEXT:    v_addc_u32_e32 v48, vcc, -1, v14, vcc
-; SDAG-NEXT:    v_cmp_gt_u32_e64 s[4:5], 64, v34
+; SDAG-NEXT:    v_cmp_ge_u32_e64 s[4:5], 64, v34
 ; SDAG-NEXT:    v_cndmask_b32_e64 v19, v50, v19, s[4:5]
 ; SDAG-NEXT:    v_cndmask_b32_e64 v18, v49, v18, s[4:5]
 ; SDAG-NEXT:    v_cndmask_b32_e64 v29, 0, v27, s[4:5]
@@ -2910,7 +2910,7 @@ define <2 x i128> @v_urem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GISEL-NEXT:    v_cndmask_b32_e64 v24, 0, 1, vcc
 ; GISEL-NEXT:    v_or_b32_e32 v20, v23, v16
 ; GISEL-NEXT:    v_or_b32_e32 v21, v19, v17
-; GISEL-NEXT:    v_cmp_lt_u64_e32 vcc, 0, v[16:17]
+; GISEL-NEXT:    v_cmp_le_u64_e32 vcc, 0, v[16:17]
 ; GISEL-NEXT:    v_cndmask_b32_e64 v23, 0, 1, vcc
 ; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[16:17]
 ; GISEL-NEXT:    v_cndmask_b32_e32 v23, v23, v24, vcc
@@ -2943,7 +2943,7 @@ define <2 x i128> @v_urem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GISEL-NEXT:    s_xor_b64 s[4:5], vcc, -1
 ; GISEL-NEXT:    v_lshr_b64 v[20:21], v[0:1], v20
 ; GISEL-NEXT:    v_lshl_b64 v[24:25], v[0:1], v22
-; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v26
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v26
 ; GISEL-NEXT:    v_cndmask_b32_e32 v22, 0, v16, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e32 v23, 0, v17, vcc
 ; GISEL-NEXT:    v_or_b32_e32 v16, v20, v18
@@ -2976,7 +2976,7 @@ define <2 x i128> @v_urem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GISEL-NEXT:    s_mov_b64 s[6:7], s[4:5]
 ; GISEL-NEXT:    v_or_b32_e32 v18, v18, v24
 ; GISEL-NEXT:    v_or_b32_e32 v19, v19, v25
-; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v30
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v30
 ; GISEL-NEXT:    v_cndmask_b32_e32 v18, v26, v18, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e32 v19, v27, v19, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e32 v26, 0, v16, vcc
@@ -3081,7 +3081,7 @@ define <2 x i128> @v_urem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GISEL-NEXT:    v_cmp_gt_u64_e32 vcc, v[22:23], v[24:25]
 ; GISEL-NEXT:    v_cndmask_b32_e64 v24, 0, 1, vcc
 ; GISEL-NEXT:    v_xor_b32_e32 v18, 0x7f, v22
-; GISEL-NEXT:    v_cmp_lt_u64_e32 vcc, 0, v[16:17]
+; GISEL-NEXT:    v_cmp_le_u64_e32 vcc, 0, v[16:17]
 ; GISEL-NEXT:    v_cndmask_b32_e64 v25, 0, 1, vcc
 ; GISEL-NEXT:    v_or_b32_e32 v18, v18, v16
 ; GISEL-NEXT:    v_or_b32_e32 v19, v23, v17
@@ -3116,7 +3116,7 @@ define <2 x i128> @v_urem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GISEL-NEXT:    s_xor_b64 s[4:5], vcc, -1
 ; GISEL-NEXT:    v_lshr_b64 v[22:23], v[4:5], v22
 ; GISEL-NEXT:    v_lshl_b64 v[26:27], v[4:5], v24
-; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v28
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v28
 ; GISEL-NEXT:    v_cndmask_b32_e32 v24, 0, v16, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e32 v25, 0, v17, vcc
 ; GISEL-NEXT:    v_or_b32_e32 v16, v22, v18
@@ -3149,7 +3149,7 @@ define <2 x i128> @v_urem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GISEL-NEXT:    s_mov_b64 s[6:7], s[4:5]
 ; GISEL-NEXT:    v_or_b32_e32 v18, v18, v26
 ; GISEL-NEXT:    v_or_b32_e32 v19, v19, v27
-; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v34
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v34
 ; GISEL-NEXT:    v_cndmask_b32_e32 v18, v28, v18, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e32 v19, v29, v19, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e32 v30, 0, v16, vcc
diff --git a/llvm/test/CodeGen/AMDGPU/extract-subvector-16bit.ll b/llvm/test/CodeGen/AMDGPU/extract-subvector-16bit.ll
index bb66bb319d481..4523bd65f9285 100644
--- a/llvm/test/CodeGen/AMDGPU/extract-subvector-16bit.ll
+++ b/llvm/test/CodeGen/AMDGPU/extract-subvector-16bit.ll
@@ -70,11 +70,11 @@ define <4 x i16> @vec_8xi16_extract_4xi16(ptr addrspace(1) %p0, ptr addrspace(1)
 ; SI-NEXT:    v_mov_b32_e32 v5, 0xffff0000
 ; SI-NEXT:    v_bfrev_b32_e32 v6, 1
 ; SI-NEXT:    v_mov_b32_e32 v7, 0xffff8000
-; SI-NEXT:    v_cmp_lt_i32_e32 vcc, -1, v0
+; SI-NEXT:    v_cmp_le_i32_e32 vcc, -1, v0
 ; SI-NEXT:    v_cndmask_b32_e32 v0, v3, v4, vcc
-; SI-NEXT:    v_cmp_lt_i32_e32 vcc, -1, v1
+; SI-NEXT:    v_cmp_le_i32_e32 vcc, -1, v1
 ; SI-NEXT:    v_cndmask_b32_e32 v1, v5, v6, vcc
-; SI-NEXT:    v_cmp_lt_i32_e32 vcc, -1, v2
+; SI-NEXT:    v_cmp_le_i32_e32 vcc, -1, v2
 ; SI-NEXT:    v_cndmask_b32_e32 v2, -1, v7, vcc
 ; SI-NEXT:    v_or_b32_e32 v0, v0, v1
 ; SI-NEXT:    v_lshlrev_b32_e32 v4, 16, v2
@@ -262,13 +262,13 @@ define <4 x i16> @vec_8xi16_extract_4xi16_2(ptr addrspace(1) %p0, ptr addrspace(
 ; SI-NEXT:    v_mov_b32_e32 v5, 0x8000
 ; SI-NEXT:    v_mov_b32_e32 v6, 0xffff0000
 ; SI-NEXT:    v_bfrev_b32_e32 v7, 1
-; SI-NEXT:    v_cmp_lt_i32_e32 vcc, -1, v0
+; SI-NEXT:    v_cmp_le_i32_e32 vcc, -1, v0
 ; SI-NEXT:    v_cndmask_b32_e32 v0, v4, v5, vcc
-; SI-NEXT:    v_cmp_lt_i32_e32 vcc, -1, v1
+; SI-NEXT:    v_cmp_le_i32_e32 vcc, -1, v1
 ; SI-NEXT:    v_cndmask_b32_e32 v1, v6, v7, vcc
-; SI-NEXT:    v_cmp_lt_i32_e32 vcc, -1, v3
+; SI-NEXT:    v_cmp_le_i32_e32 vcc, -1, v3
 ; SI-NEXT:    v_cndmask_b32_e32 v3, v4, v5, vcc
-; SI-NEXT:    v_cmp_lt_i32_e32 vcc, -1, v2
+; SI-NEXT:    v_cmp_le_i32_e32 vcc, -1, v2
 ; SI-NEXT:    v_cndmask_b32_e32 v4, v6, v7, vcc
 ; SI-NEXT:    v_or_b32_e32 v0, v0, v1
 ; SI-NEXT:    v_or_b32_e32 v2, v3, v4
@@ -693,11 +693,11 @@ define <4 x i16> @vec_16xi16_extract_4xi16(ptr addrspace(1) %p0, ptr addrspace(1
 ; SI-NEXT:    v_mov_b32_e32 v5, 0xffff0000
 ; SI-NEXT:    v_bfrev_b32_e32 v6, 1
 ; SI-NEXT:    v_mov_b32_e32 v7, 0xffff8000
-; SI-NEXT:    v_cmp_lt_i32_e32 vcc, -1, v0
+; SI-NEXT:    v_cmp_le_i32_e32 vcc, -1, v0
 ; SI-NEXT:    v_cndmask_b32_e32 v0, v3, v4, vcc
-; SI-NEXT:    v_cmp_lt_i32_e32 vcc, -1, v1
+; SI-NEXT:    v_cmp_le_i32_e32 vcc, -1, v1
 ; SI-NEXT:    v_cndmask_b32_e32 v1, v5, v6, vcc
-; SI-NEXT:    v_cmp_lt_i32_e32 vcc, -1, v2
+; SI-NEXT:    v_cmp_le_i32_e32 vcc, -1, v2
 ; SI-NEXT:    v_cndmask_b32_e32 v2, -1, v7, vcc
 ; SI-NEXT:    v_or_b32_e32 v0, v0, v1
 ; SI-NEXT:    v_lshlrev_b32_e32 v4, 16, v2
@@ -933,13 +933,13 @@ define <4 x i16> @vec_16xi16_extract_4xi16_2(ptr addrspace(1) %p0, ptr addrspace
 ; SI-NEXT:    v_mov_b32_e32 v5, 0x8000
 ; SI-NEXT:    v_mov_b32_e32 v6, 0xffff0000
 ; SI-NEXT:    v_bfrev_b32_e32 v7, 1
-; SI-NEXT:    v_cmp_lt_i32_e32 vcc, -1, v0
+; SI-NEXT:    v_cmp_le_i32_e32 vcc, -1, v0
 ; SI-NEXT:    v_cndmask_b32_e32 v0, v4, v5, vcc
-; SI-NEXT:    v_cmp_lt_i32_e32 vcc, -1, v1
+; SI-NEXT:    v_cmp_le_i32_e32 vcc, -1, v1
 ; SI-NEXT:    v_cndmask_b32_e32 v1, v6, v7, vcc
-; SI-NEXT:    v_cmp_lt_i32_e32 vcc, -1, v2
+; SI-NEXT:    v_cmp_le_i32_e32 vcc, -1, v2
 ; SI-NEXT:    v_cndmask_b32_e32 v2, v4, v5, vcc
-; SI-NEXT:    v_cmp_lt_i32_e32 vcc, -1, v3
+; SI-NEXT:    v_cmp_le_i32_e32 vcc, -1, v3
 ; SI-NEXT:    v_cndmask_b32_e32 v3, v6, v7, vcc
 ; SI-NEXT:    v_or_b32_e32 v0, v0, v1
 ; SI-NEXT:    v_or_b32_e32 v2, v2, v3
@@ -1539,21 +1539,21 @@ define amdgpu_gfx <8 x i16> @vec_16xi16_extract_8xi16_0(i1 inreg %cond, ptr addr
 ; SI-NEXT:    v_mov_b32_e32 v9, 0x3900
 ; SI-NEXT:    v_mov_b32_e32 v10, 0x3d000000
 ; SI-NEXT:    v_mov_b32_e32 v11, 0x39000000
-; SI-NEXT:    v_cmp_lt_u32_e32 vcc, s34, v0
+; SI-NEXT:    v_cmp_le_u32_e32 vcc, s34, v0
 ; SI-NEXT:    v_cndmask_b32_e32 v0, v8, v9, vcc
-; SI-NEXT:    v_cmp_lt_u32_e32 vcc, s34, v1
+; SI-NEXT:    v_cmp_le_u32_e32 vcc, s34, v1
 ; SI-NEXT:    v_cndmask_b32_e32 v1, v10, v11, vcc
-; SI-NEXT:    v_cmp_lt_u32_e32 vcc, s34, v5
+; SI-NEXT:    v_cmp_le_u32_e32 vcc, s34, v5
 ; SI-NEXT:    v_cndmask_b32_e32 v5, v8, v9, vcc
-; SI-NEXT:    v_cmp_lt_u32_e32 vcc, s34, v6
+; SI-NEXT:    v_cmp_le_u32_e32 vcc, s34, v6
 ; SI-NEXT:    v_cndmask_b32_e32 v12, v10, v11, vcc
-; SI-NEXT:    v_cmp_lt_u32_e32 vcc, s34, v3
+; SI-NEXT:    v_cmp_le_u32_e32 vcc, s34, v3
 ; SI-NEXT:    v_cndmask_b32_e32 v3, v8, v9, vcc
-; SI-NEXT:    v_cmp_lt_u32_e32 vcc, s34, v7
+; SI-NEXT:    v_cmp_le_u32_e32 vcc, s34, v7
 ; SI-NEXT:    v_cndmask_b32_e32 v7, v10, v11, vcc
-; SI-NEXT:    v_cmp_lt_u32_e32 vcc, s34, v2
+; SI-NEXT:    v_cmp_le_u32_e32 vcc, s34, v2
 ; SI-NEXT:    v_cndmask_b32_e32 v2, v8, v9, vcc
-; SI-NEXT:    v_cmp_lt_u32_e32 vcc, s34, v4
+; SI-NEXT:    v_cmp_le_u32_e32 vcc, s34, v4
 ; SI-NEXT:    v_cndmask_b32_e32 v8, v10, v11, vcc
 ; SI-NEXT:    v_or_b32_e32 v0, v0, v1
 ; SI-NEXT:    v_or_b32_e32 v4, v5, v12
diff --git a/llvm/test/CodeGen/AMDGPU/extract-subvector.ll b/llvm/test/CodeGen/AMDGPU/extract-subvector.ll
index 41082821bafe3..28a66f97a79ad 100644
--- a/llvm/test/CodeGen/AMDGPU/extract-subvector.ll
+++ b/llvm/test/CodeGen/AMDGPU/extract-subvector.ll
@@ -69,9 +69,9 @@ define <2 x i16> @extract_2xi16(ptr addrspace(1) %p0, ptr addrspace(1) %p1, i1 %
 ; GCN-NEXT:    v_mov_b32_e32 v2, 0xffff
 ; GCN-NEXT:    v_mov_b32_e32 v3, 0x8000
 ; GCN-NEXT:    v_mov_b32_e32 v4, 0xffff8000
-; GCN-NEXT:    v_cmp_lt_i32_e32 vcc, -1, v0
+; GCN-NEXT:    v_cmp_le_i32_e32 vcc, -1, v0
 ; GCN-NEXT:    v_cndmask_b32_e32 v0, v2, v3, vcc
-; GCN-NEXT:    v_cmp_lt_i32_e32 vcc, -1, v1
+; GCN-NEXT:    v_cmp_le_i32_e32 vcc, -1, v1
 ; GCN-NEXT:    v_cndmask_b32_e32 v1, -1, v4, vcc
 ; GCN-NEXT:    v_lshlrev_b32_e32 v2, 16, v1
 ; GCN-NEXT:    v_or_b32_e32 v0, v0, v2
@@ -140,9 +140,9 @@ define <2 x i64> @extract_2xi64(ptr addrspace(1) %p0, ptr addrspace(1) %p1, i1 %
 ; GCN-NEXT:    s_or_b64 exec, exec, s[4:5]
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-NEXT:    v_mov_b32_e32 v1, 0xffff8000
-; GCN-NEXT:    v_cmp_lt_i64_e32 vcc, -1, v[4:5]
+; GCN-NEXT:    v_cmp_le_i64_e32 vcc, -1, v[4:5]
 ; GCN-NEXT:    v_cndmask_b32_e32 v0, -1, v1, vcc
-; GCN-NEXT:    v_cmp_lt_i64_e32 vcc, -1, v[6:7]
+; GCN-NEXT:    v_cmp_le_i64_e32 vcc, -1, v[6:7]
 ; GCN-NEXT:    v_cndmask_b32_e32 v2, -1, v1, vcc
 ; GCN-NEXT:    v_mov_b32_e32 v1, -1
 ; GCN-NEXT:    v_mov_b32_e32 v3, -1
@@ -210,13 +210,13 @@ define <4 x i64> @extract_4xi64(ptr addrspace(1) %p0, ptr addrspace(1) %p1, i1 %
 ; GCN-NEXT:    s_or_b64 exec, exec, s[4:5]
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-NEXT:    v_mov_b32_e32 v1, 0xffff8000
-; GCN-NEXT:    v_cmp_gt_i64_e32 vcc, 0, v[4:5]
+; GCN-NEXT:    v_cmp_ge_i64_e32 vcc, 0, v[4:5]
 ; GCN-NEXT:    v_cndmask_b32_e64 v0, v1, -1, vcc
-; GCN-NEXT:    v_cmp_gt_i64_e32 vcc, 0, v[6:7]
+; GCN-NEXT:    v_cmp_ge_i64_e32 vcc, 0, v[6:7]
 ; GCN-NEXT:    v_cndmask_b32_e64 v2, v1, -1, vcc
-; GCN-NEXT:    v_cmp_gt_i64_e32 vcc, 0, v[8:9]
+; GCN-NEXT:    v_cmp_ge_i64_e32 vcc, 0, v[8:9]
 ; GCN-NEXT:    v_cndmask_b32_e64 v4, v1, -1, vcc
-; GCN-NEXT:    v_cmp_gt_i64_e32 vcc, 0, v[10:11]
+; GCN-NEXT:    v_cmp_ge_i64_e32 vcc, 0, v[10:11]
 ; GCN-NEXT:    v_cndmask_b32_e64 v6, v1, -1, vcc
 ; GCN-NEXT:    v_mov_b32_e32 v1, -1
 ; GCN-NEXT:    v_mov_b32_e32 v3, -1
@@ -300,15 +300,15 @@ define <8 x i64> @extract_8xi64(ptr addrspace(1) %p0, ptr addrspace(1) %p1, i1 %
 ; GCN-NEXT:  .LBB3_4: ; %exit
 ; GCN-NEXT:    s_or_b64 exec, exec, s[4:5]
 ; GCN-NEXT:    v_mov_b32_e32 v1, 0xffff8000
-; GCN-NEXT:    v_cmp_gt_i64_e32 vcc, 0, v[6:7]
-; GCN-NEXT:    v_cmp_gt_i64_e64 s[4:5], 0, v[8:9]
-; GCN-NEXT:    v_cmp_gt_i64_e64 s[6:7], 0, v[10:11]
-; GCN-NEXT:    v_cmp_gt_i64_e64 s[8:9], 0, v[12:13]
-; GCN-NEXT:    v_cmp_gt_i64_e64 s[10:11], 0, v[14:15]
-; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    v_cmp_gt_i64_e64 s[12:13], 0, v[16:17]
-; GCN-NEXT:    v_cmp_gt_i64_e64 s[14:15], 0, v[18:19]
-; GCN-NEXT:    v_cmp_gt_i64_e64 s[16:17], 0, v[4:5]
+; GCN-NEXT:    v_cmp_ge_i64_e32 vcc, 0, v[6:7]
+; GCN-NEXT:    v_cmp_ge_i64_e64 s[4:5], 0, v[8:9]
+; GCN-NEXT:    v_cmp_ge_i64_e64 s[6:7], 0, v[10:11]
+; GCN-NEXT:    v_cmp_ge_i64_e64 s[8:9], 0, v[12:13]
+; GCN-NEXT:    v_cmp_ge_i64_e64 s[10:11], 0, v[14:15]
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    v_cmp_ge_i64_e64 s[12:13], 0, v[16:17]
+; GCN-NEXT:    v_cmp_ge_i64_e64 s[14:15], 0, v[18:19]
+; GCN-NEXT:    v_cmp_ge_i64_e64 s[16:17], 0, v[4:5]
 ; GCN-NEXT:    v_cndmask_b32_e64 v0, v1, -1, s[16:17]
 ; GCN-NEXT:    v_cndmask_b32_e64 v2, v1, -1, vcc
 ; GCN-NEXT:    v_cndmask_b32_e64 v4, v1, -1, s[4:5]
diff --git a/llvm/test/CodeGen/AMDGPU/fcopysign.f16.ll b/llvm/test/CodeGen/AMDGPU/fcopysign.f16.ll
index a3ec35da29f67..11c63e5074f47 100644
--- a/llvm/test/CodeGen/AMDGPU/fcopysign.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fcopysign.f16.ll
@@ -880,10 +880,10 @@ define half @v_copysign_out_f16_mag_f64_sign_f16(double %mag, half %sign) {
 ; SI-NEXT:    v_lshlrev_b32_e32 v5, 12, v4
 ; SI-NEXT:    v_or_b32_e32 v3, v6, v3
 ; SI-NEXT:    v_or_b32_e32 v5, v0, v5
-; SI-NEXT:    v_cmp_gt_i32_e32 vcc, 1, v4
+; SI-NEXT:    v_cmp_ge_i32_e32 vcc, 1, v4
 ; SI-NEXT:    v_cndmask_b32_e32 v3, v5, v3, vcc
 ; SI-NEXT:    v_and_b32_e32 v5, 7, v3
-; SI-NEXT:    v_cmp_lt_i32_e32 vcc, 5, v5
+; SI-NEXT:    v_cmp_le_i32_e32 vcc, 5, v5
 ; SI-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
 ; SI-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v5
 ; SI-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
@@ -891,7 +891,7 @@ define half @v_copysign_out_f16_mag_f64_sign_f16(double %mag, half %sign) {
 ; SI-NEXT:    v_lshrrev_b32_e32 v3, 2, v3
 ; SI-NEXT:    v_add_i32_e32 v3, vcc, v3, v5
 ; SI-NEXT:    v_mov_b32_e32 v5, 0x7c00
-; SI-NEXT:    v_cmp_gt_i32_e32 vcc, 31, v4
+; SI-NEXT:    v_cmp_ge_i32_e32 vcc, 31, v4
 ; SI-NEXT:    v_cndmask_b32_e32 v3, v5, v3, vcc
 ; SI-NEXT:    v_mov_b32_e32 v6, 0x7e00
 ; SI-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
@@ -930,10 +930,10 @@ define half @v_copysign_out_f16_mag_f64_sign_f16(double %mag, half %sign) {
 ; VI-NEXT:    v_lshlrev_b32_e32 v4, 12, v1
 ; VI-NEXT:    v_or_b32_e32 v3, v5, v3
 ; VI-NEXT:    v_or_b32_e32 v4, v0, v4
-; VI-NEXT:    v_cmp_gt_i32_e32 vcc, 1, v1
+; VI-NEXT:    v_cmp_ge_i32_e32 vcc, 1, v1
 ; VI-NEXT:    v_cndmask_b32_e32 v3, v4, v3, vcc
 ; VI-NEXT:    v_and_b32_e32 v4, 7, v3
-; VI-NEXT:    v_cmp_lt_i32_e32 vcc, 5, v4
+; VI-NEXT:    v_cmp_le_i32_e32 vcc, 5, v4
 ; VI-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
 ; VI-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v4
 ; VI-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
@@ -941,7 +941,7 @@ define half @v_copysign_out_f16_mag_f64_sign_f16(double %mag, half %sign) {
 ; VI-NEXT:    v_lshrrev_b32_e32 v3, 2, v3
 ; VI-NEXT:    v_add_u32_e32 v3, vcc, v3, v4
 ; VI-NEXT:    v_mov_b32_e32 v4, 0x7c00
-; VI-NEXT:    v_cmp_gt_i32_e32 vcc, 31, v1
+; VI-NEXT:    v_cmp_ge_i32_e32 vcc, 31, v1
 ; VI-NEXT:    v_cndmask_b32_e32 v3, v4, v3, vcc
 ; VI-NEXT:    v_mov_b32_e32 v5, 0x7e00
 ; VI-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
@@ -974,10 +974,10 @@ define half @v_copysign_out_f16_mag_f64_sign_f16(double %mag, half %sign) {
 ; GFX9-NEXT:    v_add_u32_e32 v1, 0xfffffc10, v1
 ; GFX9-NEXT:    v_or_b32_e32 v3, v5, v3
 ; GFX9-NEXT:    v_lshl_or_b32 v4, v1, 12, v0
-; GFX9-NEXT:    v_cmp_gt_i32_e32 vcc, 1, v1
+; GFX9-NEXT:    v_cmp_ge_i32_e32 vcc, 1, v1
 ; GFX9-NEXT:    v_cndmask_b32_e32 v3, v4, v3, vcc
 ; GFX9-NEXT:    v_and_b32_e32 v4, 7, v3
-; GFX9-NEXT:    v_cmp_lt_i32_e32 vcc, 5, v4
+; GFX9-NEXT:    v_cmp_le_i32_e32 vcc, 5, v4
 ; GFX9-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
 ; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v4
 ; GFX9-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
@@ -985,7 +985,7 @@ define half @v_copysign_out_f16_mag_f64_sign_f16(double %mag, half %sign) {
 ; GFX9-NEXT:    v_lshrrev_b32_e32 v3, 2, v3
 ; GFX9-NEXT:    v_add_u32_e32 v3, v3, v4
 ; GFX9-NEXT:    v_mov_b32_e32 v4, 0x7c00
-; GFX9-NEXT:    v_cmp_gt_i32_e32 vcc, 31, v1
+; GFX9-NEXT:    v_cmp_ge_i32_e32 vcc, 31, v1
 ; GFX9-NEXT:    v_cndmask_b32_e32 v3, v4, v3, vcc
 ; GFX9-NEXT:    v_mov_b32_e32 v5, 0x7e00
 ; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
@@ -1019,18 +1019,18 @@ define half @v_copysign_out_f16_mag_f64_sign_f16(double %mag, half %sign) {
 ; GFX11-NEXT:    v_cmp_ne_u32_e32 vcc_lo, v3, v4
 ; GFX11-NEXT:    v_lshl_or_b32 v4, v1, 12, v0
 ; GFX11-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc_lo
-; GFX11-NEXT:    v_cmp_gt_i32_e32 vcc_lo, 1, v1
+; GFX11-NEXT:    v_cmp_ge_i32_e32 vcc_lo, 1, v1
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX11-NEXT:    v_or_b32_e32 v3, v5, v3
 ; GFX11-NEXT:    v_cndmask_b32_e32 v3, v4, v3, vcc_lo
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
 ; GFX11-NEXT:    v_and_b32_e32 v4, 7, v3
 ; GFX11-NEXT:    v_lshrrev_b32_e32 v3, 2, v3
-; GFX11-NEXT:    v_cmp_lt_i32_e32 vcc_lo, 5, v4
+; GFX11-NEXT:    v_cmp_le_i32_e32 vcc_lo, 5, v4
 ; GFX11-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc_lo
 ; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v4
 ; GFX11-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc_lo
-; GFX11-NEXT:    v_cmp_gt_i32_e32 vcc_lo, 31, v1
+; GFX11-NEXT:    v_cmp_ge_i32_e32 vcc_lo, 31, v1
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX11-NEXT:    v_or_b32_e32 v4, v4, v5
 ; GFX11-NEXT:    v_dual_mov_b32 v4, 0x7e00 :: v_dual_add_nc_u32 v3, v3, v4
@@ -3107,10 +3107,10 @@ define <2 x half> @v_copysign_out_v2f16_mag_v2f64_sign_v2f16(<2 x double> %mag,
 ; SI-NEXT:    v_lshlrev_b32_e32 v8, 12, v7
 ; SI-NEXT:    v_or_b32_e32 v6, v9, v6
 ; SI-NEXT:    v_or_b32_e32 v8, v2, v8
-; SI-NEXT:    v_cmp_gt_i32_e32 vcc, 1, v7
+; SI-NEXT:    v_cmp_ge_i32_e32 vcc, 1, v7
 ; SI-NEXT:    v_cndmask_b32_e32 v6, v8, v6, vcc
 ; SI-NEXT:    v_and_b32_e32 v8, 7, v6
-; SI-NEXT:    v_cmp_lt_i32_e32 vcc, 5, v8
+; SI-NEXT:    v_cmp_le_i32_e32 vcc, 5, v8
 ; SI-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
 ; SI-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v8
 ; SI-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
@@ -3118,7 +3118,7 @@ define <2 x half> @v_copysign_out_v2f16_mag_v2f64_sign_v2f16(<2 x double> %mag,
 ; SI-NEXT:    v_lshrrev_b32_e32 v6, 2, v6
 ; SI-NEXT:    v_add_i32_e32 v6, vcc, v6, v8
 ; SI-NEXT:    v_mov_b32_e32 v8, 0x7c00
-; SI-NEXT:    v_cmp_gt_i32_e32 vcc, 31, v7
+; SI-NEXT:    v_cmp_ge_i32_e32 vcc, 31, v7
 ; SI-NEXT:    v_cndmask_b32_e32 v6, v8, v6, vcc
 ; SI-NEXT:    v_mov_b32_e32 v9, 0x7e00
 ; SI-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v2
@@ -3148,17 +3148,17 @@ define <2 x half> @v_copysign_out_v2f16_mag_v2f64_sign_v2f16(<2 x double> %mag,
 ; SI-NEXT:    v_lshlrev_b32_e32 v7, 12, v6
 ; SI-NEXT:    v_or_b32_e32 v3, v10, v3
 ; SI-NEXT:    v_or_b32_e32 v7, v0, v7
-; SI-NEXT:    v_cmp_gt_i32_e32 vcc, 1, v6
+; SI-NEXT:    v_cmp_ge_i32_e32 vcc, 1, v6
 ; SI-NEXT:    v_cndmask_b32_e32 v3, v7, v3, vcc
 ; SI-NEXT:    v_and_b32_e32 v7, 7, v3
-; SI-NEXT:    v_cmp_lt_i32_e32 vcc, 5, v7
+; SI-NEXT:    v_cmp_le_i32_e32 vcc, 5, v7
 ; SI-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
 ; SI-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v7
 ; SI-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
 ; SI-NEXT:    v_or_b32_e32 v7, v7, v10
 ; SI-NEXT:    v_lshrrev_b32_e32 v3, 2, v3
 ; SI-NEXT:    v_add_i32_e32 v3, vcc, v3, v7
-; SI-NEXT:    v_cmp_gt_i32_e32 vcc, 31, v6
+; SI-NEXT:    v_cmp_ge_i32_e32 vcc, 31, v6
 ; SI-NEXT:    v_cvt_f16_f32_e32 v5, v5
 ; SI-NEXT:    v_cvt_f16_f32_e32 v4, v4
 ; SI-NEXT:    v_cndmask_b32_e32 v3, v8, v3, vcc
@@ -3202,10 +3202,10 @@ define <2 x half> @v_copysign_out_v2f16_mag_v2f64_sign_v2f16(<2 x double> %mag,
 ; VI-NEXT:    v_lshlrev_b32_e32 v6, 12, v3
 ; VI-NEXT:    v_or_b32_e32 v5, v7, v5
 ; VI-NEXT:    v_or_b32_e32 v6, v2, v6
-; VI-NEXT:    v_cmp_gt_i32_e32 vcc, 1, v3
+; VI-NEXT:    v_cmp_ge_i32_e32 vcc, 1, v3
 ; VI-NEXT:    v_cndmask_b32_e32 v5, v6, v5, vcc
 ; VI-NEXT:    v_and_b32_e32 v6, 7, v5
-; VI-NEXT:    v_cmp_lt_i32_e32 vcc, 5, v6
+; VI-NEXT:    v_cmp_le_i32_e32 vcc, 5, v6
 ; VI-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
 ; VI-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v6
 ; VI-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
@@ -3213,7 +3213,7 @@ define <2 x half> @v_copysign_out_v2f16_mag_v2f64_sign_v2f16(<2 x double> %mag,
 ; VI-NEXT:    v_lshrrev_b32_e32 v5, 2, v5
 ; VI-NEXT:    v_add_u32_e32 v5, vcc, v5, v6
 ; VI-NEXT:    v_mov_b32_e32 v6, 0x7c00
-; VI-NEXT:    v_cmp_gt_i32_e32 vcc, 31, v3
+; VI-NEXT:    v_cmp_ge_i32_e32 vcc, 31, v3
 ; VI-NEXT:    v_cndmask_b32_e32 v5, v6, v5, vcc
 ; VI-NEXT:    v_mov_b32_e32 v7, 0x7e00
 ; VI-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v2
@@ -3240,17 +3240,17 @@ define <2 x half> @v_copysign_out_v2f16_mag_v2f64_sign_v2f16(<2 x double> %mag,
 ; VI-NEXT:    v_lshlrev_b32_e32 v5, 12, v1
 ; VI-NEXT:    v_or_b32_e32 v3, v8, v3
 ; VI-NEXT:    v_or_b32_e32 v5, v0, v5
-; VI-NEXT:    v_cmp_gt_i32_e32 vcc, 1, v1
+; VI-NEXT:    v_cmp_ge_i32_e32 vcc, 1, v1
 ; VI-NEXT:    v_cndmask_b32_e32 v3, v5, v3, vcc
 ; VI-NEXT:    v_and_b32_e32 v5, 7, v3
-; VI-NEXT:    v_cmp_lt_i32_e32 vcc, 5, v5
+; VI-NEXT:    v_cmp_le_i32_e32 vcc, 5, v5
 ; VI-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
 ; VI-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v5
 ; VI-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
 ; VI-NEXT:    v_or_b32_e32 v5, v5, v8
 ; VI-NEXT:    v_lshrrev_b32_e32 v3, 2, v3
 ; VI-NEXT:    v_add_u32_e32 v3, vcc, v3, v5
-; VI-NEXT:    v_cmp_gt_i32_e32 vcc, 31, v1
+; VI-NEXT:    v_cmp_ge_i32_e32 vcc, 31, v1
 ; VI-NEXT:    v_cndmask_b32_e32 v3, v6, v3, vcc
 ; VI-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
 ; VI-NEXT:    v_cndmask_b32_e32 v0, v6, v7, vcc
@@ -3284,10 +3284,10 @@ define <2 x half> @v_copysign_out_v2f16_mag_v2f64_sign_v2f16(<2 x double> %mag,
 ; GFX9-NEXT:    v_add_u32_e32 v6, 0xfffffc10, v6
 ; GFX9-NEXT:    v_or_b32_e32 v5, v8, v5
 ; GFX9-NEXT:    v_lshl_or_b32 v7, v6, 12, v0
-; GFX9-NEXT:    v_cmp_gt_i32_e32 vcc, 1, v6
+; GFX9-NEXT:    v_cmp_ge_i32_e32 vcc, 1, v6
 ; GFX9-NEXT:    v_cndmask_b32_e32 v5, v7, v5, vcc
 ; GFX9-NEXT:    v_and_b32_e32 v7, 7, v5
-; GFX9-NEXT:    v_cmp_lt_i32_e32 vcc, 5, v7
+; GFX9-NEXT:    v_cmp_le_i32_e32 vcc, 5, v7
 ; GFX9-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
 ; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v7
 ; GFX9-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
@@ -3295,7 +3295,7 @@ define <2 x half> @v_copysign_out_v2f16_mag_v2f64_sign_v2f16(<2 x double> %mag,
 ; GFX9-NEXT:    v_lshrrev_b32_e32 v5, 2, v5
 ; GFX9-NEXT:    v_add_u32_e32 v5, v5, v7
 ; GFX9-NEXT:    v_mov_b32_e32 v7, 0x7c00
-; GFX9-NEXT:    v_cmp_gt_i32_e32 vcc, 31, v6
+; GFX9-NEXT:    v_cmp_ge_i32_e32 vcc, 31, v6
 ; GFX9-NEXT:    v_cndmask_b32_e32 v5, v7, v5, vcc
 ; GFX9-NEXT:    v_mov_b32_e32 v8, 0x7e00
 ; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
@@ -3322,17 +3322,17 @@ define <2 x half> @v_copysign_out_v2f16_mag_v2f64_sign_v2f16(<2 x double> %mag,
 ; GFX9-NEXT:    v_add_u32_e32 v5, 0xfffffc10, v5
 ; GFX9-NEXT:    v_or_b32_e32 v2, v9, v2
 ; GFX9-NEXT:    v_lshl_or_b32 v6, v5, 12, v1
-; GFX9-NEXT:    v_cmp_gt_i32_e32 vcc, 1, v5
+; GFX9-NEXT:    v_cmp_ge_i32_e32 vcc, 1, v5
 ; GFX9-NEXT:    v_cndmask_b32_e32 v2, v6, v2, vcc
 ; GFX9-NEXT:    v_and_b32_e32 v6, 7, v2
-; GFX9-NEXT:    v_cmp_lt_i32_e32 vcc, 5, v6
+; GFX9-NEXT:    v_cmp_le_i32_e32 vcc, 5, v6
 ; GFX9-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
 ; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v6
 ; GFX9-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
 ; GFX9-NEXT:    v_or_b32_e32 v6, v6, v9
 ; GFX9-NEXT:    v_lshrrev_b32_e32 v2, 2, v2
 ; GFX9-NEXT:    v_add_u32_e32 v2, v2, v6
-; GFX9-NEXT:    v_cmp_gt_i32_e32 vcc, 31, v5
+; GFX9-NEXT:    v_cmp_ge_i32_e32 vcc, 31, v5
 ; GFX9-NEXT:    v_cndmask_b32_e32 v2, v7, v2, vcc
 ; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v1
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v7, v8, vcc
@@ -3384,13 +3384,13 @@ define <2 x half> @v_copysign_out_v2f16_mag_v2f64_sign_v2f16(<2 x double> %mag,
 ; GFX11-TRUE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, v5, v10
 ; GFX11-TRUE16-NEXT:    v_or_b32_e32 v8, v11, v8
 ; GFX11-TRUE16-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc_lo
-; GFX11-TRUE16-NEXT:    v_cmp_gt_i32_e32 vcc_lo, 1, v6
+; GFX11-TRUE16-NEXT:    v_cmp_ge_i32_e32 vcc_lo, 1, v6
 ; GFX11-TRUE16-NEXT:    v_add_nc_u32_e32 v7, 0xfffffc10, v7
 ; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
 ; GFX11-TRUE16-NEXT:    v_or_b32_e32 v5, v12, v5
 ; GFX11-TRUE16-NEXT:    v_cndmask_b32_e32 v8, v9, v8, vcc_lo
 ; GFX11-TRUE16-NEXT:    v_lshl_or_b32 v10, v7, 12, v0
-; GFX11-TRUE16-NEXT:    v_cmp_gt_i32_e32 vcc_lo, 1, v7
+; GFX11-TRUE16-NEXT:    v_cmp_ge_i32_e32 vcc_lo, 1, v7
 ; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4)
 ; GFX11-TRUE16-NEXT:    v_dual_cndmask_b32 v5, v10, v5 :: v_dual_mov_b32 v10, 0x7e00
 ; GFX11-TRUE16-NEXT:    v_and_b32_e32 v9, 7, v8
@@ -3398,11 +3398,11 @@ define <2 x half> @v_copysign_out_v2f16_mag_v2f64_sign_v2f16(<2 x double> %mag,
 ; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_4)
 ; GFX11-TRUE16-NEXT:    v_and_b32_e32 v11, 7, v5
 ; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v5, 2, v5
-; GFX11-TRUE16-NEXT:    v_cmp_lt_i32_e32 vcc_lo, 5, v9
+; GFX11-TRUE16-NEXT:    v_cmp_le_i32_e32 vcc_lo, 5, v9
 ; GFX11-TRUE16-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc_lo
 ; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v9
 ; GFX11-TRUE16-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc_lo
-; GFX11-TRUE16-NEXT:    v_cmp_lt_i32_e32 vcc_lo, 5, v11
+; GFX11-TRUE16-NEXT:    v_cmp_le_i32_e32 vcc_lo, 5, v11
 ; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
 ; GFX11-TRUE16-NEXT:    v_or_b32_e32 v9, v9, v12
 ; GFX11-TRUE16-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc_lo
@@ -3415,11 +3415,11 @@ define <2 x half> @v_copysign_out_v2f16_mag_v2f64_sign_v2f16(<2 x double> %mag,
 ; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
 ; GFX11-TRUE16-NEXT:    v_or_b32_e32 v11, v11, v13
 ; GFX11-TRUE16-NEXT:    v_cndmask_b32_e32 v2, 0x7c00, v10, vcc_lo
-; GFX11-TRUE16-NEXT:    v_cmp_gt_i32_e32 vcc_lo, 31, v6
+; GFX11-TRUE16-NEXT:    v_cmp_ge_i32_e32 vcc_lo, 31, v6
 ; GFX11-TRUE16-NEXT:    v_cndmask_b32_e32 v3, 0x7c00, v3, vcc_lo
 ; GFX11-TRUE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
 ; GFX11-TRUE16-NEXT:    v_dual_cndmask_b32 v0, 0x7c00, v10 :: v_dual_add_nc_u32 v5, v5, v11
-; GFX11-TRUE16-NEXT:    v_cmp_gt_i32_e32 vcc_lo, 31, v7
+; GFX11-TRUE16-NEXT:    v_cmp_ge_i32_e32 vcc_lo, 31, v7
 ; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_2)
 ; GFX11-TRUE16-NEXT:    v_cndmask_b32_e32 v5, 0x7c00, v5, vcc_lo
 ; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0x40f, v6
@@ -3476,25 +3476,25 @@ define <2 x half> @v_copysign_out_v2f16_mag_v2f64_sign_v2f16(<2 x double> %mag,
 ; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_3)
 ; GFX11-FAKE16-NEXT:    v_or_b32_e32 v8, v11, v8
 ; GFX11-FAKE16-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc_lo
-; GFX11-FAKE16-NEXT:    v_cmp_gt_i32_e32 vcc_lo, 1, v6
+; GFX11-FAKE16-NEXT:    v_cmp_ge_i32_e32 vcc_lo, 1, v6
 ; GFX11-FAKE16-NEXT:    v_add_nc_u32_e32 v7, 0xfffffc10, v7
 ; GFX11-FAKE16-NEXT:    v_or_b32_e32 v5, v12, v5
 ; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v8, v9, v8, vcc_lo
 ; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
 ; GFX11-FAKE16-NEXT:    v_lshl_or_b32 v10, v7, 12, v2
-; GFX11-FAKE16-NEXT:    v_cmp_gt_i32_e32 vcc_lo, 1, v7
+; GFX11-FAKE16-NEXT:    v_cmp_ge_i32_e32 vcc_lo, 1, v7
 ; GFX11-FAKE16-NEXT:    v_and_b32_e32 v9, 7, v8
 ; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v8, 2, v8
 ; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
 ; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v5, v10, v5, vcc_lo
-; GFX11-FAKE16-NEXT:    v_cmp_lt_i32_e32 vcc_lo, 5, v9
+; GFX11-FAKE16-NEXT:    v_cmp_le_i32_e32 vcc_lo, 5, v9
 ; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
 ; GFX11-FAKE16-NEXT:    v_and_b32_e32 v10, 7, v5
 ; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 2, v5
 ; GFX11-FAKE16-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc_lo
 ; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v9
 ; GFX11-FAKE16-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc_lo
-; GFX11-FAKE16-NEXT:    v_cmp_lt_i32_e32 vcc_lo, 5, v10
+; GFX11-FAKE16-NEXT:    v_cmp_le_i32_e32 vcc_lo, 5, v10
 ; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
 ; GFX11-FAKE16-NEXT:    v_or_b32_e32 v9, v9, v11
 ; GFX11-FAKE16-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc_lo
@@ -3507,9 +3507,9 @@ define <2 x half> @v_copysign_out_v2f16_mag_v2f64_sign_v2f16(<2 x double> %mag,
 ; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x7c00, v13, vcc_lo
 ; GFX11-FAKE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v2
 ; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v2, 0x7c00, v13, vcc_lo
-; GFX11-FAKE16-NEXT:    v_cmp_gt_i32_e32 vcc_lo, 31, v6
+; GFX11-FAKE16-NEXT:    v_cmp_ge_i32_e32 vcc_lo, 31, v6
 ; GFX11-FAKE16-NEXT:    v_dual_cndmask_b32 v8, 0x7c00, v8 :: v_dual_add_nc_u32 v5, v5, v10
-; GFX11-FAKE16-NEXT:    v_cmp_gt_i32_e32 vcc_lo, 31, v7
+; GFX11-FAKE16-NEXT:    v_cmp_ge_i32_e32 vcc_lo, 31, v7
 ; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_4)
 ; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v5, 0x7c00, v5, vcc_lo
 ; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0x40f, v6
@@ -4755,10 +4755,10 @@ define <3 x half> @v_copysign_out_v3f16_mag_v3f64_sign_v3f16(<3 x double> %mag,
 ; SI-NEXT:    v_lshlrev_b32_e32 v11, 12, v10
 ; SI-NEXT:    v_or_b32_e32 v9, v12, v9
 ; SI-NEXT:    v_or_b32_e32 v11, v4, v11
-; SI-NEXT:    v_cmp_gt_i32_e32 vcc, 1, v10
+; SI-NEXT:    v_cmp_ge_i32_e32 vcc, 1, v10
 ; SI-NEXT:    v_cndmask_b32_e32 v9, v11, v9, vcc
 ; SI-NEXT:    v_and_b32_e32 v11, 7, v9
-; SI-NEXT:    v_cmp_lt_i32_e32 vcc, 5, v11
+; SI-NEXT:    v_cmp_le_i32_e32 vcc, 5, v11
 ; SI-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
 ; SI-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v11
 ; SI-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
@@ -4766,7 +4766,7 @@ define <3 x half> @v_copysign_out_v3f16_mag_v3f64_sign_v3f16(<3 x double> %mag,
 ; SI-NEXT:    v_lshrrev_b32_e32 v9, 2, v9
 ; SI-NEXT:    v_add_i32_e32 v9, vcc, v9, v11
 ; SI-NEXT:    v_mov_b32_e32 v11, 0x7c00
-; SI-NEXT:    v_cmp_gt_i32_e32 vcc, 31, v10
+; SI-NEXT:    v_cmp_ge_i32_e32 vcc, 31, v10
 ; SI-NEXT:    v_cndmask_b32_e32 v9, v11, v9, vcc
 ; SI-NEXT:    v_mov_b32_e32 v12, 0x7e00
 ; SI-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
@@ -4796,17 +4796,17 @@ define <3 x half> @v_copysign_out_v3f16_mag_v3f64_sign_v3f16(<3 x double> %mag,
 ; SI-NEXT:    v_lshlrev_b32_e32 v10, 12, v9
 ; SI-NEXT:    v_or_b32_e32 v5, v13, v5
 ; SI-NEXT:    v_or_b32_e32 v10, v2, v10
-; SI-NEXT:    v_cmp_gt_i32_e32 vcc, 1, v9
+; SI-NEXT:    v_cmp_ge_i32_e32 vcc, 1, v9
 ; SI-NEXT:    v_cndmask_b32_e32 v5, v10, v5, vcc
 ; SI-NEXT:    v_and_b32_e32 v10, 7, v5
-; SI-NEXT:    v_cmp_lt_i32_e32 vcc, 5, v10
+; SI-NEXT:    v_cmp_le_i32_e32 vcc, 5, v10
 ; SI-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
 ; SI-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v10
 ; SI-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
 ; SI-NEXT:    v_or_b32_e32 v10, v10, v13
 ; SI-NEXT:    v_lshrrev_b32_e32 v5, 2, v5
 ; SI-NEXT:    v_add_i32_e32 v5, vcc, v5, v10
-; SI-NEXT:    v_cmp_gt_i32_e32 vcc, 31, v9
+; SI-NEXT:    v_cmp_ge_i32_e32 vcc, 31, v9
 ; SI-NEXT:    v_cndmask_b32_e32 v5, v11, v5, vcc
 ; SI-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v2
 ; SI-NEXT:    v_cndmask_b32_e32 v2, v11, v12, vcc
@@ -4834,17 +4834,17 @@ define <3 x half> @v_copysign_out_v3f16_mag_v3f64_sign_v3f16(<3 x double> %mag,
 ; SI-NEXT:    v_lshlrev_b32_e32 v9, 12, v5
 ; SI-NEXT:    v_or_b32_e32 v3, v10, v3
 ; SI-NEXT:    v_or_b32_e32 v9, v0, v9
-; SI-NEXT:    v_cmp_gt_i32_e32 vcc, 1, v5
+; SI-NEXT:    v_cmp_ge_i32_e32 vcc, 1, v5
 ; SI-NEXT:    v_cndmask_b32_e32 v3, v9, v3, vcc
 ; SI-NEXT:    v_and_b32_e32 v9, 7, v3
-; SI-NEXT:    v_cmp_lt_i32_e32 vcc, 5, v9
+; SI-NEXT:    v_cmp_le_i32_e32 vcc, 5, v9
 ; SI-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
 ; SI-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v9
 ; SI-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
 ; SI-NEXT:    v_or_b32_e32 v9, v9, v10
 ; SI-NEXT:    v_lshrrev_b32_e32 v3, 2, v3
 ; SI-NEXT:    v_add_i32_e32 v3, vcc, v3, v9
-; SI-NEXT:    v_cmp_gt_i32_e32 vcc, 31, v5
+; SI-NEXT:    v_cmp_ge_i32_e32 vcc, 31, v5
 ; SI-NEXT:    v_cvt_f16_f32_e32 v7, v7
 ; SI-NEXT:    v_cndmask_b32_e32 v3, v11, v3, vcc
 ; SI-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
@@ -4888,10 +4888,10 @@ define <3 x half> @v_copysign_out_v3f16_mag_v3f64_sign_v3f16(<3 x double> %mag,
 ; VI-NEXT:    v_lshlrev_b32_e32 v9, 12, v5
 ; VI-NEXT:    v_or_b32_e32 v8, v10, v8
 ; VI-NEXT:    v_or_b32_e32 v9, v4, v9
-; VI-NEXT:    v_cmp_gt_i32_e32 vcc, 1, v5
+; VI-NEXT:    v_cmp_ge_i32_e32 vcc, 1, v5
 ; VI-NEXT:    v_cndmask_b32_e32 v8, v9, v8, vcc
 ; VI-NEXT:    v_and_b32_e32 v9, 7, v8
-; VI-NEXT:    v_cmp_lt_i32_e32 vcc, 5, v9
+; VI-NEXT:    v_cmp_le_i32_e32 vcc, 5, v9
 ; VI-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
 ; VI-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v9
 ; VI-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
@@ -4899,7 +4899,7 @@ define <3 x half> @v_copysign_out_v3f16_mag_v3f64_sign_v3f16(<3 x double> %mag,
 ; VI-NEXT:    v_lshrrev_b32_e32 v8, 2, v8
 ; VI-NEXT:    v_add_u32_e32 v8, vcc, v8, v9
 ; VI-NEXT:    v_mov_b32_e32 v9, 0x7c00
-; VI-NEXT:    v_cmp_gt_i32_e32 vcc, 31, v5
+; VI-NEXT:    v_cmp_ge_i32_e32 vcc, 31, v5
 ; VI-NEXT:    v_cndmask_b32_e32 v8, v9, v8, vcc
 ; VI-NEXT:    v_mov_b32_e32 v10, 0x7e00
 ; VI-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
@@ -4926,17 +4926,17 @@ define <3 x half> @v_copysign_out_v3f16_mag_v3f64_sign_v3f16(<3 x double> %mag,
 ; VI-NEXT:    v_lshlrev_b32_e32 v8, 12, v1
 ; VI-NEXT:    v_or_b32_e32 v5, v11, v5
 ; VI-NEXT:    v_or_b32_e32 v8, v0, v8
-; VI-NEXT:    v_cmp_gt_i32_e32 vcc, 1, v1
+; VI-NEXT:    v_cmp_ge_i32_e32 vcc, 1, v1
 ; VI-NEXT:    v_cndmask_b32_e32 v5, v8, v5, vcc
 ; VI-NEXT:    v_and_b32_e32 v8, 7, v5
-; VI-NEXT:    v_cmp_lt_i32_e32 vcc, 5, v8
+; VI-NEXT:    v_cmp_le_i32_e32 vcc, 5, v8
 ; VI-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
 ; VI-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v8
 ; VI-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
 ; VI-NEXT:    v_or_b32_e32 v8, v8, v11
 ; VI-NEXT:    v_lshrrev_b32_e32 v5, 2, v5
 ; VI-NEXT:    v_add_u32_e32 v5, vcc, v5, v8
-; VI-NEXT:    v_cmp_gt_i32_e32 vcc, 31, v1
+; VI-NEXT:    v_cmp_ge_i32_e32 vcc, 31, v1
 ; VI-NEXT:    v_cndmask_b32_e32 v5, v9, v5, vcc
 ; VI-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
 ; VI-NEXT:    v_cndmask_b32_e32 v0, v9, v10, vcc
@@ -4961,17 +4961,17 @@ define <3 x half> @v_copysign_out_v3f16_mag_v3f64_sign_v3f16(<3 x double> %mag,
 ; VI-NEXT:    v_lshlrev_b32_e32 v5, 12, v3
 ; VI-NEXT:    v_or_b32_e32 v2, v8, v2
 ; VI-NEXT:    v_or_b32_e32 v5, v1, v5
-; VI-NEXT:    v_cmp_gt_i32_e32 vcc, 1, v3
+; VI-NEXT:    v_cmp_ge_i32_e32 vcc, 1, v3
 ; VI-NEXT:    v_cndmask_b32_e32 v2, v5, v2, vcc
 ; VI-NEXT:    v_and_b32_e32 v5, 7, v2
-; VI-NEXT:    v_cmp_lt_i32_e32 vcc, 5, v5
+; VI-NEXT:    v_cmp_le_i32_e32 vcc, 5, v5
 ; VI-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
 ; VI-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v5
 ; VI-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
 ; VI-NEXT:    v_or_b32_e32 v5, v5, v8
 ; VI-NEXT:    v_lshrrev_b32_e32 v2, 2, v2
 ; VI-NEXT:    v_add_u32_e32 v2, vcc, v2, v5
-; VI-NEXT:    v_cmp_gt_i32_e32 vcc, 31, v3
+; VI-NEXT:    v_cmp_ge_i32_e32 vcc, 31, v3
 ; VI-NEXT:    v_cndmask_b32_e32 v2, v9, v2, vcc
 ; VI-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v1
 ; VI-NEXT:    v_cndmask_b32_e32 v1, v9, v10, vcc
@@ -5006,10 +5006,10 @@ define <3 x half> @v_copysign_out_v3f16_mag_v3f64_sign_v3f16(<3 x double> %mag,
 ; GFX9-NEXT:    v_add_u32_e32 v5, 0xfffffc10, v5
 ; GFX9-NEXT:    v_or_b32_e32 v8, v10, v8
 ; GFX9-NEXT:    v_lshl_or_b32 v9, v5, 12, v4
-; GFX9-NEXT:    v_cmp_gt_i32_e32 vcc, 1, v5
+; GFX9-NEXT:    v_cmp_ge_i32_e32 vcc, 1, v5
 ; GFX9-NEXT:    v_cndmask_b32_e32 v8, v9, v8, vcc
 ; GFX9-NEXT:    v_and_b32_e32 v9, 7, v8
-; GFX9-NEXT:    v_cmp_lt_i32_e32 vcc, 5, v9
+; GFX9-NEXT:    v_cmp_le_i32_e32 vcc, 5, v9
 ; GFX9-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
 ; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v9
 ; GFX9-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
@@ -5017,7 +5017,7 @@ define <3 x half> @v_copysign_out_v3f16_mag_v3f64_sign_v3f16(<3 x double> %mag,
 ; GFX9-NEXT:    v_lshrrev_b32_e32 v8, 2, v8
 ; GFX9-NEXT:    v_add_u32_e32 v8, v8, v9
 ; GFX9-NEXT:    v_mov_b32_e32 v9, 0x7c00
-; GFX9-NEXT:    v_cmp_gt_i32_e32 vcc, 31, v5
+; GFX9-NEXT:    v_cmp_ge_i32_e32 vcc, 31, v5
 ; GFX9-NEXT:    v_cndmask_b32_e32 v8, v9, v8, vcc
 ; GFX9-NEXT:    v_mov_b32_e32 v10, 0x7e00
 ; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
@@ -5041,17 +5041,17 @@ define <3 x half> @v_copysign_out_v3f16_mag_v3f64_sign_v3f16(<3 x double> %mag,
 ; GFX9-NEXT:    v_add_u32_e32 v8, 0xfffffc10, v8
 ; GFX9-NEXT:    v_or_b32_e32 v5, v12, v5
 ; GFX9-NEXT:    v_lshl_or_b32 v11, v8, 12, v0
-; GFX9-NEXT:    v_cmp_gt_i32_e32 vcc, 1, v8
+; GFX9-NEXT:    v_cmp_ge_i32_e32 vcc, 1, v8
 ; GFX9-NEXT:    v_cndmask_b32_e32 v5, v11, v5, vcc
 ; GFX9-NEXT:    v_and_b32_e32 v11, 7, v5
-; GFX9-NEXT:    v_cmp_lt_i32_e32 vcc, 5, v11
+; GFX9-NEXT:    v_cmp_le_i32_e32 vcc, 5, v11
 ; GFX9-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
 ; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v11
 ; GFX9-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
 ; GFX9-NEXT:    v_or_b32_e32 v11, v11, v12
 ; GFX9-NEXT:    v_lshrrev_b32_e32 v5, 2, v5
 ; GFX9-NEXT:    v_add_u32_e32 v5, v5, v11
-; GFX9-NEXT:    v_cmp_gt_i32_e32 vcc, 31, v8
+; GFX9-NEXT:    v_cmp_ge_i32_e32 vcc, 31, v8
 ; GFX9-NEXT:    v_cndmask_b32_e32 v5, v9, v5, vcc
 ; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v9, v10, vcc
@@ -5076,17 +5076,17 @@ define <3 x half> @v_copysign_out_v3f16_mag_v3f64_sign_v3f16(<3 x double> %mag,
 ; GFX9-NEXT:    v_add_u32_e32 v5, 0xfffffc10, v5
 ; GFX9-NEXT:    v_or_b32_e32 v2, v11, v2
 ; GFX9-NEXT:    v_lshl_or_b32 v8, v5, 12, v1
-; GFX9-NEXT:    v_cmp_gt_i32_e32 vcc, 1, v5
+; GFX9-NEXT:    v_cmp_ge_i32_e32 vcc, 1, v5
 ; GFX9-NEXT:    v_cndmask_b32_e32 v2, v8, v2, vcc
 ; GFX9-NEXT:    v_and_b32_e32 v8, 7, v2
-; GFX9-NEXT:    v_cmp_lt_i32_e32 vcc, 5, v8
+; GFX9-NEXT:    v_cmp_le_i32_e32 vcc, 5, v8
 ; GFX9-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
 ; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v8
 ; GFX9-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
 ; GFX9-NEXT:    v_or_b32_e32 v8, v8, v11
 ; GFX9-NEXT:    v_lshrrev_b32_e32 v2, 2, v2
 ; GFX9-NEXT:    v_add_u32_e32 v2, v2, v8
-; GFX9-NEXT:    v_cmp_gt_i32_e32 vcc, 31, v5
+; GFX9-NEXT:    v_cmp_ge_i32_e32 vcc, 31, v5
 ; GFX9-NEXT:    v_cndmask_b32_e32 v2, v9, v2, vcc
 ; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v1
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v9, v10, vcc
@@ -5150,7 +5150,7 @@ define <3 x half> @v_copysign_out_v3f16_mag_v3f64_sign_v3f16(<3 x double> %mag,
 ; GFX11-TRUE16-NEXT:    v_lshlrev_b32_e32 v13, v13, v12
 ; GFX11-TRUE16-NEXT:    v_or_b32_e32 v8, v15, v8
 ; GFX11-TRUE16-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc_lo
-; GFX11-TRUE16-NEXT:    v_cmp_gt_i32_e32 vcc_lo, 1, v10
+; GFX11-TRUE16-NEXT:    v_cmp_ge_i32_e32 vcc_lo, 1, v10
 ; GFX11-TRUE16-NEXT:    v_add_nc_u32_e32 v5, 0xfffffc10, v5
 ; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_4)
 ; GFX11-TRUE16-NEXT:    v_or_b32_e32 v9, v16, v9
@@ -5161,16 +5161,16 @@ define <3 x half> @v_copysign_out_v3f16_mag_v3f64_sign_v3f16(<3 x double> %mag,
 ; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4)
 ; GFX11-TRUE16-NEXT:    v_and_b32_e32 v15, 7, v8
 ; GFX11-TRUE16-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc_lo
-; GFX11-TRUE16-NEXT:    v_cmp_gt_i32_e32 vcc_lo, 1, v5
+; GFX11-TRUE16-NEXT:    v_cmp_ge_i32_e32 vcc_lo, 1, v5
 ; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v8, 2, v8
 ; GFX11-TRUE16-NEXT:    v_cndmask_b32_e32 v9, v11, v9, vcc_lo
-; GFX11-TRUE16-NEXT:    v_cmp_lt_i32_e32 vcc_lo, 5, v15
+; GFX11-TRUE16-NEXT:    v_cmp_le_i32_e32 vcc_lo, 5, v15
 ; GFX11-TRUE16-NEXT:    v_or_b32_e32 v11, v12, v13
 ; GFX11-TRUE16-NEXT:    v_lshl_or_b32 v12, v14, 12, v0
 ; GFX11-TRUE16-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc_lo
 ; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v15
 ; GFX11-TRUE16-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc_lo
-; GFX11-TRUE16-NEXT:    v_cmp_gt_i32_e32 vcc_lo, 1, v14
+; GFX11-TRUE16-NEXT:    v_cmp_ge_i32_e32 vcc_lo, 1, v14
 ; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
 ; GFX11-TRUE16-NEXT:    v_or_b32_e32 v13, v15, v13
 ; GFX11-TRUE16-NEXT:    v_dual_cndmask_b32 v11, v12, v11 :: v_dual_and_b32 v12, 7, v9
@@ -5178,18 +5178,18 @@ define <3 x half> @v_copysign_out_v3f16_mag_v3f64_sign_v3f16(<3 x double> %mag,
 ; GFX11-TRUE16-NEXT:    v_add_nc_u32_e32 v8, v8, v13
 ; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_4)
 ; GFX11-TRUE16-NEXT:    v_and_b32_e32 v15, 7, v11
-; GFX11-TRUE16-NEXT:    v_cmp_lt_i32_e32 vcc_lo, 5, v12
+; GFX11-TRUE16-NEXT:    v_cmp_le_i32_e32 vcc_lo, 5, v12
 ; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v11, 2, v11
 ; GFX11-TRUE16-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc_lo
 ; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_4) | instid1(VALU_DEP_2)
-; GFX11-TRUE16-NEXT:    v_cmp_lt_i32_e32 vcc_lo, 5, v15
+; GFX11-TRUE16-NEXT:    v_cmp_le_i32_e32 vcc_lo, 5, v15
 ; GFX11-TRUE16-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc_lo
 ; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v15
 ; GFX11-TRUE16-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc_lo
 ; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v12
 ; GFX11-TRUE16-NEXT:    v_or_b32_e32 v13, v15, v13
 ; GFX11-TRUE16-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc_lo
-; GFX11-TRUE16-NEXT:    v_cmp_gt_i32_e32 vcc_lo, 31, v10
+; GFX11-TRUE16-NEXT:    v_cmp_ge_i32_e32 vcc_lo, 31, v10
 ; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
 ; GFX11-TRUE16-NEXT:    v_add_nc_u32_e32 v11, v11, v13
 ; GFX11-TRUE16-NEXT:    v_or_b32_e32 v12, v12, v16
@@ -5199,7 +5199,7 @@ define <3 x half> @v_copysign_out_v3f16_mag_v3f64_sign_v3f16(<3 x double> %mag,
 ; GFX11-TRUE16-NEXT:    v_dual_cndmask_b32 v2, 0x7c00, v17 :: v_dual_add_nc_u32 v9, v9, v12
 ; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0x40f, v10
 ; GFX11-TRUE16-NEXT:    v_cndmask_b32_e32 v2, v8, v2, vcc_lo
-; GFX11-TRUE16-NEXT:    v_cmp_gt_i32_e32 vcc_lo, 31, v14
+; GFX11-TRUE16-NEXT:    v_cmp_ge_i32_e32 vcc_lo, 31, v14
 ; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v8.l, v3.h
 ; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v8.h, 0
 ; GFX11-TRUE16-NEXT:    v_cndmask_b32_e32 v3, 0x7c00, v11, vcc_lo
@@ -5210,7 +5210,7 @@ define <3 x half> @v_copysign_out_v3f16_mag_v3f64_sign_v3f16(<3 x double> %mag,
 ; GFX11-TRUE16-NEXT:    v_cndmask_b32_e32 v0, 0x7c00, v17, vcc_lo
 ; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0x40f, v14
 ; GFX11-TRUE16-NEXT:    v_cndmask_b32_e32 v0, v3, v0, vcc_lo
-; GFX11-TRUE16-NEXT:    v_cmp_gt_i32_e32 vcc_lo, 31, v5
+; GFX11-TRUE16-NEXT:    v_cmp_ge_i32_e32 vcc_lo, 31, v5
 ; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
 ; GFX11-TRUE16-NEXT:    v_and_or_b32 v0, 0x8000, v8, v0
 ; GFX11-TRUE16-NEXT:    v_cndmask_b32_e32 v1, 0x7c00, v9, vcc_lo
@@ -5283,20 +5283,20 @@ define <3 x half> @v_copysign_out_v3f16_mag_v3f64_sign_v3f16(<3 x double> %mag,
 ; GFX11-FAKE16-NEXT:    v_or_b32_e32 v5, v16, v5
 ; GFX11-FAKE16-NEXT:    v_add_nc_u32_e32 v10, 0xfffffc10, v10
 ; GFX11-FAKE16-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc_lo
-; GFX11-FAKE16-NEXT:    v_cmp_gt_i32_e32 vcc_lo, 1, v8
+; GFX11-FAKE16-NEXT:    v_cmp_ge_i32_e32 vcc_lo, 1, v8
 ; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
 ; GFX11-FAKE16-NEXT:    v_lshl_or_b32 v15, v10, 12, v0
 ; GFX11-FAKE16-NEXT:    v_or_b32_e32 v11, v11, v12
 ; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v9, v14, v9, vcc_lo
-; GFX11-FAKE16-NEXT:    v_cmp_gt_i32_e32 vcc_lo, 1, v10
+; GFX11-FAKE16-NEXT:    v_cmp_ge_i32_e32 vcc_lo, 1, v10
 ; GFX11-FAKE16-NEXT:    v_lshl_or_b32 v12, v13, 12, v2
 ; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v5, v15, v5, vcc_lo
-; GFX11-FAKE16-NEXT:    v_cmp_gt_i32_e32 vcc_lo, 1, v13
+; GFX11-FAKE16-NEXT:    v_cmp_ge_i32_e32 vcc_lo, 1, v13
 ; GFX11-FAKE16-NEXT:    v_and_b32_e32 v14, 7, v9
 ; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v9, 2, v9
 ; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v11, v12, v11, vcc_lo
 ; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_3) | instid1(VALU_DEP_1)
-; GFX11-FAKE16-NEXT:    v_cmp_lt_i32_e32 vcc_lo, 5, v14
+; GFX11-FAKE16-NEXT:    v_cmp_le_i32_e32 vcc_lo, 5, v14
 ; GFX11-FAKE16-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc_lo
 ; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v14
 ; GFX11-FAKE16-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc_lo
@@ -5307,25 +5307,25 @@ define <3 x half> @v_copysign_out_v3f16_mag_v3f64_sign_v3f16(<3 x double> %mag,
 ; GFX11-FAKE16-NEXT:    v_and_b32_e32 v16, 7, v11
 ; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 2, v5
 ; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v11, 2, v11
-; GFX11-FAKE16-NEXT:    v_cmp_lt_i32_e32 vcc_lo, 5, v15
+; GFX11-FAKE16-NEXT:    v_cmp_le_i32_e32 vcc_lo, 5, v15
 ; GFX11-FAKE16-NEXT:    v_cndmask_b32_e64 v17, 0, 1, vcc_lo
 ; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v15
 ; GFX11-FAKE16-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc_lo
-; GFX11-FAKE16-NEXT:    v_cmp_lt_i32_e32 vcc_lo, 5, v16
+; GFX11-FAKE16-NEXT:    v_cmp_le_i32_e32 vcc_lo, 5, v16
 ; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
 ; GFX11-FAKE16-NEXT:    v_or_b32_e32 v14, v15, v17
 ; GFX11-FAKE16-NEXT:    v_cndmask_b32_e64 v18, 0, 1, vcc_lo
 ; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v16
 ; GFX11-FAKE16-NEXT:    v_add_nc_u32_e32 v5, v5, v14
 ; GFX11-FAKE16-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc_lo
-; GFX11-FAKE16-NEXT:    v_cmp_gt_i32_e32 vcc_lo, 31, v10
+; GFX11-FAKE16-NEXT:    v_cmp_ge_i32_e32 vcc_lo, 31, v10
 ; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4)
 ; GFX11-FAKE16-NEXT:    v_or_b32_e32 v15, v16, v18
 ; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v5, 0x7c00, v5, vcc_lo
 ; GFX11-FAKE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
 ; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2)
 ; GFX11-FAKE16-NEXT:    v_dual_cndmask_b32 v0, 0x7c00, v12 :: v_dual_add_nc_u32 v11, v11, v15
-; GFX11-FAKE16-NEXT:    v_cmp_gt_i32_e32 vcc_lo, 31, v13
+; GFX11-FAKE16-NEXT:    v_cmp_ge_i32_e32 vcc_lo, 31, v13
 ; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v11, 0x7c00, v11, vcc_lo
 ; GFX11-FAKE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v2
 ; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v2, 0x7c00, v12, vcc_lo
@@ -5335,7 +5335,7 @@ define <3 x half> @v_copysign_out_v3f16_mag_v3f64_sign_v3f16(<3 x double> %mag,
 ; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_2)
 ; GFX11-FAKE16-NEXT:    v_and_or_b32 v0, 0x8000, v1, v0
 ; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v2, v11, v2, vcc_lo
-; GFX11-FAKE16-NEXT:    v_cmp_gt_i32_e32 vcc_lo, 31, v8
+; GFX11-FAKE16-NEXT:    v_cmp_ge_i32_e32 vcc_lo, 31, v8
 ; GFX11-FAKE16-NEXT:    v_and_or_b32 v1, 0x8000, v3, v2
 ; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v5, 0x7c00, v9, vcc_lo
 ; GFX11-FAKE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v4
@@ -5898,10 +5898,10 @@ define <4 x half> @v_copysign_out_v4f16_mag_v4f64_sign_v4f16(<4 x double> %mag,
 ; SI-NEXT:    v_lshlrev_b32_e32 v14, 12, v13
 ; SI-NEXT:    v_or_b32_e32 v12, v15, v12
 ; SI-NEXT:    v_or_b32_e32 v14, v6, v14
-; SI-NEXT:    v_cmp_gt_i32_e32 vcc, 1, v13
+; SI-NEXT:    v_cmp_ge_i32_e32 vcc, 1, v13
 ; SI-NEXT:    v_cndmask_b32_e32 v12, v14, v12, vcc
 ; SI-NEXT:    v_and_b32_e32 v14, 7, v12
-; SI-NEXT:    v_cmp_lt_i32_e32 vcc, 5, v14
+; SI-NEXT:    v_cmp_le_i32_e32 vcc, 5, v14
 ; SI-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
 ; SI-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v14
 ; SI-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
@@ -5909,7 +5909,7 @@ define <4 x half> @v_copysign_out_v4f16_mag_v4f64_sign_v4f16(<4 x double> %mag,
 ; SI-NEXT:    v_lshrrev_b32_e32 v12, 2, v12
 ; SI-NEXT:    v_add_i32_e32 v12, vcc, v12, v14
 ; SI-NEXT:    v_mov_b32_e32 v14, 0x7c00
-; SI-NEXT:    v_cmp_gt_i32_e32 vcc, 31, v13
+; SI-NEXT:    v_cmp_ge_i32_e32 vcc, 31, v13
 ; SI-NEXT:    v_cndmask_b32_e32 v12, v14, v12, vcc
 ; SI-NEXT:    v_mov_b32_e32 v15, 0x7e00
 ; SI-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v6
@@ -5939,17 +5939,17 @@ define <4 x half> @v_copysign_out_v4f16_mag_v4f64_sign_v4f16(<4 x double> %mag,
 ; SI-NEXT:    v_lshlrev_b32_e32 v13, 12, v12
 ; SI-NEXT:    v_or_b32_e32 v7, v16, v7
 ; SI-NEXT:    v_or_b32_e32 v13, v4, v13
-; SI-NEXT:    v_cmp_gt_i32_e32 vcc, 1, v12
+; SI-NEXT:    v_cmp_ge_i32_e32 vcc, 1, v12
 ; SI-NEXT:    v_cndmask_b32_e32 v7, v13, v7, vcc
 ; SI-NEXT:    v_and_b32_e32 v13, 7, v7
-; SI-NEXT:    v_cmp_lt_i32_e32 vcc, 5, v13
+; SI-NEXT:    v_cmp_le_i32_e32 vcc, 5, v13
 ; SI-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
 ; SI-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v13
 ; SI-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
 ; SI-NEXT:    v_or_b32_e32 v13, v13, v16
 ; SI-NEXT:    v_lshrrev_b32_e32 v7, 2, v7
 ; SI-NEXT:    v_add_i32_e32 v7, vcc, v7, v13
-; SI-NEXT:    v_cmp_gt_i32_e32 vcc, 31, v12
+; SI-NEXT:    v_cmp_ge_i32_e32 vcc, 31, v12
 ; SI-NEXT:    v_cndmask_b32_e32 v7, v14, v7, vcc
 ; SI-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
 ; SI-NEXT:    v_cndmask_b32_e32 v4, v14, v15, vcc
@@ -5977,17 +5977,17 @@ define <4 x half> @v_copysign_out_v4f16_mag_v4f64_sign_v4f16(<4 x double> %mag,
 ; SI-NEXT:    v_lshlrev_b32_e32 v12, 12, v7
 ; SI-NEXT:    v_or_b32_e32 v5, v13, v5
 ; SI-NEXT:    v_or_b32_e32 v12, v2, v12
-; SI-NEXT:    v_cmp_gt_i32_e32 vcc, 1, v7
+; SI-NEXT:    v_cmp_ge_i32_e32 vcc, 1, v7
 ; SI-NEXT:    v_cndmask_b32_e32 v5, v12, v5, vcc
 ; SI-NEXT:    v_and_b32_e32 v12, 7, v5
-; SI-NEXT:    v_cmp_lt_i32_e32 vcc, 5, v12
+; SI-NEXT:    v_cmp_le_i32_e32 vcc, 5, v12
 ; SI-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
 ; SI-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v12
 ; SI-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
 ; SI-NEXT:    v_or_b32_e32 v12, v12, v13
 ; SI-NEXT:    v_lshrrev_b32_e32 v5, 2, v5
 ; SI-NEXT:    v_add_i32_e32 v5, vcc, v5, v12
-; SI-NEXT:    v_cmp_gt_i32_e32 vcc, 31, v7
+; SI-NEXT:    v_cmp_ge_i32_e32 vcc, 31, v7
 ; SI-NEXT:    v_cndmask_b32_e32 v5, v14, v5, vcc
 ; SI-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v2
 ; SI-NEXT:    v_cndmask_b32_e32 v2, v14, v15, vcc
@@ -6015,17 +6015,17 @@ define <4 x half> @v_copysign_out_v4f16_mag_v4f64_sign_v4f16(<4 x double> %mag,
 ; SI-NEXT:    v_lshlrev_b32_e32 v7, 12, v5
 ; SI-NEXT:    v_or_b32_e32 v3, v12, v3
 ; SI-NEXT:    v_or_b32_e32 v7, v0, v7
-; SI-NEXT:    v_cmp_gt_i32_e32 vcc, 1, v5
+; SI-NEXT:    v_cmp_ge_i32_e32 vcc, 1, v5
 ; SI-NEXT:    v_cndmask_b32_e32 v3, v7, v3, vcc
 ; SI-NEXT:    v_and_b32_e32 v7, 7, v3
-; SI-NEXT:    v_cmp_lt_i32_e32 vcc, 5, v7
+; SI-NEXT:    v_cmp_le_i32_e32 vcc, 5, v7
 ; SI-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
 ; SI-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v7
 ; SI-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
 ; SI-NEXT:    v_or_b32_e32 v7, v7, v12
 ; SI-NEXT:    v_lshrrev_b32_e32 v3, 2, v3
 ; SI-NEXT:    v_add_i32_e32 v3, vcc, v3, v7
-; SI-NEXT:    v_cmp_gt_i32_e32 vcc, 31, v5
+; SI-NEXT:    v_cmp_ge_i32_e32 vcc, 31, v5
 ; SI-NEXT:    v_cndmask_b32_e32 v3, v14, v3, vcc
 ; SI-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
 ; SI-NEXT:    v_cndmask_b32_e32 v0, v14, v15, vcc
@@ -6069,10 +6069,10 @@ define <4 x half> @v_copysign_out_v4f16_mag_v4f64_sign_v4f16(<4 x double> %mag,
 ; VI-NEXT:    v_lshlrev_b32_e32 v11, 12, v5
 ; VI-NEXT:    v_or_b32_e32 v10, v12, v10
 ; VI-NEXT:    v_or_b32_e32 v11, v4, v11
-; VI-NEXT:    v_cmp_gt_i32_e32 vcc, 1, v5
+; VI-NEXT:    v_cmp_ge_i32_e32 vcc, 1, v5
 ; VI-NEXT:    v_cndmask_b32_e32 v10, v11, v10, vcc
 ; VI-NEXT:    v_and_b32_e32 v11, 7, v10
-; VI-NEXT:    v_cmp_lt_i32_e32 vcc, 5, v11
+; VI-NEXT:    v_cmp_le_i32_e32 vcc, 5, v11
 ; VI-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
 ; VI-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v11
 ; VI-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
@@ -6080,7 +6080,7 @@ define <4 x half> @v_copysign_out_v4f16_mag_v4f64_sign_v4f16(<4 x double> %mag,
 ; VI-NEXT:    v_lshrrev_b32_e32 v10, 2, v10
 ; VI-NEXT:    v_add_u32_e32 v10, vcc, v10, v11
 ; VI-NEXT:    v_mov_b32_e32 v11, 0x7c00
-; VI-NEXT:    v_cmp_gt_i32_e32 vcc, 31, v5
+; VI-NEXT:    v_cmp_ge_i32_e32 vcc, 31, v5
 ; VI-NEXT:    v_cndmask_b32_e32 v10, v11, v10, vcc
 ; VI-NEXT:    v_mov_b32_e32 v12, 0x7e00
 ; VI-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
@@ -6107,17 +6107,17 @@ define <4 x half> @v_copysign_out_v4f16_mag_v4f64_sign_v4f16(<4 x double> %mag,
 ; VI-NEXT:    v_lshlrev_b32_e32 v10, 12, v7
 ; VI-NEXT:    v_or_b32_e32 v6, v13, v6
 ; VI-NEXT:    v_or_b32_e32 v10, v5, v10
-; VI-NEXT:    v_cmp_gt_i32_e32 vcc, 1, v7
+; VI-NEXT:    v_cmp_ge_i32_e32 vcc, 1, v7
 ; VI-NEXT:    v_cndmask_b32_e32 v6, v10, v6, vcc
 ; VI-NEXT:    v_and_b32_e32 v10, 7, v6
-; VI-NEXT:    v_cmp_lt_i32_e32 vcc, 5, v10
+; VI-NEXT:    v_cmp_le_i32_e32 vcc, 5, v10
 ; VI-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
 ; VI-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v10
 ; VI-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
 ; VI-NEXT:    v_or_b32_e32 v10, v10, v13
 ; VI-NEXT:    v_lshrrev_b32_e32 v6, 2, v6
 ; VI-NEXT:    v_add_u32_e32 v6, vcc, v6, v10
-; VI-NEXT:    v_cmp_gt_i32_e32 vcc, 31, v7
+; VI-NEXT:    v_cmp_ge_i32_e32 vcc, 31, v7
 ; VI-NEXT:    v_cndmask_b32_e32 v6, v11, v6, vcc
 ; VI-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v5
 ; VI-NEXT:    v_cndmask_b32_e32 v5, v11, v12, vcc
@@ -6142,17 +6142,17 @@ define <4 x half> @v_copysign_out_v4f16_mag_v4f64_sign_v4f16(<4 x double> %mag,
 ; VI-NEXT:    v_lshlrev_b32_e32 v7, 12, v1
 ; VI-NEXT:    v_or_b32_e32 v6, v10, v6
 ; VI-NEXT:    v_or_b32_e32 v7, v0, v7
-; VI-NEXT:    v_cmp_gt_i32_e32 vcc, 1, v1
+; VI-NEXT:    v_cmp_ge_i32_e32 vcc, 1, v1
 ; VI-NEXT:    v_cndmask_b32_e32 v6, v7, v6, vcc
 ; VI-NEXT:    v_and_b32_e32 v7, 7, v6
-; VI-NEXT:    v_cmp_lt_i32_e32 vcc, 5, v7
+; VI-NEXT:    v_cmp_le_i32_e32 vcc, 5, v7
 ; VI-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
 ; VI-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v7
 ; VI-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
 ; VI-NEXT:    v_or_b32_e32 v7, v7, v10
 ; VI-NEXT:    v_lshrrev_b32_e32 v6, 2, v6
 ; VI-NEXT:    v_add_u32_e32 v6, vcc, v6, v7
-; VI-NEXT:    v_cmp_gt_i32_e32 vcc, 31, v1
+; VI-NEXT:    v_cmp_ge_i32_e32 vcc, 31, v1
 ; VI-NEXT:    v_cndmask_b32_e32 v6, v11, v6, vcc
 ; VI-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
 ; VI-NEXT:    v_cndmask_b32_e32 v0, v11, v12, vcc
@@ -6177,17 +6177,17 @@ define <4 x half> @v_copysign_out_v4f16_mag_v4f64_sign_v4f16(<4 x double> %mag,
 ; VI-NEXT:    v_lshlrev_b32_e32 v6, 12, v3
 ; VI-NEXT:    v_or_b32_e32 v2, v7, v2
 ; VI-NEXT:    v_or_b32_e32 v6, v1, v6
-; VI-NEXT:    v_cmp_gt_i32_e32 vcc, 1, v3
+; VI-NEXT:    v_cmp_ge_i32_e32 vcc, 1, v3
 ; VI-NEXT:    v_cndmask_b32_e32 v2, v6, v2, vcc
 ; VI-NEXT:    v_and_b32_e32 v6, 7, v2
-; VI-NEXT:    v_cmp_lt_i32_e32 vcc, 5, v6
+; VI-NEXT:    v_cmp_le_i32_e32 vcc, 5, v6
 ; VI-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
 ; VI-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v6
 ; VI-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
 ; VI-NEXT:    v_or_b32_e32 v6, v6, v7
 ; VI-NEXT:    v_lshrrev_b32_e32 v2, 2, v2
 ; VI-NEXT:    v_add_u32_e32 v2, vcc, v2, v6
-; VI-NEXT:    v_cmp_gt_i32_e32 vcc, 31, v3
+; VI-NEXT:    v_cmp_ge_i32_e32 vcc, 31, v3
 ; VI-NEXT:    v_cndmask_b32_e32 v2, v11, v2, vcc
 ; VI-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v1
 ; VI-NEXT:    v_cndmask_b32_e32 v1, v11, v12, vcc
@@ -6225,10 +6225,10 @@ define <4 x half> @v_copysign_out_v4f16_mag_v4f64_sign_v4f16(<4 x double> %mag,
 ; GFX9-NEXT:    v_add_u32_e32 v11, 0xfffffc10, v11
 ; GFX9-NEXT:    v_or_b32_e32 v10, v13, v10
 ; GFX9-NEXT:    v_lshl_or_b32 v12, v11, 12, v4
-; GFX9-NEXT:    v_cmp_gt_i32_e32 vcc, 1, v11
+; GFX9-NEXT:    v_cmp_ge_i32_e32 vcc, 1, v11
 ; GFX9-NEXT:    v_cndmask_b32_e32 v10, v12, v10, vcc
 ; GFX9-NEXT:    v_and_b32_e32 v12, 7, v10
-; GFX9-NEXT:    v_cmp_lt_i32_e32 vcc, 5, v12
+; GFX9-NEXT:    v_cmp_le_i32_e32 vcc, 5, v12
 ; GFX9-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
 ; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v12
 ; GFX9-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
@@ -6236,7 +6236,7 @@ define <4 x half> @v_copysign_out_v4f16_mag_v4f64_sign_v4f16(<4 x double> %mag,
 ; GFX9-NEXT:    v_lshrrev_b32_e32 v10, 2, v10
 ; GFX9-NEXT:    v_add_u32_e32 v10, v10, v12
 ; GFX9-NEXT:    v_mov_b32_e32 v12, 0x7c00
-; GFX9-NEXT:    v_cmp_gt_i32_e32 vcc, 31, v11
+; GFX9-NEXT:    v_cmp_ge_i32_e32 vcc, 31, v11
 ; GFX9-NEXT:    v_cndmask_b32_e32 v10, v12, v10, vcc
 ; GFX9-NEXT:    v_mov_b32_e32 v13, 0x7e00
 ; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
@@ -6263,17 +6263,17 @@ define <4 x half> @v_copysign_out_v4f16_mag_v4f64_sign_v4f16(<4 x double> %mag,
 ; GFX9-NEXT:    v_add_u32_e32 v10, 0xfffffc10, v10
 ; GFX9-NEXT:    v_or_b32_e32 v6, v14, v6
 ; GFX9-NEXT:    v_lshl_or_b32 v11, v10, 12, v5
-; GFX9-NEXT:    v_cmp_gt_i32_e32 vcc, 1, v10
+; GFX9-NEXT:    v_cmp_ge_i32_e32 vcc, 1, v10
 ; GFX9-NEXT:    v_cndmask_b32_e32 v6, v11, v6, vcc
 ; GFX9-NEXT:    v_and_b32_e32 v11, 7, v6
-; GFX9-NEXT:    v_cmp_lt_i32_e32 vcc, 5, v11
+; GFX9-NEXT:    v_cmp_le_i32_e32 vcc, 5, v11
 ; GFX9-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
 ; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v11
 ; GFX9-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
 ; GFX9-NEXT:    v_or_b32_e32 v11, v11, v14
 ; GFX9-NEXT:    v_lshrrev_b32_e32 v6, 2, v6
 ; GFX9-NEXT:    v_add_u32_e32 v6, v6, v11
-; GFX9-NEXT:    v_cmp_gt_i32_e32 vcc, 31, v10
+; GFX9-NEXT:    v_cmp_ge_i32_e32 vcc, 31, v10
 ; GFX9-NEXT:    v_cndmask_b32_e32 v6, v12, v6, vcc
 ; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v5
 ; GFX9-NEXT:    v_cndmask_b32_e32 v5, v12, v13, vcc
@@ -6297,17 +6297,17 @@ define <4 x half> @v_copysign_out_v4f16_mag_v4f64_sign_v4f16(<4 x double> %mag,
 ; GFX9-NEXT:    v_add_u32_e32 v7, 0xfffffc10, v7
 ; GFX9-NEXT:    v_or_b32_e32 v6, v11, v6
 ; GFX9-NEXT:    v_lshl_or_b32 v10, v7, 12, v0
-; GFX9-NEXT:    v_cmp_gt_i32_e32 vcc, 1, v7
+; GFX9-NEXT:    v_cmp_ge_i32_e32 vcc, 1, v7
 ; GFX9-NEXT:    v_cndmask_b32_e32 v6, v10, v6, vcc
 ; GFX9-NEXT:    v_and_b32_e32 v10, 7, v6
-; GFX9-NEXT:    v_cmp_lt_i32_e32 vcc, 5, v10
+; GFX9-NEXT:    v_cmp_le_i32_e32 vcc, 5, v10
 ; GFX9-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
 ; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v10
 ; GFX9-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
 ; GFX9-NEXT:    v_or_b32_e32 v10, v10, v11
 ; GFX9-NEXT:    v_lshrrev_b32_e32 v6, 2, v6
 ; GFX9-NEXT:    v_add_u32_e32 v6, v6, v10
-; GFX9-NEXT:    v_cmp_gt_i32_e32 vcc, 31, v7
+; GFX9-NEXT:    v_cmp_ge_i32_e32 vcc, 31, v7
 ; GFX9-NEXT:    v_cndmask_b32_e32 v6, v12, v6, vcc
 ; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v12, v13, vcc
@@ -6331,17 +6331,17 @@ define <4 x half> @v_copysign_out_v4f16_mag_v4f64_sign_v4f16(<4 x double> %mag,
 ; GFX9-NEXT:    v_add_u32_e32 v6, 0xfffffc10, v6
 ; GFX9-NEXT:    v_or_b32_e32 v2, v10, v2
 ; GFX9-NEXT:    v_lshl_or_b32 v7, v6, 12, v1
-; GFX9-NEXT:    v_cmp_gt_i32_e32 vcc, 1, v6
+; GFX9-NEXT:    v_cmp_ge_i32_e32 vcc, 1, v6
 ; GFX9-NEXT:    v_cndmask_b32_e32 v2, v7, v2, vcc
 ; GFX9-NEXT:    v_and_b32_e32 v7, 7, v2
-; GFX9-NEXT:    v_cmp_lt_i32_e32 vcc, 5, v7
+; GFX9-NEXT:    v_cmp_le_i32_e32 vcc, 5, v7
 ; GFX9-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
 ; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v7
 ; GFX9-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
 ; GFX9-NEXT:    v_or_b32_e32 v7, v7, v10
 ; GFX9-NEXT:    v_lshrrev_b32_e32 v2, 2, v2
 ; GFX9-NEXT:    v_add_u32_e32 v2, v2, v7
-; GFX9-NEXT:    v_cmp_gt_i32_e32 vcc, 31, v6
+; GFX9-NEXT:    v_cmp_ge_i32_e32 vcc, 31, v6
 ; GFX9-NEXT:    v_cndmask_b32_e32 v2, v12, v2, vcc
 ; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v1
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v12, v13, vcc
@@ -6393,7 +6393,7 @@ define <4 x half> @v_copysign_out_v4f16_mag_v4f64_sign_v4f16(<4 x double> %mag,
 ; GFX11-TRUE16-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc_lo
 ; GFX11-TRUE16-NEXT:    v_sub_nc_u32_e32 v14, 0x3f1, v17
 ; GFX11-TRUE16-NEXT:    v_lshl_or_b32 v21, v10, 12, v6
-; GFX11-TRUE16-NEXT:    v_cmp_gt_i32_e32 vcc_lo, 1, v10
+; GFX11-TRUE16-NEXT:    v_cmp_ge_i32_e32 vcc_lo, 1, v10
 ; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
 ; GFX11-TRUE16-NEXT:    v_and_or_b32 v4, 0xffe, v16, v4
 ; GFX11-TRUE16-NEXT:    v_med3_i32 v14, v14, 0, 13
@@ -6411,7 +6411,7 @@ define <4 x half> @v_copysign_out_v4f16_mag_v4f64_sign_v4f16(<4 x double> %mag,
 ; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_3) | instid1(VALU_DEP_3)
 ; GFX11-TRUE16-NEXT:    v_and_or_b32 v2, 0xffe, v18, v2
 ; GFX11-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
-; GFX11-TRUE16-NEXT:    v_cmp_lt_i32_e32 vcc_lo, 5, v16
+; GFX11-TRUE16-NEXT:    v_cmp_le_i32_e32 vcc_lo, 5, v16
 ; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v18, v14, v22
 ; GFX11-TRUE16-NEXT:    v_and_or_b32 v0, 0xffe, v12, v0
 ; GFX11-TRUE16-NEXT:    v_cndmask_b32_e64 v23, 0, 1, vcc_lo
@@ -6427,7 +6427,7 @@ define <4 x half> @v_copysign_out_v4f16_mag_v4f64_sign_v4f16(<4 x double> %mag,
 ; GFX11-TRUE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, v12, v22
 ; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v22, v21, v14
 ; GFX11-TRUE16-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc_lo
-; GFX11-TRUE16-NEXT:    v_cmp_gt_i32_e32 vcc_lo, 31, v10
+; GFX11-TRUE16-NEXT:    v_cmp_ge_i32_e32 vcc_lo, 31, v10
 ; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_4) | instid1(VALU_DEP_4)
 ; GFX11-TRUE16-NEXT:    v_or_b32_e32 v12, v18, v12
 ; GFX11-TRUE16-NEXT:    v_cndmask_b32_e32 v11, 0x7c00, v11, vcc_lo
@@ -6437,7 +6437,7 @@ define <4 x half> @v_copysign_out_v4f16_mag_v4f64_sign_v4f16(<4 x double> %mag,
 ; GFX11-TRUE16-NEXT:    v_cndmask_b32_e32 v6, v11, v6, vcc_lo
 ; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3)
 ; GFX11-TRUE16-NEXT:    v_lshl_or_b32 v16, v17, 12, v4
-; GFX11-TRUE16-NEXT:    v_cmp_gt_i32_e32 vcc_lo, 1, v17
+; GFX11-TRUE16-NEXT:    v_cmp_ge_i32_e32 vcc_lo, 1, v17
 ; GFX11-TRUE16-NEXT:    v_lshlrev_b32_e32 v11, v21, v22
 ; GFX11-TRUE16-NEXT:    v_med3_i32 v10, v10, 0, 13
 ; GFX11-TRUE16-NEXT:    v_and_or_b32 v6, 0x8000, v15, v6
@@ -6456,9 +6456,9 @@ define <4 x half> @v_copysign_out_v4f16_mag_v4f64_sign_v4f16(<4 x double> %mag,
 ; GFX11-TRUE16-NEXT:    v_lshlrev_b32_e32 v10, v10, v7
 ; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v12, 2, v12
 ; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_4) | instid1(VALU_DEP_2)
-; GFX11-TRUE16-NEXT:    v_cmp_lt_i32_e32 vcc_lo, 5, v18
+; GFX11-TRUE16-NEXT:    v_cmp_le_i32_e32 vcc_lo, 5, v18
 ; GFX11-TRUE16-NEXT:    v_cndmask_b32_e64 v21, 0, 1, vcc_lo
-; GFX11-TRUE16-NEXT:    v_cmp_gt_i32_e32 vcc_lo, 1, v14
+; GFX11-TRUE16-NEXT:    v_cmp_ge_i32_e32 vcc_lo, 1, v14
 ; GFX11-TRUE16-NEXT:    v_cndmask_b32_e32 v11, v19, v11, vcc_lo
 ; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v18
 ; GFX11-TRUE16-NEXT:    v_and_b32_e32 v19, 7, v11
@@ -6468,7 +6468,7 @@ define <4 x half> @v_copysign_out_v4f16_mag_v4f64_sign_v4f16(<4 x double> %mag,
 ; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_2)
 ; GFX11-TRUE16-NEXT:    v_or_b32_e32 v18, v18, v21
 ; GFX11-TRUE16-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc_lo
-; GFX11-TRUE16-NEXT:    v_cmp_lt_i32_e32 vcc_lo, 5, v19
+; GFX11-TRUE16-NEXT:    v_cmp_le_i32_e32 vcc_lo, 5, v19
 ; GFX11-TRUE16-NEXT:    v_or_b32_e32 v7, v7, v10
 ; GFX11-TRUE16-NEXT:    v_add_nc_u32_e32 v16, 0xfffffc10, v20
 ; GFX11-TRUE16-NEXT:    v_cndmask_b32_e64 v20, 0, 1, vcc_lo
@@ -6476,9 +6476,9 @@ define <4 x half> @v_copysign_out_v4f16_mag_v4f64_sign_v4f16(<4 x double> %mag,
 ; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_3)
 ; GFX11-TRUE16-NEXT:    v_lshl_or_b32 v10, v16, 12, v0
 ; GFX11-TRUE16-NEXT:    v_cndmask_b32_e64 v19, 0, 1, vcc_lo
-; GFX11-TRUE16-NEXT:    v_cmp_gt_i32_e32 vcc_lo, 1, v16
+; GFX11-TRUE16-NEXT:    v_cmp_ge_i32_e32 vcc_lo, 1, v16
 ; GFX11-TRUE16-NEXT:    v_dual_cndmask_b32 v7, v10, v7 :: v_dual_add_nc_u32 v10, v12, v18
-; GFX11-TRUE16-NEXT:    v_cmp_gt_i32_e32 vcc_lo, 31, v17
+; GFX11-TRUE16-NEXT:    v_cmp_ge_i32_e32 vcc_lo, 31, v17
 ; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
 ; GFX11-TRUE16-NEXT:    v_or_b32_e32 v12, v19, v20
 ; GFX11-TRUE16-NEXT:    v_and_b32_e32 v18, 7, v7
@@ -6486,13 +6486,13 @@ define <4 x half> @v_copysign_out_v4f16_mag_v4f64_sign_v4f16(<4 x double> %mag,
 ; GFX11-TRUE16-NEXT:    v_cndmask_b32_e32 v10, 0x7c00, v10, vcc_lo
 ; GFX11-TRUE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v4
 ; GFX11-TRUE16-NEXT:    v_dual_cndmask_b32 v4, 0x7c00, v13 :: v_dual_add_nc_u32 v11, v11, v12
-; GFX11-TRUE16-NEXT:    v_cmp_lt_i32_e32 vcc_lo, 5, v18
+; GFX11-TRUE16-NEXT:    v_cmp_le_i32_e32 vcc_lo, 5, v18
 ; GFX11-TRUE16-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc_lo
 ; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v18
 ; GFX11-TRUE16-NEXT:    v_cndmask_b32_e64 v18, 0, 1, vcc_lo
 ; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0x40f, v17
 ; GFX11-TRUE16-NEXT:    v_cndmask_b32_e32 v4, v10, v4, vcc_lo
-; GFX11-TRUE16-NEXT:    v_cmp_gt_i32_e32 vcc_lo, 31, v14
+; GFX11-TRUE16-NEXT:    v_cmp_ge_i32_e32 vcc_lo, 31, v14
 ; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_3)
 ; GFX11-TRUE16-NEXT:    v_or_b32_e32 v10, v18, v12
 ; GFX11-TRUE16-NEXT:    v_cndmask_b32_e32 v5, 0x7c00, v11, vcc_lo
@@ -6505,7 +6505,7 @@ define <4 x half> @v_copysign_out_v4f16_mag_v4f64_sign_v4f16(<4 x double> %mag,
 ; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0x40f, v14
 ; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
 ; GFX11-TRUE16-NEXT:    v_cndmask_b32_e32 v2, v5, v2, vcc_lo
-; GFX11-TRUE16-NEXT:    v_cmp_gt_i32_e32 vcc_lo, 31, v16
+; GFX11-TRUE16-NEXT:    v_cmp_ge_i32_e32 vcc_lo, 31, v16
 ; GFX11-TRUE16-NEXT:    v_and_or_b32 v2, 0x8000, v15, v2
 ; GFX11-TRUE16-NEXT:    v_cndmask_b32_e32 v3, 0x7c00, v7, vcc_lo
 ; GFX11-TRUE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
@@ -6562,7 +6562,7 @@ define <4 x half> @v_copysign_out_v4f16_mag_v4f64_sign_v4f16(<4 x double> %mag,
 ; GFX11-FAKE16-NEXT:    v_add_nc_u32_e32 v14, 0xfffffc10, v15
 ; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v15, v13, v17
 ; GFX11-FAKE16-NEXT:    v_lshl_or_b32 v18, v10, 12, v4
-; GFX11-FAKE16-NEXT:    v_cmp_gt_i32_e32 vcc_lo, 1, v10
+; GFX11-FAKE16-NEXT:    v_cmp_ge_i32_e32 vcc_lo, 1, v10
 ; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
 ; GFX11-FAKE16-NEXT:    v_lshlrev_b32_e32 v13, v13, v15
 ; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v11, v18, v11, vcc_lo
@@ -6579,20 +6579,20 @@ define <4 x half> @v_copysign_out_v4f16_mag_v4f64_sign_v4f16(<4 x double> %mag,
 ; GFX11-FAKE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, v13, v17
 ; GFX11-FAKE16-NEXT:    v_lshl_or_b32 v16, v14, 12, v6
 ; GFX11-FAKE16-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc_lo
-; GFX11-FAKE16-NEXT:    v_cmp_lt_i32_e32 vcc_lo, 5, v19
+; GFX11-FAKE16-NEXT:    v_cmp_le_i32_e32 vcc_lo, 5, v19
 ; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_4) | instid1(VALU_DEP_2)
 ; GFX11-FAKE16-NEXT:    v_or_b32_e32 v13, v15, v13
 ; GFX11-FAKE16-NEXT:    v_cndmask_b32_e64 v17, 0, 1, vcc_lo
 ; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v19
 ; GFX11-FAKE16-NEXT:    v_cndmask_b32_e64 v19, 0, 1, vcc_lo
-; GFX11-FAKE16-NEXT:    v_cmp_gt_i32_e32 vcc_lo, 1, v14
+; GFX11-FAKE16-NEXT:    v_cmp_ge_i32_e32 vcc_lo, 1, v14
 ; GFX11-FAKE16-NEXT:    v_or_b32_e32 v15, v19, v17
 ; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v13, v16, v13, vcc_lo
 ; GFX11-FAKE16-NEXT:    v_or_b32_e32 v16, 0x1000, v0
 ; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v17, 8, v3
 ; GFX11-FAKE16-NEXT:    v_bfe_u32 v19, v3, 20, 11
 ; GFX11-FAKE16-NEXT:    v_add_nc_u32_e32 v11, v11, v15
-; GFX11-FAKE16-NEXT:    v_cmp_gt_i32_e32 vcc_lo, 31, v10
+; GFX11-FAKE16-NEXT:    v_cmp_ge_i32_e32 vcc_lo, 31, v10
 ; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v20, v18, v16
 ; GFX11-FAKE16-NEXT:    v_and_or_b32 v2, 0xffe, v17, v2
 ; GFX11-FAKE16-NEXT:    v_sub_nc_u32_e32 v17, 0x3f1, v19
@@ -6604,7 +6604,7 @@ define <4 x half> @v_copysign_out_v4f16_mag_v4f64_sign_v4f16(<4 x double> %mag,
 ; GFX11-FAKE16-NEXT:    v_med3_i32 v17, v17, 0, 13
 ; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v13, 2, v13
 ; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
-; GFX11-FAKE16-NEXT:    v_cmp_lt_i32_e32 vcc_lo, 5, v15
+; GFX11-FAKE16-NEXT:    v_cmp_le_i32_e32 vcc_lo, 5, v15
 ; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_4)
 ; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v23, v17, v21
 ; GFX11-FAKE16-NEXT:    v_cndmask_b32_e64 v22, 0, 1, vcc_lo
@@ -6624,27 +6624,27 @@ define <4 x half> @v_copysign_out_v4f16_mag_v4f64_sign_v4f16(<4 x double> %mag,
 ; GFX11-FAKE16-NEXT:    v_or_b32_e32 v17, v23, v17
 ; GFX11-FAKE16-NEXT:    v_add_nc_u32_e32 v12, 0xfffffc10, v12
 ; GFX11-FAKE16-NEXT:    v_lshl_or_b32 v20, v12, 12, v0
-; GFX11-FAKE16-NEXT:    v_cmp_gt_i32_e32 vcc_lo, 1, v12
+; GFX11-FAKE16-NEXT:    v_cmp_ge_i32_e32 vcc_lo, 1, v12
 ; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
 ; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v16, v20, v16, vcc_lo
 ; GFX11-FAKE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v4
 ; GFX11-FAKE16-NEXT:    v_lshl_or_b32 v20, v19, 12, v2
 ; GFX11-FAKE16-NEXT:    v_dual_cndmask_b32 v4, 0x7c00, v18 :: v_dual_and_b32 v15, 7, v16
-; GFX11-FAKE16-NEXT:    v_cmp_gt_i32_e32 vcc_lo, 1, v19
+; GFX11-FAKE16-NEXT:    v_cmp_ge_i32_e32 vcc_lo, 1, v19
 ; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_4)
 ; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v17, v20, v17, vcc_lo
 ; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0x40f, v10
 ; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v4, v11, v4, vcc_lo
-; GFX11-FAKE16-NEXT:    v_cmp_gt_i32_e32 vcc_lo, 31, v14
+; GFX11-FAKE16-NEXT:    v_cmp_ge_i32_e32 vcc_lo, 31, v14
 ; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
 ; GFX11-FAKE16-NEXT:    v_and_b32_e32 v11, 7, v17
 ; GFX11-FAKE16-NEXT:    v_and_or_b32 v4, 0x8000, v5, v4
 ; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v10, 0x7c00, v13, vcc_lo
-; GFX11-FAKE16-NEXT:    v_cmp_lt_i32_e32 vcc_lo, 5, v15
+; GFX11-FAKE16-NEXT:    v_cmp_le_i32_e32 vcc_lo, 5, v15
 ; GFX11-FAKE16-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc_lo
 ; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v15
 ; GFX11-FAKE16-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc_lo
-; GFX11-FAKE16-NEXT:    v_cmp_lt_i32_e32 vcc_lo, 5, v11
+; GFX11-FAKE16-NEXT:    v_cmp_le_i32_e32 vcc_lo, 5, v11
 ; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
 ; GFX11-FAKE16-NEXT:    v_or_b32_e32 v13, v15, v13
 ; GFX11-FAKE16-NEXT:    v_cndmask_b32_e64 v20, 0, 1, vcc_lo
@@ -6660,11 +6660,11 @@ define <4 x half> @v_copysign_out_v4f16_mag_v4f64_sign_v4f16(<4 x double> %mag,
 ; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0x40f, v14
 ; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
 ; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v6, v10, v6, vcc_lo
-; GFX11-FAKE16-NEXT:    v_cmp_gt_i32_e32 vcc_lo, 31, v12
+; GFX11-FAKE16-NEXT:    v_cmp_ge_i32_e32 vcc_lo, 31, v12
 ; GFX11-FAKE16-NEXT:    v_dual_cndmask_b32 v11, 0x7c00, v13 :: v_dual_add_nc_u32 v10, v16, v11
 ; GFX11-FAKE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
 ; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x7c00, v18, vcc_lo
-; GFX11-FAKE16-NEXT:    v_cmp_gt_i32_e32 vcc_lo, 31, v19
+; GFX11-FAKE16-NEXT:    v_cmp_ge_i32_e32 vcc_lo, 31, v19
 ; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_4)
 ; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v10, 0x7c00, v10, vcc_lo
 ; GFX11-FAKE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v2
diff --git a/llvm/test/CodeGen/AMDGPU/flat_atomics_i64.ll b/llvm/test/CodeGen/AMDGPU/flat_atomics_i64.ll
index ffe0596a95e33..ffff12991a262 100644
--- a/llvm/test/CodeGen/AMDGPU/flat_atomics_i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/flat_atomics_i64.ll
@@ -3512,7 +3512,7 @@ define amdgpu_kernel void @atomic_max_i64_offset(ptr %out, i64 %in) {
 ; GCN1-NEXT:    buffer_load_dword v1, v3, s[12:15], 0 offen
 ; GCN1-NEXT:    v_mov_b32_e32 v4, s3
 ; GCN1-NEXT:    s_waitcnt vmcnt(0)
-; GCN1-NEXT:    v_cmp_lt_i64_e32 vcc, s[2:3], v[0:1]
+; GCN1-NEXT:    v_cmp_le_i64_e32 vcc, s[2:3], v[0:1]
 ; GCN1-NEXT:    v_cndmask_b32_e32 v0, v5, v0, vcc
 ; GCN1-NEXT:    v_cndmask_b32_e32 v1, v4, v1, vcc
 ; GCN1-NEXT:    buffer_store_dword v0, v2, s[12:15], 0 offen
@@ -3561,7 +3561,7 @@ define amdgpu_kernel void @atomic_max_i64_offset(ptr %out, i64 %in) {
 ; GCN2-NEXT:    v_mov_b32_e32 v5, s2
 ; GCN2-NEXT:    v_mov_b32_e32 v4, s3
 ; GCN2-NEXT:    s_waitcnt vmcnt(0)
-; GCN2-NEXT:    v_cmp_lt_i64_e32 vcc, s[2:3], v[0:1]
+; GCN2-NEXT:    v_cmp_le_i64_e32 vcc, s[2:3], v[0:1]
 ; GCN2-NEXT:    v_cndmask_b32_e32 v0, v5, v0, vcc
 ; GCN2-NEXT:    v_cndmask_b32_e32 v1, v4, v1, vcc
 ; GCN2-NEXT:    buffer_store_dword v0, v2, s[88:91], 0 offen
@@ -3597,7 +3597,7 @@ define amdgpu_kernel void @atomic_max_i64_offset(ptr %out, i64 %in) {
 ; GFX12-NEXT:    s_cselect_b32 s0, s0, -1
 ; GFX12-NEXT:    scratch_load_b64 v[0:1], off, s0
 ; GFX12-NEXT:    s_wait_loadcnt 0x0
-; GFX12-NEXT:    v_cmp_lt_i64_e32 vcc_lo, s[2:3], v[0:1]
+; GFX12-NEXT:    v_cmp_le_i64_e32 vcc_lo, s[2:3], v[0:1]
 ; GFX12-NEXT:    v_cndmask_b32_e32 v1, s3, v1, vcc_lo
 ; GFX12-NEXT:    v_cndmask_b32_e32 v0, s2, v0, vcc_lo
 ; GFX12-NEXT:    scratch_store_b64 off, v[0:1], s0
@@ -3650,7 +3650,7 @@ define amdgpu_kernel void @atomic_max_i64_ret_offset(ptr %out, ptr %out2, i64 %i
 ; GCN1-NEXT:    buffer_load_dword v1, v3, s[12:15], 0 offen
 ; GCN1-NEXT:    v_mov_b32_e32 v4, s5
 ; GCN1-NEXT:    s_waitcnt vmcnt(0)
-; GCN1-NEXT:    v_cmp_lt_i64_e32 vcc, s[4:5], v[0:1]
+; GCN1-NEXT:    v_cmp_le_i64_e32 vcc, s[4:5], v[0:1]
 ; GCN1-NEXT:    v_cndmask_b32_e32 v5, v5, v0, vcc
 ; GCN1-NEXT:    v_cndmask_b32_e32 v4, v4, v1, vcc
 ; GCN1-NEXT:    buffer_store_dword v5, v2, s[12:15], 0 offen
@@ -3702,7 +3702,7 @@ define amdgpu_kernel void @atomic_max_i64_ret_offset(ptr %out, ptr %out2, i64 %i
 ; GCN2-NEXT:    v_mov_b32_e32 v5, s4
 ; GCN2-NEXT:    v_mov_b32_e32 v4, s5
 ; GCN2-NEXT:    s_waitcnt vmcnt(0)
-; GCN2-NEXT:    v_cmp_lt_i64_e32 vcc, s[4:5], v[0:1]
+; GCN2-NEXT:    v_cmp_le_i64_e32 vcc, s[4:5], v[0:1]
 ; GCN2-NEXT:    v_cndmask_b32_e32 v5, v5, v0, vcc
 ; GCN2-NEXT:    v_cndmask_b32_e32 v4, v4, v1, vcc
 ; GCN2-NEXT:    buffer_store_dword v5, v2, s[88:91], 0 offen
@@ -3742,7 +3742,7 @@ define amdgpu_kernel void @atomic_max_i64_ret_offset(ptr %out, ptr %out2, i64 %i
 ; GFX12-NEXT:    s_cselect_b32 s0, s0, -1
 ; GFX12-NEXT:    scratch_load_b64 v[0:1], off, s0
 ; GFX12-NEXT:    s_wait_loadcnt 0x0
-; GFX12-NEXT:    v_cmp_lt_i64_e32 vcc_lo, s[4:5], v[0:1]
+; GFX12-NEXT:    v_cmp_le_i64_e32 vcc_lo, s[4:5], v[0:1]
 ; GFX12-NEXT:    v_cndmask_b32_e32 v3, s5, v1, vcc_lo
 ; GFX12-NEXT:    v_cndmask_b32_e32 v2, s4, v0, vcc_lo
 ; GFX12-NEXT:    scratch_store_b64 off, v[2:3], s0
@@ -3805,7 +3805,7 @@ define amdgpu_kernel void @atomic_max_i64_addr64_offset(ptr %out, i64 %in, i64 %
 ; GCN1-NEXT:    buffer_load_dword v1, v3, s[12:15], 0 offen
 ; GCN1-NEXT:    v_mov_b32_e32 v4, s3
 ; GCN1-NEXT:    s_waitcnt vmcnt(0)
-; GCN1-NEXT:    v_cmp_lt_i64_e32 vcc, s[2:3], v[0:1]
+; GCN1-NEXT:    v_cmp_le_i64_e32 vcc, s[2:3], v[0:1]
 ; GCN1-NEXT:    v_cndmask_b32_e32 v0, v5, v0, vcc
 ; GCN1-NEXT:    v_cndmask_b32_e32 v1, v4, v1, vcc
 ; GCN1-NEXT:    buffer_store_dword v0, v2, s[12:15], 0 offen
@@ -3858,7 +3858,7 @@ define amdgpu_kernel void @atomic_max_i64_addr64_offset(ptr %out, i64 %in, i64 %
 ; GCN2-NEXT:    v_mov_b32_e32 v5, s2
 ; GCN2-NEXT:    v_mov_b32_e32 v4, s3
 ; GCN2-NEXT:    s_waitcnt vmcnt(0)
-; GCN2-NEXT:    v_cmp_lt_i64_e32 vcc, s[2:3], v[0:1]
+; GCN2-NEXT:    v_cmp_le_i64_e32 vcc, s[2:3], v[0:1]
 ; GCN2-NEXT:    v_cndmask_b32_e32 v0, v5, v0, vcc
 ; GCN2-NEXT:    v_cndmask_b32_e32 v1, v4, v1, vcc
 ; GCN2-NEXT:    buffer_store_dword v0, v2, s[88:91], 0 offen
@@ -3899,7 +3899,7 @@ define amdgpu_kernel void @atomic_max_i64_addr64_offset(ptr %out, i64 %in, i64 %
 ; GFX12-NEXT:    s_cselect_b32 s0, s0, -1
 ; GFX12-NEXT:    scratch_load_b64 v[0:1], off, s0
 ; GFX12-NEXT:    s_wait_loadcnt 0x0
-; GFX12-NEXT:    v_cmp_lt_i64_e32 vcc_lo, s[2:3], v[0:1]
+; GFX12-NEXT:    v_cmp_le_i64_e32 vcc_lo, s[2:3], v[0:1]
 ; GFX12-NEXT:    v_cndmask_b32_e32 v1, s3, v1, vcc_lo
 ; GFX12-NEXT:    v_cndmask_b32_e32 v0, s2, v0, vcc_lo
 ; GFX12-NEXT:    scratch_store_b64 off, v[0:1], s0
@@ -3955,7 +3955,7 @@ define amdgpu_kernel void @atomic_max_i64_ret_addr64_offset(ptr %out, ptr %out2,
 ; GCN1-NEXT:    buffer_load_dword v1, v3, s[16:19], 0 offen
 ; GCN1-NEXT:    v_mov_b32_e32 v4, s13
 ; GCN1-NEXT:    s_waitcnt vmcnt(0)
-; GCN1-NEXT:    v_cmp_lt_i64_e32 vcc, s[12:13], v[0:1]
+; GCN1-NEXT:    v_cmp_le_i64_e32 vcc, s[12:13], v[0:1]
 ; GCN1-NEXT:    v_cndmask_b32_e32 v5, v5, v0, vcc
 ; GCN1-NEXT:    v_cndmask_b32_e32 v4, v4, v1, vcc
 ; GCN1-NEXT:    buffer_store_dword v5, v2, s[16:19], 0 offen
@@ -4009,7 +4009,7 @@ define amdgpu_kernel void @atomic_max_i64_ret_addr64_offset(ptr %out, ptr %out2,
 ; GCN2-NEXT:    v_mov_b32_e32 v5, s12
 ; GCN2-NEXT:    v_mov_b32_e32 v4, s13
 ; GCN2-NEXT:    s_waitcnt vmcnt(0)
-; GCN2-NEXT:    v_cmp_lt_i64_e32 vcc, s[12:13], v[0:1]
+; GCN2-NEXT:    v_cmp_le_i64_e32 vcc, s[12:13], v[0:1]
 ; GCN2-NEXT:    v_cndmask_b32_e32 v5, v5, v0, vcc
 ; GCN2-NEXT:    v_cndmask_b32_e32 v4, v4, v1, vcc
 ; GCN2-NEXT:    buffer_store_dword v5, v2, s[88:91], 0 offen
@@ -4050,7 +4050,7 @@ define amdgpu_kernel void @atomic_max_i64_ret_addr64_offset(ptr %out, ptr %out2,
 ; GFX12-NEXT:    s_cselect_b32 s0, s0, -1
 ; GFX12-NEXT:    scratch_load_b64 v[0:1], off, s0
 ; GFX12-NEXT:    s_wait_loadcnt 0x0
-; GFX12-NEXT:    v_cmp_lt_i64_e32 vcc_lo, s[4:5], v[0:1]
+; GFX12-NEXT:    v_cmp_le_i64_e32 vcc_lo, s[4:5], v[0:1]
 ; GFX12-NEXT:    v_cndmask_b32_e32 v3, s5, v1, vcc_lo
 ; GFX12-NEXT:    v_cndmask_b32_e32 v2, s4, v0, vcc_lo
 ; GFX12-NEXT:    scratch_store_b64 off, v[2:3], s0
@@ -4108,7 +4108,7 @@ define amdgpu_kernel void @atomic_max_i64(ptr %out, i64 %in) {
 ; GCN1-NEXT:    buffer_load_dword v1, v3, s[12:15], 0 offen
 ; GCN1-NEXT:    v_mov_b32_e32 v4, s3
 ; GCN1-NEXT:    s_waitcnt vmcnt(0)
-; GCN1-NEXT:    v_cmp_lt_i64_e32 vcc, s[2:3], v[0:1]
+; GCN1-NEXT:    v_cmp_le_i64_e32 vcc, s[2:3], v[0:1]
 ; GCN1-NEXT:    v_cndmask_b32_e32 v0, v5, v0, vcc
 ; GCN1-NEXT:    v_cndmask_b32_e32 v1, v4, v1, vcc
 ; GCN1-NEXT:    buffer_store_dword v0, v2, s[12:15], 0 offen
@@ -4155,7 +4155,7 @@ define amdgpu_kernel void @atomic_max_i64(ptr %out, i64 %in) {
 ; GCN2-NEXT:    v_mov_b32_e32 v5, s2
 ; GCN2-NEXT:    v_mov_b32_e32 v4, s3
 ; GCN2-NEXT:    s_waitcnt vmcnt(0)
-; GCN2-NEXT:    v_cmp_lt_i64_e32 vcc, s[2:3], v[0:1]
+; GCN2-NEXT:    v_cmp_le_i64_e32 vcc, s[2:3], v[0:1]
 ; GCN2-NEXT:    v_cndmask_b32_e32 v0, v5, v0, vcc
 ; GCN2-NEXT:    v_cndmask_b32_e32 v1, v4, v1, vcc
 ; GCN2-NEXT:    buffer_store_dword v0, v2, s[88:91], 0 offen
@@ -4190,7 +4190,7 @@ define amdgpu_kernel void @atomic_max_i64(ptr %out, i64 %in) {
 ; GFX12-NEXT:    s_cselect_b32 s0, s0, -1
 ; GFX12-NEXT:    scratch_load_b64 v[0:1], off, s0
 ; GFX12-NEXT:    s_wait_loadcnt 0x0
-; GFX12-NEXT:    v_cmp_lt_i64_e32 vcc_lo, s[2:3], v[0:1]
+; GFX12-NEXT:    v_cmp_le_i64_e32 vcc_lo, s[2:3], v[0:1]
 ; GFX12-NEXT:    v_cndmask_b32_e32 v1, s3, v1, vcc_lo
 ; GFX12-NEXT:    v_cndmask_b32_e32 v0, s2, v0, vcc_lo
 ; GFX12-NEXT:    scratch_store_b64 off, v[0:1], s0
@@ -4240,7 +4240,7 @@ define amdgpu_kernel void @atomic_max_i64_ret(ptr %out, ptr %out2, i64 %in) {
 ; GCN1-NEXT:    buffer_load_dword v1, v3, s[12:15], 0 offen
 ; GCN1-NEXT:    v_mov_b32_e32 v4, s5
 ; GCN1-NEXT:    s_waitcnt vmcnt(0)
-; GCN1-NEXT:    v_cmp_lt_i64_e32 vcc, s[4:5], v[0:1]
+; GCN1-NEXT:    v_cmp_le_i64_e32 vcc, s[4:5], v[0:1]
 ; GCN1-NEXT:    v_cndmask_b32_e32 v5, v5, v0, vcc
 ; GCN1-NEXT:    v_cndmask_b32_e32 v4, v4, v1, vcc
 ; GCN1-NEXT:    buffer_store_dword v5, v2, s[12:15], 0 offen
@@ -4290,7 +4290,7 @@ define amdgpu_kernel void @atomic_max_i64_ret(ptr %out, ptr %out2, i64 %in) {
 ; GCN2-NEXT:    v_mov_b32_e32 v5, s4
 ; GCN2-NEXT:    v_mov_b32_e32 v4, s5
 ; GCN2-NEXT:    s_waitcnt vmcnt(0)
-; GCN2-NEXT:    v_cmp_lt_i64_e32 vcc, s[4:5], v[0:1]
+; GCN2-NEXT:    v_cmp_le_i64_e32 vcc, s[4:5], v[0:1]
 ; GCN2-NEXT:    v_cndmask_b32_e32 v5, v5, v0, vcc
 ; GCN2-NEXT:    v_cndmask_b32_e32 v4, v4, v1, vcc
 ; GCN2-NEXT:    buffer_store_dword v5, v2, s[88:91], 0 offen
@@ -4329,7 +4329,7 @@ define amdgpu_kernel void @atomic_max_i64_ret(ptr %out, ptr %out2, i64 %in) {
 ; GFX12-NEXT:    s_cselect_b32 s0, s0, -1
 ; GFX12-NEXT:    scratch_load_b64 v[0:1], off, s0
 ; GFX12-NEXT:    s_wait_loadcnt 0x0
-; GFX12-NEXT:    v_cmp_lt_i64_e32 vcc_lo, s[4:5], v[0:1]
+; GFX12-NEXT:    v_cmp_le_i64_e32 vcc_lo, s[4:5], v[0:1]
 ; GFX12-NEXT:    v_cndmask_b32_e32 v3, s5, v1, vcc_lo
 ; GFX12-NEXT:    v_cndmask_b32_e32 v2, s4, v0, vcc_lo
 ; GFX12-NEXT:    scratch_store_b64 off, v[2:3], s0
@@ -4389,7 +4389,7 @@ define amdgpu_kernel void @atomic_max_i64_addr64(ptr %out, i64 %in, i64 %index)
 ; GCN1-NEXT:    buffer_load_dword v1, v3, s[12:15], 0 offen
 ; GCN1-NEXT:    v_mov_b32_e32 v4, s3
 ; GCN1-NEXT:    s_waitcnt vmcnt(0)
-; GCN1-NEXT:    v_cmp_lt_i64_e32 vcc, s[2:3], v[0:1]
+; GCN1-NEXT:    v_cmp_le_i64_e32 vcc, s[2:3], v[0:1]
 ; GCN1-NEXT:    v_cndmask_b32_e32 v0, v5, v0, vcc
 ; GCN1-NEXT:    v_cndmask_b32_e32 v1, v4, v1, vcc
 ; GCN1-NEXT:    buffer_store_dword v0, v2, s[12:15], 0 offen
@@ -4440,7 +4440,7 @@ define amdgpu_kernel void @atomic_max_i64_addr64(ptr %out, i64 %in, i64 %index)
 ; GCN2-NEXT:    v_mov_b32_e32 v5, s2
 ; GCN2-NEXT:    v_mov_b32_e32 v4, s3
 ; GCN2-NEXT:    s_waitcnt vmcnt(0)
-; GCN2-NEXT:    v_cmp_lt_i64_e32 vcc, s[2:3], v[0:1]
+; GCN2-NEXT:    v_cmp_le_i64_e32 vcc, s[2:3], v[0:1]
 ; GCN2-NEXT:    v_cndmask_b32_e32 v0, v5, v0, vcc
 ; GCN2-NEXT:    v_cndmask_b32_e32 v1, v4, v1, vcc
 ; GCN2-NEXT:    buffer_store_dword v0, v2, s[88:91], 0 offen
@@ -4479,7 +4479,7 @@ define amdgpu_kernel void @atomic_max_i64_addr64(ptr %out, i64 %in, i64 %index)
 ; GFX12-NEXT:    s_cselect_b32 s0, s0, -1
 ; GFX12-NEXT:    scratch_load_b64 v[0:1], off, s0
 ; GFX12-NEXT:    s_wait_loadcnt 0x0
-; GFX12-NEXT:    v_cmp_lt_i64_e32 vcc_lo, s[2:3], v[0:1]
+; GFX12-NEXT:    v_cmp_le_i64_e32 vcc_lo, s[2:3], v[0:1]
 ; GFX12-NEXT:    v_cndmask_b32_e32 v1, s3, v1, vcc_lo
 ; GFX12-NEXT:    v_cndmask_b32_e32 v0, s2, v0, vcc_lo
 ; GFX12-NEXT:    scratch_store_b64 off, v[0:1], s0
@@ -4532,7 +4532,7 @@ define amdgpu_kernel void @atomic_max_i64_ret_addr64(ptr %out, ptr %out2, i64 %i
 ; GCN1-NEXT:    buffer_load_dword v1, v3, s[16:19], 0 offen
 ; GCN1-NEXT:    v_mov_b32_e32 v4, s13
 ; GCN1-NEXT:    s_waitcnt vmcnt(0)
-; GCN1-NEXT:    v_cmp_lt_i64_e32 vcc, s[12:13], v[0:1]
+; GCN1-NEXT:    v_cmp_le_i64_e32 vcc, s[12:13], v[0:1]
 ; GCN1-NEXT:    v_cndmask_b32_e32 v5, v5, v0, vcc
 ; GCN1-NEXT:    v_cndmask_b32_e32 v4, v4, v1, vcc
 ; GCN1-NEXT:    buffer_store_dword v5, v2, s[16:19], 0 offen
@@ -4584,7 +4584,7 @@ define amdgpu_kernel void @atomic_max_i64_ret_addr64(ptr %out, ptr %out2, i64 %i
 ; GCN2-NEXT:    v_mov_b32_e32 v5, s12
 ; GCN2-NEXT:    v_mov_b32_e32 v4, s13
 ; GCN2-NEXT:    s_waitcnt vmcnt(0)
-; GCN2-NEXT:    v_cmp_lt_i64_e32 vcc, s[12:13], v[0:1]
+; GCN2-NEXT:    v_cmp_le_i64_e32 vcc, s[12:13], v[0:1]
 ; GCN2-NEXT:    v_cndmask_b32_e32 v5, v5, v0, vcc
 ; GCN2-NEXT:    v_cndmask_b32_e32 v4, v4, v1, vcc
 ; GCN2-NEXT:    buffer_store_dword v5, v2, s[88:91], 0 offen
@@ -4624,7 +4624,7 @@ define amdgpu_kernel void @atomic_max_i64_ret_addr64(ptr %out, ptr %out2, i64 %i
 ; GFX12-NEXT:    s_cselect_b32 s0, s0, -1
 ; GFX12-NEXT:    scratch_load_b64 v[0:1], off, s0
 ; GFX12-NEXT:    s_wait_loadcnt 0x0
-; GFX12-NEXT:    v_cmp_lt_i64_e32 vcc_lo, s[4:5], v[0:1]
+; GFX12-NEXT:    v_cmp_le_i64_e32 vcc_lo, s[4:5], v[0:1]
 ; GFX12-NEXT:    v_cndmask_b32_e32 v3, s5, v1, vcc_lo
 ; GFX12-NEXT:    v_cndmask_b32_e32 v2, s4, v0, vcc_lo
 ; GFX12-NEXT:    scratch_store_b64 off, v[2:3], s0
@@ -4683,7 +4683,7 @@ define amdgpu_kernel void @atomic_umax_i64_offset(ptr %out, i64 %in) {
 ; GCN1-NEXT:    buffer_load_dword v1, v3, s[12:15], 0 offen
 ; GCN1-NEXT:    v_mov_b32_e32 v4, s3
 ; GCN1-NEXT:    s_waitcnt vmcnt(0)
-; GCN1-NEXT:    v_cmp_lt_u64_e32 vcc, s[2:3], v[0:1]
+; GCN1-NEXT:    v_cmp_le_u64_e32 vcc, s[2:3], v[0:1]
 ; GCN1-NEXT:    v_cndmask_b32_e32 v0, v5, v0, vcc
 ; GCN1-NEXT:    v_cndmask_b32_e32 v1, v4, v1, vcc
 ; GCN1-NEXT:    buffer_store_dword v0, v2, s[12:15], 0 offen
@@ -4732,7 +4732,7 @@ define amdgpu_kernel void @atomic_umax_i64_offset(ptr %out, i64 %in) {
 ; GCN2-NEXT:    v_mov_b32_e32 v5, s2
 ; GCN2-NEXT:    v_mov_b32_e32 v4, s3
 ; GCN2-NEXT:    s_waitcnt vmcnt(0)
-; GCN2-NEXT:    v_cmp_lt_u64_e32 vcc, s[2:3], v[0:1]
+; GCN2-NEXT:    v_cmp_le_u64_e32 vcc, s[2:3], v[0:1]
 ; GCN2-NEXT:    v_cndmask_b32_e32 v0, v5, v0, vcc
 ; GCN2-NEXT:    v_cndmask_b32_e32 v1, v4, v1, vcc
 ; GCN2-NEXT:    buffer_store_dword v0, v2, s[88:91], 0 offen
@@ -4768,7 +4768,7 @@ define amdgpu_kernel void @atomic_umax_i64_offset(ptr %out, i64 %in) {
 ; GFX12-NEXT:    s_cselect_b32 s0, s0, -1
 ; GFX12-NEXT:    scratch_load_b64 v[0:1], off, s0
 ; GFX12-NEXT:    s_wait_loadcnt 0x0
-; GFX12-NEXT:    v_cmp_lt_u64_e32 vcc_lo, s[2:3], v[0:1]
+; GFX12-NEXT:    v_cmp_le_u64_e32 vcc_lo, s[2:3], v[0:1]
 ; GFX12-NEXT:    v_cndmask_b32_e32 v1, s3, v1, vcc_lo
 ; GFX12-NEXT:    v_cndmask_b32_e32 v0, s2, v0, vcc_lo
 ; GFX12-NEXT:    scratch_store_b64 off, v[0:1], s0
@@ -4821,7 +4821,7 @@ define amdgpu_kernel void @atomic_umax_i64_ret_offset(ptr %out, ptr %out2, i64 %
 ; GCN1-NEXT:    buffer_load_dword v1, v3, s[12:15], 0 offen
 ; GCN1-NEXT:    v_mov_b32_e32 v4, s5
 ; GCN1-NEXT:    s_waitcnt vmcnt(0)
-; GCN1-NEXT:    v_cmp_lt_u64_e32 vcc, s[4:5], v[0:1]
+; GCN1-NEXT:    v_cmp_le_u64_e32 vcc, s[4:5], v[0:1]
 ; GCN1-NEXT:    v_cndmask_b32_e32 v5, v5, v0, vcc
 ; GCN1-NEXT:    v_cndmask_b32_e32 v4, v4, v1, vcc
 ; GCN1-NEXT:    buffer_store_dword v5, v2, s[12:15], 0 offen
@@ -4873,7 +4873,7 @@ define amdgpu_kernel void @atomic_umax_i64_ret_offset(ptr %out, ptr %out2, i64 %
 ; GCN2-NEXT:    v_mov_b32_e32 v5, s4
 ; GCN2-NEXT:    v_mov_b32_e32 v4, s5
 ; GCN2-NEXT:    s_waitcnt vmcnt(0)
-; GCN2-NEXT:    v_cmp_lt_u64_e32 vcc, s[4:5], v[0:1]
+; GCN2-NEXT:    v_cmp_le_u64_e32 vcc, s[4:5], v[0:1]
 ; GCN2-NEXT:    v_cndmask_b32_e32 v5, v5, v0, vcc
 ; GCN2-NEXT:    v_cndmask_b32_e32 v4, v4, v1, vcc
 ; GCN2-NEXT:    buffer_store_dword v5, v2, s[88:91], 0 offen
@@ -4913,7 +4913,7 @@ define amdgpu_kernel void @atomic_umax_i64_ret_offset(ptr %out, ptr %out2, i64 %
 ; GFX12-NEXT:    s_cselect_b32 s0, s0, -1
 ; GFX12-NEXT:    scratch_load_b64 v[0:1], off, s0
 ; GFX12-NEXT:    s_wait_loadcnt 0x0
-; GFX12-NEXT:    v_cmp_lt_u64_e32 vcc_lo, s[4:5], v[0:1]
+; GFX12-NEXT:    v_cmp_le_u64_e32 vcc_lo, s[4:5], v[0:1]
 ; GFX12-NEXT:    v_cndmask_b32_e32 v3, s5, v1, vcc_lo
 ; GFX12-NEXT:    v_cndmask_b32_e32 v2, s4, v0, vcc_lo
 ; GFX12-NEXT:    scratch_store_b64 off, v[2:3], s0
@@ -4976,7 +4976,7 @@ define amdgpu_kernel void @atomic_umax_i64_addr64_offset(ptr %out, i64 %in, i64
 ; GCN1-NEXT:    buffer_load_dword v1, v3, s[12:15], 0 offen
 ; GCN1-NEXT:    v_mov_b32_e32 v4, s3
 ; GCN1-NEXT:    s_waitcnt vmcnt(0)
-; GCN1-NEXT:    v_cmp_lt_u64_e32 vcc, s[2:3], v[0:1]
+; GCN1-NEXT:    v_cmp_le_u64_e32 vcc, s[2:3], v[0:1]
 ; GCN1-NEXT:    v_cndmask_b32_e32 v0, v5, v0, vcc
 ; GCN1-NEXT:    v_cndmask_b32_e32 v1, v4, v1, vcc
 ; GCN1-NEXT:    buffer_store_dword v0, v2, s[12:15], 0 offen
@@ -5029,7 +5029,7 @@ define amdgpu_kernel void @atomic_umax_i64_addr64_offset(ptr %out, i64 %in, i64
 ; GCN2-NEXT:    v_mov_b32_e32 v5, s2
 ; GCN2-NEXT:    v_mov_b32_e32 v4, s3
 ; GCN2-NEXT:    s_waitcnt vmcnt(0)
-; GCN2-NEXT:    v_cmp_lt_u64_e32 vcc, s[2:3], v[0:1]
+; GCN2-NEXT:    v_cmp_le_u64_e32 vcc, s[2:3], v[0:1]
 ; GCN2-NEXT:    v_cndmask_b32_e32 v0, v5, v0, vcc
 ; GCN2-NEXT:    v_cndmask_b32_e32 v1, v4, v1, vcc
 ; GCN2-NEXT:    buffer_store_dword v0, v2, s[88:91], 0 offen
@@ -5070,7 +5070,7 @@ define amdgpu_kernel void @atomic_umax_i64_addr64_offset(ptr %out, i64 %in, i64
 ; GFX12-NEXT:    s_cselect_b32 s0, s0, -1
 ; GFX12-NEXT:    scratch_load_b64 v[0:1], off, s0
 ; GFX12-NEXT:    s_wait_loadcnt 0x0
-; GFX12-NEXT:    v_cmp_lt_u64_e32 vcc_lo, s[2:3], v[0:1]
+; GFX12-NEXT:    v_cmp_le_u64_e32 vcc_lo, s[2:3], v[0:1]
 ; GFX12-NEXT:    v_cndmask_b32_e32 v1, s3, v1, vcc_lo
 ; GFX12-NEXT:    v_cndmask_b32_e32 v0, s2, v0, vcc_lo
 ; GFX12-NEXT:    scratch_store_b64 off, v[0:1], s0
@@ -5126,7 +5126,7 @@ define amdgpu_kernel void @atomic_umax_i64_ret_addr64_offset(ptr %out, ptr %out2
 ; GCN1-NEXT:    buffer_load_dword v1, v3, s[16:19], 0 offen
 ; GCN1-NEXT:    v_mov_b32_e32 v4, s13
 ; GCN1-NEXT:    s_waitcnt vmcnt(0)
-; GCN1-NEXT:    v_cmp_lt_u64_e32 vcc, s[12:13], v[0:1]
+; GCN1-NEXT:    v_cmp_le_u64_e32 vcc, s[12:13], v[0:1]
 ; GCN1-NEXT:    v_cndmask_b32_e32 v5, v5, v0, vcc
 ; GCN1-NEXT:    v_cndmask_b32_e32 v4, v4, v1, vcc
 ; GCN1-NEXT:    buffer_store_dword v5, v2, s[16:19], 0 offen
@@ -5180,7 +5180,7 @@ define amdgpu_kernel void @atomic_umax_i64_ret_addr64_offset(ptr %out, ptr %out2
 ; GCN2-NEXT:    v_mov_b32_e32 v5, s12
 ; GCN2-NEXT:    v_mov_b32_e32 v4, s13
 ; GCN2-NEXT:    s_waitcnt vmcnt(0)
-; GCN2-NEXT:    v_cmp_lt_u64_e32 vcc, s[12:13], v[0:1]
+; GCN2-NEXT:    v_cmp_le_u64_e32 vcc, s[12:13], v[0:1]
 ; GCN2-NEXT:    v_cndmask_b32_e32 v5, v5, v0, vcc
 ; GCN2-NEXT:    v_cndmask_b32_e32 v4, v4, v1, vcc
 ; GCN2-NEXT:    buffer_store_dword v5, v2, s[88:91], 0 offen
@@ -5221,7 +5221,7 @@ define amdgpu_kernel void @atomic_umax_i64_ret_addr64_offset(ptr %out, ptr %out2
 ; GFX12-NEXT:    s_cselect_b32 s0, s0, -1
 ; GFX12-NEXT:    scratch_load_b64 v[0:1], off, s0
 ; GFX12-NEXT:    s_wait_loadcnt 0x0
-; GFX12-NEXT:    v_cmp_lt_u64_e32 vcc_lo, s[4:5], v[0:1]
+; GFX12-NEXT:    v_cmp_le_u64_e32 vcc_lo, s[4:5], v[0:1]
 ; GFX12-NEXT:    v_cndmask_b32_e32 v3, s5, v1, vcc_lo
 ; GFX12-NEXT:    v_cndmask_b32_e32 v2, s4, v0, vcc_lo
 ; GFX12-NEXT:    scratch_store_b64 off, v[2:3], s0
@@ -5279,7 +5279,7 @@ define amdgpu_kernel void @atomic_umax_i64(ptr %out, i64 %in) {
 ; GCN1-NEXT:    buffer_load_dword v1, v3, s[12:15], 0 offen
 ; GCN1-NEXT:    v_mov_b32_e32 v4, s3
 ; GCN1-NEXT:    s_waitcnt vmcnt(0)
-; GCN1-NEXT:    v_cmp_lt_u64_e32 vcc, s[2:3], v[0:1]
+; GCN1-NEXT:    v_cmp_le_u64_e32 vcc, s[2:3], v[0:1]
 ; GCN1-NEXT:    v_cndmask_b32_e32 v0, v5, v0, vcc
 ; GCN1-NEXT:    v_cndmask_b32_e32 v1, v4, v1, vcc
 ; GCN1-NEXT:    buffer_store_dword v0, v2, s[12:15], 0 offen
@@ -5326,7 +5326,7 @@ define amdgpu_kernel void @atomic_umax_i64(ptr %out, i64 %in) {
 ; GCN2-NEXT:    v_mov_b32_e32 v5, s2
 ; GCN2-NEXT:    v_mov_b32_e32 v4, s3
 ; GCN2-NEXT:    s_waitcnt vmcnt(0)
-; GCN2-NEXT:    v_cmp_lt_u64_e32 vcc, s[2:3], v[0:1]
+; GCN2-NEXT:    v_cmp_le_u64_e32 vcc, s[2:3], v[0:1]
 ; GCN2-NEXT:    v_cndmask_b32_e32 v0, v5, v0, vcc
 ; GCN2-NEXT:    v_cndmask_b32_e32 v1, v4, v1, vcc
 ; GCN2-NEXT:    buffer_store_dword v0, v2, s[88:91], 0 offen
@@ -5361,7 +5361,7 @@ define amdgpu_kernel void @atomic_umax_i64(ptr %out, i64 %in) {
 ; GFX12-NEXT:    s_cselect_b32 s0, s0, -1
 ; GFX12-NEXT:    scratch_load_b64 v[0:1], off, s0
 ; GFX12-NEXT:    s_wait_loadcnt 0x0
-; GFX12-NEXT:    v_cmp_lt_u64_e32 vcc_lo, s[2:3], v[0:1]
+; GFX12-NEXT:    v_cmp_le_u64_e32 vcc_lo, s[2:3], v[0:1]
 ; GFX12-NEXT:    v_cndmask_b32_e32 v1, s3, v1, vcc_lo
 ; GFX12-NEXT:    v_cndmask_b32_e32 v0, s2, v0, vcc_lo
 ; GFX12-NEXT:    scratch_store_b64 off, v[0:1], s0
@@ -5411,7 +5411,7 @@ define amdgpu_kernel void @atomic_umax_i64_ret(ptr %out, ptr %out2, i64 %in) {
 ; GCN1-NEXT:    buffer_load_dword v1, v3, s[12:15], 0 offen
 ; GCN1-NEXT:    v_mov_b32_e32 v4, s5
 ; GCN1-NEXT:    s_waitcnt vmcnt(0)
-; GCN1-NEXT:    v_cmp_lt_u64_e32 vcc, s[4:5], v[0:1]
+; GCN1-NEXT:    v_cmp_le_u64_e32 vcc, s[4:5], v[0:1]
 ; GCN1-NEXT:    v_cndmask_b32_e32 v5, v5, v0, vcc
 ; GCN1-NEXT:    v_cndmask_b32_e32 v4, v4, v1, vcc
 ; GCN1-NEXT:    buffer_store_dword v5, v2, s[12:15], 0 offen
@@ -5461,7 +5461,7 @@ define amdgpu_kernel void @atomic_umax_i64_ret(ptr %out, ptr %out2, i64 %in) {
 ; GCN2-NEXT:    v_mov_b32_e32 v5, s4
 ; GCN2-NEXT:    v_mov_b32_e32 v4, s5
 ; GCN2-NEXT:    s_waitcnt vmcnt(0)
-; GCN2-NEXT:    v_cmp_lt_u64_e32 vcc, s[4:5], v[0:1]
+; GCN2-NEXT:    v_cmp_le_u64_e32 vcc, s[4:5], v[0:1]
 ; GCN2-NEXT:    v_cndmask_b32_e32 v5, v5, v0, vcc
 ; GCN2-NEXT:    v_cndmask_b32_e32 v4, v4, v1, vcc
 ; GCN2-NEXT:    buffer_store_dword v5, v2, s[88:91], 0 offen
@@ -5500,7 +5500,7 @@ define amdgpu_kernel void @atomic_umax_i64_ret(ptr %out, ptr %out2, i64 %in) {
 ; GFX12-NEXT:    s_cselect_b32 s0, s0, -1
 ; GFX12-NEXT:    scratch_load_b64 v[0:1], off, s0
 ; GFX12-NEXT:    s_wait_loadcnt 0x0
-; GFX12-NEXT:    v_cmp_lt_u64_e32 vcc_lo, s[4:5], v[0:1]
+; GFX12-NEXT:    v_cmp_le_u64_e32 vcc_lo, s[4:5], v[0:1]
 ; GFX12-NEXT:    v_cndmask_b32_e32 v3, s5, v1, vcc_lo
 ; GFX12-NEXT:    v_cndmask_b32_e32 v2, s4, v0, vcc_lo
 ; GFX12-NEXT:    scratch_store_b64 off, v[2:3], s0
@@ -5560,7 +5560,7 @@ define amdgpu_kernel void @atomic_umax_i64_addr64(ptr %out, i64 %in, i64 %index)
 ; GCN1-NEXT:    buffer_load_dword v1, v3, s[12:15], 0 offen
 ; GCN1-NEXT:    v_mov_b32_e32 v4, s3
 ; GCN1-NEXT:    s_waitcnt vmcnt(0)
-; GCN1-NEXT:    v_cmp_lt_u64_e32 vcc, s[2:3], v[0:1]
+; GCN1-NEXT:    v_cmp_le_u64_e32 vcc, s[2:3], v[0:1]
 ; GCN1-NEXT:    v_cndmask_b32_e32 v0, v5, v0, vcc
 ; GCN1-NEXT:    v_cndmask_b32_e32 v1, v4, v1, vcc
 ; GCN1-NEXT:    buffer_store_dword v0, v2, s[12:15], 0 offen
@@ -5611,7 +5611,7 @@ define amdgpu_kernel void @atomic_umax_i64_addr64(ptr %out, i64 %in, i64 %index)
 ; GCN2-NEXT:    v_mov_b32_e32 v5, s2
 ; GCN2-NEXT:    v_mov_b32_e32 v4, s3
 ; GCN2-NEXT:    s_waitcnt vmcnt(0)
-; GCN2-NEXT:    v_cmp_lt_u64_e32 vcc, s[2:3], v[0:1]
+; GCN2-NEXT:    v_cmp_le_u64_e32 vcc, s[2:3], v[0:1]
 ; GCN2-NEXT:    v_cndmask_b32_e32 v0, v5, v0, vcc
 ; GCN2-NEXT:    v_cndmask_b32_e32 v1, v4, v1, vcc
 ; GCN2-NEXT:    buffer_store_dword v0, v2, s[88:91], 0 offen
@@ -5650,7 +5650,7 @@ define amdgpu_kernel void @atomic_umax_i64_addr64(ptr %out, i64 %in, i64 %index)
 ; GFX12-NEXT:    s_cselect_b32 s0, s0, -1
 ; GFX12-NEXT:    scratch_load_b64 v[0:1], off, s0
 ; GFX12-NEXT:    s_wait_loadcnt 0x0
-; GFX12-NEXT:    v_cmp_lt_u64_e32 vcc_lo, s[2:3], v[0:1]
+; GFX12-NEXT:    v_cmp_le_u64_e32 vcc_lo, s[2:3], v[0:1]
 ; GFX12-NEXT:    v_cndmask_b32_e32 v1, s3, v1, vcc_lo
 ; GFX12-NEXT:    v_cndmask_b32_e32 v0, s2, v0, vcc_lo
 ; GFX12-NEXT:    scratch_store_b64 off, v[0:1], s0
@@ -5703,7 +5703,7 @@ define amdgpu_kernel void @atomic_umax_i64_ret_addr64(ptr %out, ptr %out2, i64 %
 ; GCN1-NEXT:    buffer_load_dword v1, v3, s[16:19], 0 offen
 ; GCN1-NEXT:    v_mov_b32_e32 v4, s13
 ; GCN1-NEXT:    s_waitcnt vmcnt(0)
-; GCN1-NEXT:    v_cmp_lt_u64_e32 vcc, s[12:13], v[0:1]
+; GCN1-NEXT:    v_cmp_le_u64_e32 vcc, s[12:13], v[0:1]
 ; GCN1-NEXT:    v_cndmask_b32_e32 v5, v5, v0, vcc
 ; GCN1-NEXT:    v_cndmask_b32_e32 v4, v4, v1, vcc
 ; GCN1-NEXT:    buffer_store_dword v5, v2, s[16:19], 0 offen
@@ -5755,7 +5755,7 @@ define amdgpu_kernel void @atomic_umax_i64_ret_addr64(ptr %out, ptr %out2, i64 %
 ; GCN2-NEXT:    v_mov_b32_e32 v5, s12
 ; GCN2-NEXT:    v_mov_b32_e32 v4, s13
 ; GCN2-NEXT:    s_waitcnt vmcnt(0)
-; GCN2-NEXT:    v_cmp_lt_u64_e32 vcc, s[12:13], v[0:1]
+; GCN2-NEXT:    v_cmp_le_u64_e32 vcc, s[12:13], v[0:1]
 ; GCN2-NEXT:    v_cndmask_b32_e32 v5, v5, v0, vcc
 ; GCN2-NEXT:    v_cndmask_b32_e32 v4, v4, v1, vcc
 ; GCN2-NEXT:    buffer_store_dword v5, v2, s[88:91], 0 offen
@@ -5795,7 +5795,7 @@ define amdgpu_kernel void @atomic_umax_i64_ret_addr64(ptr %out, ptr %out2, i64 %
 ; GFX12-NEXT:    s_cselect_b32 s0, s0, -1
 ; GFX12-NEXT:    scratch_load_b64 v[0:1], off, s0
 ; GFX12-NEXT:    s_wait_loadcnt 0x0
-; GFX12-NEXT:    v_cmp_lt_u64_e32 vcc_lo, s[4:5], v[0:1]
+; GFX12-NEXT:    v_cmp_le_u64_e32 vcc_lo, s[4:5], v[0:1]
 ; GFX12-NEXT:    v_cndmask_b32_e32 v3, s5, v1, vcc_lo
 ; GFX12-NEXT:    v_cndmask_b32_e32 v2, s4, v0, vcc_lo
 ; GFX12-NEXT:    scratch_store_b64 off, v[2:3], s0
@@ -5854,7 +5854,7 @@ define amdgpu_kernel void @atomic_min_i64_offset(ptr %out, i64 %in) {
 ; GCN1-NEXT:    buffer_load_dword v1, v3, s[12:15], 0 offen
 ; GCN1-NEXT:    v_mov_b32_e32 v4, s3
 ; GCN1-NEXT:    s_waitcnt vmcnt(0)
-; GCN1-NEXT:    v_cmp_ge_i64_e32 vcc, s[2:3], v[0:1]
+; GCN1-NEXT:    v_cmp_gt_i64_e32 vcc, s[2:3], v[0:1]
 ; GCN1-NEXT:    v_cndmask_b32_e32 v0, v5, v0, vcc
 ; GCN1-NEXT:    v_cndmask_b32_e32 v1, v4, v1, vcc
 ; GCN1-NEXT:    buffer_store_dword v0, v2, s[12:15], 0 offen
@@ -5903,7 +5903,7 @@ define amdgpu_kernel void @atomic_min_i64_offset(ptr %out, i64 %in) {
 ; GCN2-NEXT:    v_mov_b32_e32 v5, s2
 ; GCN2-NEXT:    v_mov_b32_e32 v4, s3
 ; GCN2-NEXT:    s_waitcnt vmcnt(0)
-; GCN2-NEXT:    v_cmp_ge_i64_e32 vcc, s[2:3], v[0:1]
+; GCN2-NEXT:    v_cmp_gt_i64_e32 vcc, s[2:3], v[0:1]
 ; GCN2-NEXT:    v_cndmask_b32_e32 v0, v5, v0, vcc
 ; GCN2-NEXT:    v_cndmask_b32_e32 v1, v4, v1, vcc
 ; GCN2-NEXT:    buffer_store_dword v0, v2, s[88:91], 0 offen
@@ -5939,7 +5939,7 @@ define amdgpu_kernel void @atomic_min_i64_offset(ptr %out, i64 %in) {
 ; GFX12-NEXT:    s_cselect_b32 s0, s0, -1
 ; GFX12-NEXT:    scratch_load_b64 v[0:1], off, s0
 ; GFX12-NEXT:    s_wait_loadcnt 0x0
-; GFX12-NEXT:    v_cmp_ge_i64_e32 vcc_lo, s[2:3], v[0:1]
+; GFX12-NEXT:    v_cmp_gt_i64_e32 vcc_lo, s[2:3], v[0:1]
 ; GFX12-NEXT:    v_cndmask_b32_e32 v1, s3, v1, vcc_lo
 ; GFX12-NEXT:    v_cndmask_b32_e32 v0, s2, v0, vcc_lo
 ; GFX12-NEXT:    scratch_store_b64 off, v[0:1], s0
@@ -5992,7 +5992,7 @@ define amdgpu_kernel void @atomic_min_i64_ret_offset(ptr %out, ptr %out2, i64 %i
 ; GCN1-NEXT:    buffer_load_dword v1, v3, s[12:15], 0 offen
 ; GCN1-NEXT:    v_mov_b32_e32 v4, s5
 ; GCN1-NEXT:    s_waitcnt vmcnt(0)
-; GCN1-NEXT:    v_cmp_ge_i64_e32 vcc, s[4:5], v[0:1]
+; GCN1-NEXT:    v_cmp_gt_i64_e32 vcc, s[4:5], v[0:1]
 ; GCN1-NEXT:    v_cndmask_b32_e32 v5, v5, v0, vcc
 ; GCN1-NEXT:    v_cndmask_b32_e32 v4, v4, v1, vcc
 ; GCN1-NEXT:    buffer_store_dword v5, v2, s[12:15], 0 offen
@@ -6044,7 +6044,7 @@ define amdgpu_kernel void @atomic_min_i64_ret_offset(ptr %out, ptr %out2, i64 %i
 ; GCN2-NEXT:    v_mov_b32_e32 v5, s4
 ; GCN2-NEXT:    v_mov_b32_e32 v4, s5
 ; GCN2-NEXT:    s_waitcnt vmcnt(0)
-; GCN2-NEXT:    v_cmp_ge_i64_e32 vcc, s[4:5], v[0:1]
+; GCN2-NEXT:    v_cmp_gt_i64_e32 vcc, s[4:5], v[0:1]
 ; GCN2-NEXT:    v_cndmask_b32_e32 v5, v5, v0, vcc
 ; GCN2-NEXT:    v_cndmask_b32_e32 v4, v4, v1, vcc
 ; GCN2-NEXT:    buffer_store_dword v5, v2, s[88:91], 0 offen
@@ -6084,7 +6084,7 @@ define amdgpu_kernel void @atomic_min_i64_ret_offset(ptr %out, ptr %out2, i64 %i
 ; GFX12-NEXT:    s_cselect_b32 s0, s0, -1
 ; GFX12-NEXT:    scratch_load_b64 v[0:1], off, s0
 ; GFX12-NEXT:    s_wait_loadcnt 0x0
-; GFX12-NEXT:    v_cmp_ge_i64_e32 vcc_lo, s[4:5], v[0:1]
+; GFX12-NEXT:    v_cmp_gt_i64_e32 vcc_lo, s[4:5], v[0:1]
 ; GFX12-NEXT:    v_cndmask_b32_e32 v3, s5, v1, vcc_lo
 ; GFX12-NEXT:    v_cndmask_b32_e32 v2, s4, v0, vcc_lo
 ; GFX12-NEXT:    scratch_store_b64 off, v[2:3], s0
@@ -6147,7 +6147,7 @@ define amdgpu_kernel void @atomic_min_i64_addr64_offset(ptr %out, i64 %in, i64 %
 ; GCN1-NEXT:    buffer_load_dword v1, v3, s[12:15], 0 offen
 ; GCN1-NEXT:    v_mov_b32_e32 v4, s3
 ; GCN1-NEXT:    s_waitcnt vmcnt(0)
-; GCN1-NEXT:    v_cmp_ge_i64_e32 vcc, s[2:3], v[0:1]
+; GCN1-NEXT:    v_cmp_gt_i64_e32 vcc, s[2:3], v[0:1]
 ; GCN1-NEXT:    v_cndmask_b32_e32 v0, v5, v0, vcc
 ; GCN1-NEXT:    v_cndmask_b32_e32 v1, v4, v1, vcc
 ; GCN1-NEXT:    buffer_store_dword v0, v2, s[12:15], 0 offen
@@ -6200,7 +6200,7 @@ define amdgpu_kernel void @atomic_min_i64_addr64_offset(ptr %out, i64 %in, i64 %
 ; GCN2-NEXT:    v_mov_b32_e32 v5, s2
 ; GCN2-NEXT:    v_mov_b32_e32 v4, s3
 ; GCN2-NEXT:    s_waitcnt vmcnt(0)
-; GCN2-NEXT:    v_cmp_ge_i64_e32 vcc, s[2:3], v[0:1]
+; GCN2-NEXT:    v_cmp_gt_i64_e32 vcc, s[2:3], v[0:1]
 ; GCN2-NEXT:    v_cndmask_b32_e32 v0, v5, v0, vcc
 ; GCN2-NEXT:    v_cndmask_b32_e32 v1, v4, v1, vcc
 ; GCN2-NEXT:    buffer_store_dword v0, v2, s[88:91], 0 offen
@@ -6241,7 +6241,7 @@ define amdgpu_kernel void @atomic_min_i64_addr64_offset(ptr %out, i64 %in, i64 %
 ; GFX12-NEXT:    s_cselect_b32 s0, s0, -1
 ; GFX12-NEXT:    scratch_load_b64 v[0:1], off, s0
 ; GFX12-NEXT:    s_wait_loadcnt 0x0
-; GFX12-NEXT:    v_cmp_ge_i64_e32 vcc_lo, s[2:3], v[0:1]
+; GFX12-NEXT:    v_cmp_gt_i64_e32 vcc_lo, s[2:3], v[0:1]
 ; GFX12-NEXT:    v_cndmask_b32_e32 v1, s3, v1, vcc_lo
 ; GFX12-NEXT:    v_cndmask_b32_e32 v0, s2, v0, vcc_lo
 ; GFX12-NEXT:    scratch_store_b64 off, v[0:1], s0
@@ -6297,7 +6297,7 @@ define amdgpu_kernel void @atomic_min_i64_ret_addr64_offset(ptr %out, ptr %out2,
 ; GCN1-NEXT:    buffer_load_dword v1, v3, s[16:19], 0 offen
 ; GCN1-NEXT:    v_mov_b32_e32 v4, s13
 ; GCN1-NEXT:    s_waitcnt vmcnt(0)
-; GCN1-NEXT:    v_cmp_ge_i64_e32 vcc, s[12:13], v[0:1]
+; GCN1-NEXT:    v_cmp_gt_i64_e32 vcc, s[12:13], v[0:1]
 ; GCN1-NEXT:    v_cndmask_b32_e32 v5, v5, v0, vcc
 ; GCN1-NEXT:    v_cndmask_b32_e32 v4, v4, v1, vcc
 ; GCN1-NEXT:    buffer_store_dword v5, v2, s[16:19], 0 offen
@@ -6351,7 +6351,7 @@ define amdgpu_kernel void @atomic_min_i64_ret_addr64_offset(ptr %out, ptr %out2,
 ; GCN2-NEXT:    v_mov_b32_e32 v5, s12
 ; GCN2-NEXT:    v_mov_b32_e32 v4, s13
 ; GCN2-NEXT:    s_waitcnt vmcnt(0)
-; GCN2-NEXT:    v_cmp_ge_i64_e32 vcc, s[12:13], v[0:1]
+; GCN2-NEXT:    v_cmp_gt_i64_e32 vcc, s[12:13], v[0:1]
 ; GCN2-NEXT:    v_cndmask_b32_e32 v5, v5, v0, vcc
 ; GCN2-NEXT:    v_cndmask_b32_e32 v4, v4, v1, vcc
 ; GCN2-NEXT:    buffer_store_dword v5, v2, s[88:91], 0 offen
@@ -6392,7 +6392,7 @@ define amdgpu_kernel void @atomic_min_i64_ret_addr64_offset(ptr %out, ptr %out2,
 ; GFX12-NEXT:    s_cselect_b32 s0, s0, -1
 ; GFX12-NEXT:    scratch_load_b64 v[0:1], off, s0
 ; GFX12-NEXT:    s_wait_loadcnt 0x0
-; GFX12-NEXT:    v_cmp_ge_i64_e32 vcc_lo, s[4:5], v[0:1]
+; GFX12-NEXT:    v_cmp_gt_i64_e32 vcc_lo, s[4:5], v[0:1]
 ; GFX12-NEXT:    v_cndmask_b32_e32 v3, s5, v1, vcc_lo
 ; GFX12-NEXT:    v_cndmask_b32_e32 v2, s4, v0, vcc_lo
 ; GFX12-NEXT:    scratch_store_b64 off, v[2:3], s0
@@ -6450,7 +6450,7 @@ define amdgpu_kernel void @atomic_min_i64(ptr %out, i64 %in) {
 ; GCN1-NEXT:    buffer_load_dword v1, v3, s[12:15], 0 offen
 ; GCN1-NEXT:    v_mov_b32_e32 v4, s3
 ; GCN1-NEXT:    s_waitcnt vmcnt(0)
-; GCN1-NEXT:    v_cmp_ge_i64_e32 vcc, s[2:3], v[0:1]
+; GCN1-NEXT:    v_cmp_gt_i64_e32 vcc, s[2:3], v[0:1]
 ; GCN1-NEXT:    v_cndmask_b32_e32 v0, v5, v0, vcc
 ; GCN1-NEXT:    v_cndmask_b32_e32 v1, v4, v1, vcc
 ; GCN1-NEXT:    buffer_store_dword v0, v2, s[12:15], 0 offen
@@ -6497,7 +6497,7 @@ define amdgpu_kernel void @atomic_min_i64(ptr %out, i64 %in) {
 ; GCN2-NEXT:    v_mov_b32_e32 v5, s2
 ; GCN2-NEXT:    v_mov_b32_e32 v4, s3
 ; GCN2-NEXT:    s_waitcnt vmcnt(0)
-; GCN2-NEXT:    v_cmp_ge_i64_e32 vcc, s[2:3], v[0:1]
+; GCN2-NEXT:    v_cmp_gt_i64_e32 vcc, s[2:3], v[0:1]
 ; GCN2-NEXT:    v_cndmask_b32_e32 v0, v5, v0, vcc
 ; GCN2-NEXT:    v_cndmask_b32_e32 v1, v4, v1, vcc
 ; GCN2-NEXT:    buffer_store_dword v0, v2, s[88:91], 0 offen
@@ -6532,7 +6532,7 @@ define amdgpu_kernel void @atomic_min_i64(ptr %out, i64 %in) {
 ; GFX12-NEXT:    s_cselect_b32 s0, s0, -1
 ; GFX12-NEXT:    scratch_load_b64 v[0:1], off, s0
 ; GFX12-NEXT:    s_wait_loadcnt 0x0
-; GFX12-NEXT:    v_cmp_ge_i64_e32 vcc_lo, s[2:3], v[0:1]
+; GFX12-NEXT:    v_cmp_gt_i64_e32 vcc_lo, s[2:3], v[0:1]
 ; GFX12-NEXT:    v_cndmask_b32_e32 v1, s3, v1, vcc_lo
 ; GFX12-NEXT:    v_cndmask_b32_e32 v0, s2, v0, vcc_lo
 ; GFX12-NEXT:    scratch_store_b64 off, v[0:1], s0
@@ -6582,7 +6582,7 @@ define amdgpu_kernel void @atomic_min_i64_ret(ptr %out, ptr %out2, i64 %in) {
 ; GCN1-NEXT:    buffer_load_dword v1, v3, s[12:15], 0 offen
 ; GCN1-NEXT:    v_mov_b32_e32 v4, s5
 ; GCN1-NEXT:    s_waitcnt vmcnt(0)
-; GCN1-NEXT:    v_cmp_ge_i64_e32 vcc, s[4:5], v[0:1]
+; GCN1-NEXT:    v_cmp_gt_i64_e32 vcc, s[4:5], v[0:1]
 ; GCN1-NEXT:    v_cndmask_b32_e32 v5, v5, v0, vcc
 ; GCN1-NEXT:    v_cndmask_b32_e32 v4, v4, v1, vcc
 ; GCN1-NEXT:    buffer_store_dword v5, v2, s[12:15], 0 offen
@@ -6632,7 +6632,7 @@ define amdgpu_kernel void @atomic_min_i64_ret(ptr %out, ptr %out2, i64 %in) {
 ; GCN2-NEXT:    v_mov_b32_e32 v5, s4
 ; GCN2-NEXT:    v_mov_b32_e32 v4, s5
 ; GCN2-NEXT:    s_waitcnt vmcnt(0)
-; GCN2-NEXT:    v_cmp_ge_i64_e32 vcc, s[4:5], v[0:1]
+; GCN2-NEXT:    v_cmp_gt_i64_e32 vcc, s[4:5], v[0:1]
 ; GCN2-NEXT:    v_cndmask_b32_e32 v5, v5, v0, vcc
 ; GCN2-NEXT:    v_cndmask_b32_e32 v4, v4, v1, vcc
 ; GCN2-NEXT:    buffer_store_dword v5, v2, s[88:91], 0 offen
@@ -6671,7 +6671,7 @@ define amdgpu_kernel void @atomic_min_i64_ret(ptr %out, ptr %out2, i64 %in) {
 ; GFX12-NEXT:    s_cselect_b32 s0, s0, -1
 ; GFX12-NEXT:    scratch_load_b64 v[0:1], off, s0
 ; GFX12-NEXT:    s_wait_loadcnt 0x0
-; GFX12-NEXT:    v_cmp_ge_i64_e32 vcc_lo, s[4:5], v[0:1]
+; GFX12-NEXT:    v_cmp_gt_i64_e32 vcc_lo, s[4:5], v[0:1]
 ; GFX12-NEXT:    v_cndmask_b32_e32 v3, s5, v1, vcc_lo
 ; GFX12-NEXT:    v_cndmask_b32_e32 v2, s4, v0, vcc_lo
 ; GFX12-NEXT:    scratch_store_b64 off, v[2:3], s0
@@ -6731,7 +6731,7 @@ define amdgpu_kernel void @atomic_min_i64_addr64(ptr %out, i64 %in, i64 %index)
 ; GCN1-NEXT:    buffer_load_dword v1, v3, s[12:15], 0 offen
 ; GCN1-NEXT:    v_mov_b32_e32 v4, s3
 ; GCN1-NEXT:    s_waitcnt vmcnt(0)
-; GCN1-NEXT:    v_cmp_ge_i64_e32 vcc, s[2:3], v[0:1]
+; GCN1-NEXT:    v_cmp_gt_i64_e32 vcc, s[2:3], v[0:1]
 ; GCN1-NEXT:    v_cndmask_b32_e32 v0, v5, v0, vcc
 ; GCN1-NEXT:    v_cndmask_b32_e32 v1, v4, v1, vcc
 ; GCN1-NEXT:    buffer_store_dword v0, v2, s[12:15], 0 offen
@@ -6782,7 +6782,7 @@ define amdgpu_kernel void @atomic_min_i64_addr64(ptr %out, i64 %in, i64 %index)
 ; GCN2-NEXT:    v_mov_b32_e32 v5, s2
 ; GCN2-NEXT:    v_mov_b32_e32 v4, s3
 ; GCN2-NEXT:    s_waitcnt vmcnt(0)
-; GCN2-NEXT:    v_cmp_ge_i64_e32 vcc, s[2:3], v[0:1]
+; GCN2-NEXT:    v_cmp_gt_i64_e32 vcc, s[2:3], v[0:1]
 ; GCN2-NEXT:    v_cndmask_b32_e32 v0, v5, v0, vcc
 ; GCN2-NEXT:    v_cndmask_b32_e32 v1, v4, v1, vcc
 ; GCN2-NEXT:    buffer_store_dword v0, v2, s[88:91], 0 offen
@@ -6821,7 +6821,7 @@ define amdgpu_kernel void @atomic_min_i64_addr64(ptr %out, i64 %in, i64 %index)
 ; GFX12-NEXT:    s_cselect_b32 s0, s0, -1
 ; GFX12-NEXT:    scratch_load_b64 v[0:1], off, s0
 ; GFX12-NEXT:    s_wait_loadcnt 0x0
-; GFX12-NEXT:    v_cmp_ge_i64_e32 vcc_lo, s[2:3], v[0:1]
+; GFX12-NEXT:    v_cmp_gt_i64_e32 vcc_lo, s[2:3], v[0:1]
 ; GFX12-NEXT:    v_cndmask_b32_e32 v1, s3, v1, vcc_lo
 ; GFX12-NEXT:    v_cndmask_b32_e32 v0, s2, v0, vcc_lo
 ; GFX12-NEXT:    scratch_store_b64 off, v[0:1], s0
@@ -6874,7 +6874,7 @@ define amdgpu_kernel void @atomic_min_i64_ret_addr64(ptr %out, ptr %out2, i64 %i
 ; GCN1-NEXT:    buffer_load_dword v1, v3, s[16:19], 0 offen
 ; GCN1-NEXT:    v_mov_b32_e32 v4, s13
 ; GCN1-NEXT:    s_waitcnt vmcnt(0)
-; GCN1-NEXT:    v_cmp_ge_i64_e32 vcc, s[12:13], v[0:1]
+; GCN1-NEXT:    v_cmp_gt_i64_e32 vcc, s[12:13], v[0:1]
 ; GCN1-NEXT:    v_cndmask_b32_e32 v5, v5, v0, vcc
 ; GCN1-NEXT:    v_cndmask_b32_e32 v4, v4, v1, vcc
 ; GCN1-NEXT:    buffer_store_dword v5, v2, s[16:19], 0 offen
@@ -6926,7 +6926,7 @@ define amdgpu_kernel void @atomic_min_i64_ret_addr64(ptr %out, ptr %out2, i64 %i
 ; GCN2-NEXT:    v_mov_b32_e32 v5, s12
 ; GCN2-NEXT:    v_mov_b32_e32 v4, s13
 ; GCN2-NEXT:    s_waitcnt vmcnt(0)
-; GCN2-NEXT:    v_cmp_ge_i64_e32 vcc, s[12:13], v[0:1]
+; GCN2-NEXT:    v_cmp_gt_i64_e32 vcc, s[12:13], v[0:1]
 ; GCN2-NEXT:    v_cndmask_b32_e32 v5, v5, v0, vcc
 ; GCN2-NEXT:    v_cndmask_b32_e32 v4, v4, v1, vcc
 ; GCN2-NEXT:    buffer_store_dword v5, v2, s[88:91], 0 offen
@@ -6966,7 +6966,7 @@ define amdgpu_kernel void @atomic_min_i64_ret_addr64(ptr %out, ptr %out2, i64 %i
 ; GFX12-NEXT:    s_cselect_b32 s0, s0, -1
 ; GFX12-NEXT:    scratch_load_b64 v[0:1], off, s0
 ; GFX12-NEXT:    s_wait_loadcnt 0x0
-; GFX12-NEXT:    v_cmp_ge_i64_e32 vcc_lo, s[4:5], v[0:1]
+; GFX12-NEXT:    v_cmp_gt_i64_e32 vcc_lo, s[4:5], v[0:1]
 ; GFX12-NEXT:    v_cndmask_b32_e32 v3, s5, v1, vcc_lo
 ; GFX12-NEXT:    v_cndmask_b32_e32 v2, s4, v0, vcc_lo
 ; GFX12-NEXT:    scratch_store_b64 off, v[2:3], s0
@@ -7025,7 +7025,7 @@ define amdgpu_kernel void @atomic_umin_i64_offset(ptr %out, i64 %in) {
 ; GCN1-NEXT:    buffer_load_dword v1, v3, s[12:15], 0 offen
 ; GCN1-NEXT:    v_mov_b32_e32 v4, s3
 ; GCN1-NEXT:    s_waitcnt vmcnt(0)
-; GCN1-NEXT:    v_cmp_ge_u64_e32 vcc, s[2:3], v[0:1]
+; GCN1-NEXT:    v_cmp_gt_u64_e32 vcc, s[2:3], v[0:1]
 ; GCN1-NEXT:    v_cndmask_b32_e32 v0, v5, v0, vcc
 ; GCN1-NEXT:    v_cndmask_b32_e32 v1, v4, v1, vcc
 ; GCN1-NEXT:    buffer_store_dword v0, v2, s[12:15], 0 offen
@@ -7074,7 +7074,7 @@ define amdgpu_kernel void @atomic_umin_i64_offset(ptr %out, i64 %in) {
 ; GCN2-NEXT:    v_mov_b32_e32 v5, s2
 ; GCN2-NEXT:    v_mov_b32_e32 v4, s3
 ; GCN2-NEXT:    s_waitcnt vmcnt(0)
-; GCN2-NEXT:    v_cmp_ge_u64_e32 vcc, s[2:3], v[0:1]
+; GCN2-NEXT:    v_cmp_gt_u64_e32 vcc, s[2:3], v[0:1]
 ; GCN2-NEXT:    v_cndmask_b32_e32 v0, v5, v0, vcc
 ; GCN2-NEXT:    v_cndmask_b32_e32 v1, v4, v1, vcc
 ; GCN2-NEXT:    buffer_store_dword v0, v2, s[88:91], 0 offen
@@ -7110,7 +7110,7 @@ define amdgpu_kernel void @atomic_umin_i64_offset(ptr %out, i64 %in) {
 ; GFX12-NEXT:    s_cselect_b32 s0, s0, -1
 ; GFX12-NEXT:    scratch_load_b64 v[0:1], off, s0
 ; GFX12-NEXT:    s_wait_loadcnt 0x0
-; GFX12-NEXT:    v_cmp_ge_u64_e32 vcc_lo, s[2:3], v[0:1]
+; GFX12-NEXT:    v_cmp_gt_u64_e32 vcc_lo, s[2:3], v[0:1]
 ; GFX12-NEXT:    v_cndmask_b32_e32 v1, s3, v1, vcc_lo
 ; GFX12-NEXT:    v_cndmask_b32_e32 v0, s2, v0, vcc_lo
 ; GFX12-NEXT:    scratch_store_b64 off, v[0:1], s0
@@ -7163,7 +7163,7 @@ define amdgpu_kernel void @atomic_umin_i64_ret_offset(ptr %out, ptr %out2, i64 %
 ; GCN1-NEXT:    buffer_load_dword v1, v3, s[12:15], 0 offen
 ; GCN1-NEXT:    v_mov_b32_e32 v4, s5
 ; GCN1-NEXT:    s_waitcnt vmcnt(0)
-; GCN1-NEXT:    v_cmp_ge_u64_e32 vcc, s[4:5], v[0:1]
+; GCN1-NEXT:    v_cmp_gt_u64_e32 vcc, s[4:5], v[0:1]
 ; GCN1-NEXT:    v_cndmask_b32_e32 v5, v5, v0, vcc
 ; GCN1-NEXT:    v_cndmask_b32_e32 v4, v4, v1, vcc
 ; GCN1-NEXT:    buffer_store_dword v5, v2, s[12:15], 0 offen
@@ -7215,7 +7215,7 @@ define amdgpu_kernel void @atomic_umin_i64_ret_offset(ptr %out, ptr %out2, i64 %
 ; GCN2-NEXT:    v_mov_b32_e32 v5, s4
 ; GCN2-NEXT:    v_mov_b32_e32 v4, s5
 ; GCN2-NEXT:    s_waitcnt vmcnt(0)
-; GCN2-NEXT:    v_cmp_ge_u64_e32 vcc, s[4:5], v[0:1]
+; GCN2-NEXT:    v_cmp_gt_u64_e32 vcc, s[4:5], v[0:1]
 ; GCN2-NEXT:    v_cndmask_b32_e32 v5, v5, v0, vcc
 ; GCN2-NEXT:    v_cndmask_b32_e32 v4, v4, v1, vcc
 ; GCN2-NEXT:    buffer_store_dword v5, v2, s[88:91], 0 offen
@@ -7255,7 +7255,7 @@ define amdgpu_kernel void @atomic_umin_i64_ret_offset(ptr %out, ptr %out2, i64 %
 ; GFX12-NEXT:    s_cselect_b32 s0, s0, -1
 ; GFX12-NEXT:    scratch_load_b64 v[0:1], off, s0
 ; GFX12-NEXT:    s_wait_loadcnt 0x0
-; GFX12-NEXT:    v_cmp_ge_u64_e32 vcc_lo, s[4:5], v[0:1]
+; GFX12-NEXT:    v_cmp_gt_u64_e32 vcc_lo, s[4:5], v[0:1]
 ; GFX12-NEXT:    v_cndmask_b32_e32 v3, s5, v1, vcc_lo
 ; GFX12-NEXT:    v_cndmask_b32_e32 v2, s4, v0, vcc_lo
 ; GFX12-NEXT:    scratch_store_b64 off, v[2:3], s0
@@ -7318,7 +7318,7 @@ define amdgpu_kernel void @atomic_umin_i64_addr64_offset(ptr %out, i64 %in, i64
 ; GCN1-NEXT:    buffer_load_dword v1, v3, s[12:15], 0 offen
 ; GCN1-NEXT:    v_mov_b32_e32 v4, s3
 ; GCN1-NEXT:    s_waitcnt vmcnt(0)
-; GCN1-NEXT:    v_cmp_ge_u64_e32 vcc, s[2:3], v[0:1]
+; GCN1-NEXT:    v_cmp_gt_u64_e32 vcc, s[2:3], v[0:1]
 ; GCN1-NEXT:    v_cndmask_b32_e32 v0, v5, v0, vcc
 ; GCN1-NEXT:    v_cndmask_b32_e32 v1, v4, v1, vcc
 ; GCN1-NEXT:    buffer_store_dword v0, v2, s[12:15], 0 offen
@@ -7371,7 +7371,7 @@ define amdgpu_kernel void @atomic_umin_i64_addr64_offset(ptr %out, i64 %in, i64
 ; GCN2-NEXT:    v_mov_b32_e32 v5, s2
 ; GCN2-NEXT:    v_mov_b32_e32 v4, s3
 ; GCN2-NEXT:    s_waitcnt vmcnt(0)
-; GCN2-NEXT:    v_cmp_ge_u64_e32 vcc, s[2:3], v[0:1]
+; GCN2-NEXT:    v_cmp_gt_u64_e32 vcc, s[2:3], v[0:1]
 ; GCN2-NEXT:    v_cndmask_b32_e32 v0, v5, v0, vcc
 ; GCN2-NEXT:    v_cndmask_b32_e32 v1, v4, v1, vcc
 ; GCN2-NEXT:    buffer_store_dword v0, v2, s[88:91], 0 offen
@@ -7412,7 +7412,7 @@ define amdgpu_kernel void @atomic_umin_i64_addr64_offset(ptr %out, i64 %in, i64
 ; GFX12-NEXT:    s_cselect_b32 s0, s0, -1
 ; GFX12-NEXT:    scratch_load_b64 v[0:1], off, s0
 ; GFX12-NEXT:    s_wait_loadcnt 0x0
-; GFX12-NEXT:    v_cmp_ge_u64_e32 vcc_lo, s[2:3], v[0:1]
+; GFX12-NEXT:    v_cmp_gt_u64_e32 vcc_lo, s[2:3], v[0:1]
 ; GFX12-NEXT:    v_cndmask_b32_e32 v1, s3, v1, vcc_lo
 ; GFX12-NEXT:    v_cndmask_b32_e32 v0, s2, v0, vcc_lo
 ; GFX12-NEXT:    scratch_store_b64 off, v[0:1], s0
@@ -7468,7 +7468,7 @@ define amdgpu_kernel void @atomic_umin_i64_ret_addr64_offset(ptr %out, ptr %out2
 ; GCN1-NEXT:    buffer_load_dword v1, v3, s[16:19], 0 offen
 ; GCN1-NEXT:    v_mov_b32_e32 v4, s13
 ; GCN1-NEXT:    s_waitcnt vmcnt(0)
-; GCN1-NEXT:    v_cmp_ge_u64_e32 vcc, s[12:13], v[0:1]
+; GCN1-NEXT:    v_cmp_gt_u64_e32 vcc, s[12:13], v[0:1]
 ; GCN1-NEXT:    v_cndmask_b32_e32 v5, v5, v0, vcc
 ; GCN1-NEXT:    v_cndmask_b32_e32 v4, v4, v1, vcc
 ; GCN1-NEXT:    buffer_store_dword v5, v2, s[16:19], 0 offen
@@ -7522,7 +7522,7 @@ define amdgpu_kernel void @atomic_umin_i64_ret_addr64_offset(ptr %out, ptr %out2
 ; GCN2-NEXT:    v_mov_b32_e32 v5, s12
 ; GCN2-NEXT:    v_mov_b32_e32 v4, s13
 ; GCN2-NEXT:    s_waitcnt vmcnt(0)
-; GCN2-NEXT:    v_cmp_ge_u64_e32 vcc, s[12:13], v[0:1]
+; GCN2-NEXT:    v_cmp_gt_u64_e32 vcc, s[12:13], v[0:1]
 ; GCN2-NEXT:    v_cndmask_b32_e32 v5, v5, v0, vcc
 ; GCN2-NEXT:    v_cndmask_b32_e32 v4, v4, v1, vcc
 ; GCN2-NEXT:    buffer_store_dword v5, v2, s[88:91], 0 offen
@@ -7563,7 +7563,7 @@ define amdgpu_kernel void @atomic_umin_i64_ret_addr64_offset(ptr %out, ptr %out2
 ; GFX12-NEXT:    s_cselect_b32 s0, s0, -1
 ; GFX12-NEXT:    scratch_load_b64 v[0:1], off, s0
 ; GFX12-NEXT:    s_wait_loadcnt 0x0
-; GFX12-NEXT:    v_cmp_ge_u64_e32 vcc_lo, s[4:5], v[0:1]
+; GFX12-NEXT:    v_cmp_gt_u64_e32 vcc_lo, s[4:5], v[0:1]
 ; GFX12-NEXT:    v_cndmask_b32_e32 v3, s5, v1, vcc_lo
 ; GFX12-NEXT:    v_cndmask_b32_e32 v2, s4, v0, vcc_lo
 ; GFX12-NEXT:    scratch_store_b64 off, v[2:3], s0
@@ -7621,7 +7621,7 @@ define amdgpu_kernel void @atomic_umin_i64(ptr %out, i64 %in) {
 ; GCN1-NEXT:    buffer_load_dword v1, v3, s[12:15], 0 offen
 ; GCN1-NEXT:    v_mov_b32_e32 v4, s3
 ; GCN1-NEXT:    s_waitcnt vmcnt(0)
-; GCN1-NEXT:    v_cmp_ge_u64_e32 vcc, s[2:3], v[0:1]
+; GCN1-NEXT:    v_cmp_gt_u64_e32 vcc, s[2:3], v[0:1]
 ; GCN1-NEXT:    v_cndmask_b32_e32 v0, v5, v0, vcc
 ; GCN1-NEXT:    v_cndmask_b32_e32 v1, v4, v1, vcc
 ; GCN1-NEXT:    buffer_store_dword v0, v2, s[12:15], 0 offen
@@ -7668,7 +7668,7 @@ define amdgpu_kernel void @atomic_umin_i64(ptr %out, i64 %in) {
 ; GCN2-NEXT:    v_mov_b32_e32 v5, s2
 ; GCN2-NEXT:    v_mov_b32_e32 v4, s3
 ; GCN2-NEXT:    s_waitcnt vmcnt(0)
-; GCN2-NEXT:    v_cmp_ge_u64_e32 vcc, s[2:3], v[0:1]
+; GCN2-NEXT:    v_cmp_gt_u64_e32 vcc, s[2:3], v[0:1]
 ; GCN2-NEXT:    v_cndmask_b32_e32 v0, v5, v0, vcc
 ; GCN2-NEXT:    v_cndmask_b32_e32 v1, v4, v1, vcc
 ; GCN2-NEXT:    buffer_store_dword v0, v2, s[88:91], 0 offen
@@ -7703,7 +7703,7 @@ define amdgpu_kernel void @atomic_umin_i64(ptr %out, i64 %in) {
 ; GFX12-NEXT:    s_cselect_b32 s0, s0, -1
 ; GFX12-NEXT:    scratch_load_b64 v[0:1], off, s0
 ; GFX12-NEXT:    s_wait_loadcnt 0x0
-; GFX12-NEXT:    v_cmp_ge_u64_e32 vcc_lo, s[2:3], v[0:1]
+; GFX12-NEXT:    v_cmp_gt_u64_e32 vcc_lo, s[2:3], v[0:1]
 ; GFX12-NEXT:    v_cndmask_b32_e32 v1, s3, v1, vcc_lo
 ; GFX12-NEXT:    v_cndmask_b32_e32 v0, s2, v0, vcc_lo
 ; GFX12-NEXT:    scratch_store_b64 off, v[0:1], s0
@@ -7753,7 +7753,7 @@ define amdgpu_kernel void @atomic_umin_i64_ret(ptr %out, ptr %out2, i64 %in) {
 ; GCN1-NEXT:    buffer_load_dword v1, v3, s[12:15], 0 offen
 ; GCN1-NEXT:    v_mov_b32_e32 v4, s5
 ; GCN1-NEXT:    s_waitcnt vmcnt(0)
-; GCN1-NEXT:    v_cmp_ge_u64_e32 vcc, s[4:5], v[0:1]
+; GCN1-NEXT:    v_cmp_gt_u64_e32 vcc, s[4:5], v[0:1]
 ; GCN1-NEXT:    v_cndmask_b32_e32 v5, v5, v0, vcc
 ; GCN1-NEXT:    v_cndmask_b32_e32 v4, v4, v1, vcc
 ; GCN1-NEXT:    buffer_store_dword v5, v2, s[12:15], 0 offen
@@ -7803,7 +7803,7 @@ define amdgpu_kernel void @atomic_umin_i64_ret(ptr %out, ptr %out2, i64 %in) {
 ; GCN2-NEXT:    v_mov_b32_e32 v5, s4
 ; GCN2-NEXT:    v_mov_b32_e32 v4, s5
 ; GCN2-NEXT:    s_waitcnt vmcnt(0)
-; GCN2-NEXT:    v_cmp_ge_u64_e32 vcc, s[4:5], v[0:1]
+; GCN2-NEXT:    v_cmp_gt_u64_e32 vcc, s[4:5], v[0:1]
 ; GCN2-NEXT:    v_cndmask_b32_e32 v5, v5, v0, vcc
 ; GCN2-NEXT:    v_cndmask_b32_e32 v4, v4, v1, vcc
 ; GCN2-NEXT:    buffer_store_dword v5, v2, s[88:91], 0 offen
@@ -7842,7 +7842,7 @@ define amdgpu_kernel void @atomic_umin_i64_ret(ptr %out, ptr %out2, i64 %in) {
 ; GFX12-NEXT:    s_cselect_b32 s0, s0, -1
 ; GFX12-NEXT:    scratch_load_b64 v[0:1], off, s0
 ; GFX12-NEXT:    s_wait_loadcnt 0x0
-; GFX12-NEXT:    v_cmp_ge_u64_e32 vcc_lo, s[4:5], v[0:1]
+; GFX12-NEXT:    v_cmp_gt_u64_e32 vcc_lo, s[4:5], v[0:1]
 ; GFX12-NEXT:    v_cndmask_b32_e32 v3, s5, v1, vcc_lo
 ; GFX12-NEXT:    v_cndmask_b32_e32 v2, s4, v0, vcc_lo
 ; GFX12-NEXT:    scratch_store_b64 off, v[2:3], s0
@@ -7902,7 +7902,7 @@ define amdgpu_kernel void @atomic_umin_i64_addr64(ptr %out, i64 %in, i64 %index)
 ; GCN1-NEXT:    buffer_load_dword v1, v3, s[12:15], 0 offen
 ; GCN1-NEXT:    v_mov_b32_e32 v4, s3
 ; GCN1-NEXT:    s_waitcnt vmcnt(0)
-; GCN1-NEXT:    v_cmp_ge_u64_e32 vcc, s[2:3], v[0:1]
+; GCN1-NEXT:    v_cmp_gt_u64_e32 vcc, s[2:3], v[0:1]
 ; GCN1-NEXT:    v_cndmask_b32_e32 v0, v5, v0, vcc
 ; GCN1-NEXT:    v_cndmask_b32_e32 v1, v4, v1, vcc
 ; GCN1-NEXT:    buffer_store_dword v0, v2, s[12:15], 0 offen
@@ -7953,7 +7953,7 @@ define amdgpu_kernel void @atomic_umin_i64_addr64(ptr %out, i64 %in, i64 %index)
 ; GCN2-NEXT:    v_mov_b32_e32 v5, s2
 ; GCN2-NEXT:    v_mov_b32_e32 v4, s3
 ; GCN2-NEXT:    s_waitcnt vmcnt(0)
-; GCN2-NEXT:    v_cmp_ge_u64_e32 vcc, s[2:3], v[0:1]
+; GCN2-NEXT:    v_cmp_gt_u64_e32 vcc, s[2:3], v[0:1]
 ; GCN2-NEXT:    v_cndmask_b32_e32 v0, v5, v0, vcc
 ; GCN2-NEXT:    v_cndmask_b32_e32 v1, v4, v1, vcc
 ; GCN2-NEXT:    buffer_store_dword v0, v2, s[88:91], 0 offen
@@ -7992,7 +7992,7 @@ define amdgpu_kernel void @atomic_umin_i64_addr64(ptr %out, i64 %in, i64 %index)
 ; GFX12-NEXT:    s_cselect_b32 s0, s0, -1
 ; GFX12-NEXT:    scratch_load_b64 v[0:1], off, s0
 ; GFX12-NEXT:    s_wait_loadcnt 0x0
-; GFX12-NEXT:    v_cmp_ge_u64_e32 vcc_lo, s[2:3], v[0:1]
+; GFX12-NEXT:    v_cmp_gt_u64_e32 vcc_lo, s[2:3], v[0:1]
 ; GFX12-NEXT:    v_cndmask_b32_e32 v1, s3, v1, vcc_lo
 ; GFX12-NEXT:    v_cndmask_b32_e32 v0, s2, v0, vcc_lo
 ; GFX12-NEXT:    scratch_store_b64 off, v[0:1], s0
@@ -8045,7 +8045,7 @@ define amdgpu_kernel void @atomic_umin_i64_ret_addr64(ptr %out, ptr %out2, i64 %
 ; GCN1-NEXT:    buffer_load_dword v1, v3, s[16:19], 0 offen
 ; GCN1-NEXT:    v_mov_b32_e32 v4, s13
 ; GCN1-NEXT:    s_waitcnt vmcnt(0)
-; GCN1-NEXT:    v_cmp_ge_u64_e32 vcc, s[12:13], v[0:1]
+; GCN1-NEXT:    v_cmp_gt_u64_e32 vcc, s[12:13], v[0:1]
 ; GCN1-NEXT:    v_cndmask_b32_e32 v5, v5, v0, vcc
 ; GCN1-NEXT:    v_cndmask_b32_e32 v4, v4, v1, vcc
 ; GCN1-NEXT:    buffer_store_dword v5, v2, s[16:19], 0 offen
@@ -8097,7 +8097,7 @@ define amdgpu_kernel void @atomic_umin_i64_ret_addr64(ptr %out, ptr %out2, i64 %
 ; GCN2-NEXT:    v_mov_b32_e32 v5, s12
 ; GCN2-NEXT:    v_mov_b32_e32 v4, s13
 ; GCN2-NEXT:    s_waitcnt vmcnt(0)
-; GCN2-NEXT:    v_cmp_ge_u64_e32 vcc, s[12:13], v[0:1]
+; GCN2-NEXT:    v_cmp_gt_u64_e32 vcc, s[12:13], v[0:1]
 ; GCN2-NEXT:    v_cndmask_b32_e32 v5, v5, v0, vcc
 ; GCN2-NEXT:    v_cndmask_b32_e32 v4, v4, v1, vcc
 ; GCN2-NEXT:    buffer_store_dword v5, v2, s[88:91], 0 offen
@@ -8137,7 +8137,7 @@ define amdgpu_kernel void @atomic_umin_i64_ret_addr64(ptr %out, ptr %out2, i64 %
 ; GFX12-NEXT:    s_cselect_b32 s0, s0, -1
 ; GFX12-NEXT:    scratch_load_b64 v[0:1], off, s0
 ; GFX12-NEXT:    s_wait_loadcnt 0x0
-; GFX12-NEXT:    v_cmp_ge_u64_e32 vcc_lo, s[4:5], v[0:1]
+; GFX12-NEXT:    v_cmp_gt_u64_e32 vcc_lo, s[4:5], v[0:1]
 ; GFX12-NEXT:    v_cndmask_b32_e32 v3, s5, v1, vcc_lo
 ; GFX12-NEXT:    v_cndmask_b32_e32 v2, s4, v0, vcc_lo
 ; GFX12-NEXT:    scratch_store_b64 off, v[2:3], s0
@@ -14010,7 +14010,7 @@ define amdgpu_kernel void @atomic_inc_i64_offset(ptr %out, i64 %in) {
 ; GCN1-NEXT:    v_add_i32_e32 v4, vcc, 1, v0
 ; GCN1-NEXT:    s_waitcnt vmcnt(0)
 ; GCN1-NEXT:    v_addc_u32_e32 v5, vcc, 0, v1, vcc
-; GCN1-NEXT:    v_cmp_gt_u64_e32 vcc, s[2:3], v[0:1]
+; GCN1-NEXT:    v_cmp_ge_u64_e32 vcc, s[2:3], v[0:1]
 ; GCN1-NEXT:    v_cndmask_b32_e32 v1, 0, v4, vcc
 ; GCN1-NEXT:    v_cndmask_b32_e32 v0, 0, v5, vcc
 ; GCN1-NEXT:    buffer_store_dword v1, v2, s[12:15], 0 offen
@@ -14061,7 +14061,7 @@ define amdgpu_kernel void @atomic_inc_i64_offset(ptr %out, i64 %in) {
 ; GCN2-NEXT:    v_add_u32_e32 v4, vcc, 1, v0
 ; GCN2-NEXT:    s_waitcnt vmcnt(0)
 ; GCN2-NEXT:    v_addc_u32_e32 v5, vcc, 0, v1, vcc
-; GCN2-NEXT:    v_cmp_gt_u64_e32 vcc, s[2:3], v[0:1]
+; GCN2-NEXT:    v_cmp_ge_u64_e32 vcc, s[2:3], v[0:1]
 ; GCN2-NEXT:    v_cndmask_b32_e32 v1, 0, v4, vcc
 ; GCN2-NEXT:    v_cndmask_b32_e32 v0, 0, v5, vcc
 ; GCN2-NEXT:    buffer_store_dword v1, v2, s[88:91], 0 offen
@@ -14100,7 +14100,7 @@ define amdgpu_kernel void @atomic_inc_i64_offset(ptr %out, i64 %in) {
 ; GFX12-NEXT:    v_add_co_u32 v2, vcc_lo, v0, 1
 ; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
 ; GFX12-NEXT:    v_add_co_ci_u32_e64 v3, null, 0, v1, vcc_lo
-; GFX12-NEXT:    v_cmp_gt_u64_e32 vcc_lo, s[2:3], v[0:1]
+; GFX12-NEXT:    v_cmp_ge_u64_e32 vcc_lo, s[2:3], v[0:1]
 ; GFX12-NEXT:    s_wait_alu 0xfffd
 ; GFX12-NEXT:    v_dual_cndmask_b32 v1, 0, v3 :: v_dual_cndmask_b32 v0, 0, v2
 ; GFX12-NEXT:    scratch_store_b64 off, v[0:1], s0
@@ -14155,7 +14155,7 @@ define amdgpu_kernel void @atomic_inc_i64_ret_offset(ptr %out, ptr %out2, i64 %i
 ; GCN1-NEXT:    v_add_i32_e32 v4, vcc, 1, v0
 ; GCN1-NEXT:    s_waitcnt vmcnt(0)
 ; GCN1-NEXT:    v_addc_u32_e32 v5, vcc, 0, v1, vcc
-; GCN1-NEXT:    v_cmp_gt_u64_e32 vcc, s[4:5], v[0:1]
+; GCN1-NEXT:    v_cmp_ge_u64_e32 vcc, s[4:5], v[0:1]
 ; GCN1-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
 ; GCN1-NEXT:    v_cndmask_b32_e32 v5, 0, v5, vcc
 ; GCN1-NEXT:    buffer_store_dword v4, v2, s[12:15], 0 offen
@@ -14208,7 +14208,7 @@ define amdgpu_kernel void @atomic_inc_i64_ret_offset(ptr %out, ptr %out2, i64 %i
 ; GCN2-NEXT:    v_add_u32_e32 v4, vcc, 1, v0
 ; GCN2-NEXT:    s_waitcnt vmcnt(0)
 ; GCN2-NEXT:    v_addc_u32_e32 v5, vcc, 0, v1, vcc
-; GCN2-NEXT:    v_cmp_gt_u64_e32 vcc, s[4:5], v[0:1]
+; GCN2-NEXT:    v_cmp_ge_u64_e32 vcc, s[4:5], v[0:1]
 ; GCN2-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
 ; GCN2-NEXT:    v_cndmask_b32_e32 v5, 0, v5, vcc
 ; GCN2-NEXT:    buffer_store_dword v4, v2, s[88:91], 0 offen
@@ -14250,7 +14250,7 @@ define amdgpu_kernel void @atomic_inc_i64_ret_offset(ptr %out, ptr %out2, i64 %i
 ; GFX12-NEXT:    v_add_co_u32 v2, vcc_lo, v0, 1
 ; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
 ; GFX12-NEXT:    v_add_co_ci_u32_e64 v3, null, 0, v1, vcc_lo
-; GFX12-NEXT:    v_cmp_gt_u64_e32 vcc_lo, s[4:5], v[0:1]
+; GFX12-NEXT:    v_cmp_ge_u64_e32 vcc_lo, s[4:5], v[0:1]
 ; GFX12-NEXT:    s_wait_alu 0xfffd
 ; GFX12-NEXT:    v_dual_cndmask_b32 v3, 0, v3 :: v_dual_cndmask_b32 v2, 0, v2
 ; GFX12-NEXT:    scratch_store_b64 off, v[2:3], s0
@@ -14315,7 +14315,7 @@ define amdgpu_kernel void @atomic_inc_i64_incr64_offset(ptr %out, i64 %in, i64 %
 ; GCN1-NEXT:    v_add_i32_e32 v4, vcc, 1, v0
 ; GCN1-NEXT:    s_waitcnt vmcnt(0)
 ; GCN1-NEXT:    v_addc_u32_e32 v5, vcc, 0, v1, vcc
-; GCN1-NEXT:    v_cmp_gt_u64_e32 vcc, s[2:3], v[0:1]
+; GCN1-NEXT:    v_cmp_ge_u64_e32 vcc, s[2:3], v[0:1]
 ; GCN1-NEXT:    v_cndmask_b32_e32 v1, 0, v4, vcc
 ; GCN1-NEXT:    v_cndmask_b32_e32 v0, 0, v5, vcc
 ; GCN1-NEXT:    buffer_store_dword v1, v2, s[12:15], 0 offen
@@ -14370,7 +14370,7 @@ define amdgpu_kernel void @atomic_inc_i64_incr64_offset(ptr %out, i64 %in, i64 %
 ; GCN2-NEXT:    v_add_u32_e32 v4, vcc, 1, v0
 ; GCN2-NEXT:    s_waitcnt vmcnt(0)
 ; GCN2-NEXT:    v_addc_u32_e32 v5, vcc, 0, v1, vcc
-; GCN2-NEXT:    v_cmp_gt_u64_e32 vcc, s[2:3], v[0:1]
+; GCN2-NEXT:    v_cmp_ge_u64_e32 vcc, s[2:3], v[0:1]
 ; GCN2-NEXT:    v_cndmask_b32_e32 v1, 0, v4, vcc
 ; GCN2-NEXT:    v_cndmask_b32_e32 v0, 0, v5, vcc
 ; GCN2-NEXT:    buffer_store_dword v1, v2, s[88:91], 0 offen
@@ -14414,7 +14414,7 @@ define amdgpu_kernel void @atomic_inc_i64_incr64_offset(ptr %out, i64 %in, i64 %
 ; GFX12-NEXT:    v_add_co_u32 v2, vcc_lo, v0, 1
 ; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
 ; GFX12-NEXT:    v_add_co_ci_u32_e64 v3, null, 0, v1, vcc_lo
-; GFX12-NEXT:    v_cmp_gt_u64_e32 vcc_lo, s[2:3], v[0:1]
+; GFX12-NEXT:    v_cmp_ge_u64_e32 vcc_lo, s[2:3], v[0:1]
 ; GFX12-NEXT:    s_wait_alu 0xfffd
 ; GFX12-NEXT:    v_dual_cndmask_b32 v1, 0, v3 :: v_dual_cndmask_b32 v0, 0, v2
 ; GFX12-NEXT:    scratch_store_b64 off, v[0:1], s0
@@ -14472,7 +14472,7 @@ define amdgpu_kernel void @atomic_inc_i64_ret_incr64_offset(ptr %out, ptr %out2,
 ; GCN1-NEXT:    v_add_i32_e32 v4, vcc, 1, v0
 ; GCN1-NEXT:    s_waitcnt vmcnt(0)
 ; GCN1-NEXT:    v_addc_u32_e32 v5, vcc, 0, v1, vcc
-; GCN1-NEXT:    v_cmp_gt_u64_e32 vcc, s[12:13], v[0:1]
+; GCN1-NEXT:    v_cmp_ge_u64_e32 vcc, s[12:13], v[0:1]
 ; GCN1-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
 ; GCN1-NEXT:    v_cndmask_b32_e32 v5, 0, v5, vcc
 ; GCN1-NEXT:    buffer_store_dword v4, v2, s[16:19], 0 offen
@@ -14527,7 +14527,7 @@ define amdgpu_kernel void @atomic_inc_i64_ret_incr64_offset(ptr %out, ptr %out2,
 ; GCN2-NEXT:    v_add_u32_e32 v4, vcc, 1, v0
 ; GCN2-NEXT:    s_waitcnt vmcnt(0)
 ; GCN2-NEXT:    v_addc_u32_e32 v5, vcc, 0, v1, vcc
-; GCN2-NEXT:    v_cmp_gt_u64_e32 vcc, s[12:13], v[0:1]
+; GCN2-NEXT:    v_cmp_ge_u64_e32 vcc, s[12:13], v[0:1]
 ; GCN2-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
 ; GCN2-NEXT:    v_cndmask_b32_e32 v5, 0, v5, vcc
 ; GCN2-NEXT:    buffer_store_dword v4, v2, s[88:91], 0 offen
@@ -14570,7 +14570,7 @@ define amdgpu_kernel void @atomic_inc_i64_ret_incr64_offset(ptr %out, ptr %out2,
 ; GFX12-NEXT:    v_add_co_u32 v2, vcc_lo, v0, 1
 ; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
 ; GFX12-NEXT:    v_add_co_ci_u32_e64 v3, null, 0, v1, vcc_lo
-; GFX12-NEXT:    v_cmp_gt_u64_e32 vcc_lo, s[4:5], v[0:1]
+; GFX12-NEXT:    v_cmp_ge_u64_e32 vcc_lo, s[4:5], v[0:1]
 ; GFX12-NEXT:    s_wait_alu 0xfffd
 ; GFX12-NEXT:    v_dual_cndmask_b32 v3, 0, v3 :: v_dual_cndmask_b32 v2, 0, v2
 ; GFX12-NEXT:    scratch_store_b64 off, v[2:3], s0
@@ -14630,7 +14630,7 @@ define amdgpu_kernel void @atomic_inc_i64(ptr %out, i64 %in) {
 ; GCN1-NEXT:    v_add_i32_e32 v4, vcc, 1, v0
 ; GCN1-NEXT:    s_waitcnt vmcnt(0)
 ; GCN1-NEXT:    v_addc_u32_e32 v5, vcc, 0, v1, vcc
-; GCN1-NEXT:    v_cmp_gt_u64_e32 vcc, s[2:3], v[0:1]
+; GCN1-NEXT:    v_cmp_ge_u64_e32 vcc, s[2:3], v[0:1]
 ; GCN1-NEXT:    v_cndmask_b32_e32 v1, 0, v4, vcc
 ; GCN1-NEXT:    v_cndmask_b32_e32 v0, 0, v5, vcc
 ; GCN1-NEXT:    buffer_store_dword v1, v2, s[12:15], 0 offen
@@ -14679,7 +14679,7 @@ define amdgpu_kernel void @atomic_inc_i64(ptr %out, i64 %in) {
 ; GCN2-NEXT:    v_add_u32_e32 v4, vcc, 1, v0
 ; GCN2-NEXT:    s_waitcnt vmcnt(0)
 ; GCN2-NEXT:    v_addc_u32_e32 v5, vcc, 0, v1, vcc
-; GCN2-NEXT:    v_cmp_gt_u64_e32 vcc, s[2:3], v[0:1]
+; GCN2-NEXT:    v_cmp_ge_u64_e32 vcc, s[2:3], v[0:1]
 ; GCN2-NEXT:    v_cndmask_b32_e32 v1, 0, v4, vcc
 ; GCN2-NEXT:    v_cndmask_b32_e32 v0, 0, v5, vcc
 ; GCN2-NEXT:    buffer_store_dword v1, v2, s[88:91], 0 offen
@@ -14717,7 +14717,7 @@ define amdgpu_kernel void @atomic_inc_i64(ptr %out, i64 %in) {
 ; GFX12-NEXT:    v_add_co_u32 v2, vcc_lo, v0, 1
 ; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
 ; GFX12-NEXT:    v_add_co_ci_u32_e64 v3, null, 0, v1, vcc_lo
-; GFX12-NEXT:    v_cmp_gt_u64_e32 vcc_lo, s[2:3], v[0:1]
+; GFX12-NEXT:    v_cmp_ge_u64_e32 vcc_lo, s[2:3], v[0:1]
 ; GFX12-NEXT:    s_wait_alu 0xfffd
 ; GFX12-NEXT:    v_dual_cndmask_b32 v1, 0, v3 :: v_dual_cndmask_b32 v0, 0, v2
 ; GFX12-NEXT:    scratch_store_b64 off, v[0:1], s0
@@ -14769,7 +14769,7 @@ define amdgpu_kernel void @atomic_inc_i64_ret(ptr %out, ptr %out2, i64 %in) {
 ; GCN1-NEXT:    v_add_i32_e32 v4, vcc, 1, v0
 ; GCN1-NEXT:    s_waitcnt vmcnt(0)
 ; GCN1-NEXT:    v_addc_u32_e32 v5, vcc, 0, v1, vcc
-; GCN1-NEXT:    v_cmp_gt_u64_e32 vcc, s[4:5], v[0:1]
+; GCN1-NEXT:    v_cmp_ge_u64_e32 vcc, s[4:5], v[0:1]
 ; GCN1-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
 ; GCN1-NEXT:    v_cndmask_b32_e32 v5, 0, v5, vcc
 ; GCN1-NEXT:    buffer_store_dword v4, v2, s[12:15], 0 offen
@@ -14820,7 +14820,7 @@ define amdgpu_kernel void @atomic_inc_i64_ret(ptr %out, ptr %out2, i64 %in) {
 ; GCN2-NEXT:    v_add_u32_e32 v4, vcc, 1, v0
 ; GCN2-NEXT:    s_waitcnt vmcnt(0)
 ; GCN2-NEXT:    v_addc_u32_e32 v5, vcc, 0, v1, vcc
-; GCN2-NEXT:    v_cmp_gt_u64_e32 vcc, s[4:5], v[0:1]
+; GCN2-NEXT:    v_cmp_ge_u64_e32 vcc, s[4:5], v[0:1]
 ; GCN2-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
 ; GCN2-NEXT:    v_cndmask_b32_e32 v5, 0, v5, vcc
 ; GCN2-NEXT:    buffer_store_dword v4, v2, s[88:91], 0 offen
@@ -14861,7 +14861,7 @@ define amdgpu_kernel void @atomic_inc_i64_ret(ptr %out, ptr %out2, i64 %in) {
 ; GFX12-NEXT:    v_add_co_u32 v2, vcc_lo, v0, 1
 ; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
 ; GFX12-NEXT:    v_add_co_ci_u32_e64 v3, null, 0, v1, vcc_lo
-; GFX12-NEXT:    v_cmp_gt_u64_e32 vcc_lo, s[4:5], v[0:1]
+; GFX12-NEXT:    v_cmp_ge_u64_e32 vcc_lo, s[4:5], v[0:1]
 ; GFX12-NEXT:    s_wait_alu 0xfffd
 ; GFX12-NEXT:    v_dual_cndmask_b32 v3, 0, v3 :: v_dual_cndmask_b32 v2, 0, v2
 ; GFX12-NEXT:    scratch_store_b64 off, v[2:3], s0
@@ -14923,7 +14923,7 @@ define amdgpu_kernel void @atomic_inc_i64_incr64(ptr %out, i64 %in, i64 %index)
 ; GCN1-NEXT:    v_add_i32_e32 v4, vcc, 1, v0
 ; GCN1-NEXT:    s_waitcnt vmcnt(0)
 ; GCN1-NEXT:    v_addc_u32_e32 v5, vcc, 0, v1, vcc
-; GCN1-NEXT:    v_cmp_gt_u64_e32 vcc, s[2:3], v[0:1]
+; GCN1-NEXT:    v_cmp_ge_u64_e32 vcc, s[2:3], v[0:1]
 ; GCN1-NEXT:    v_cndmask_b32_e32 v1, 0, v4, vcc
 ; GCN1-NEXT:    v_cndmask_b32_e32 v0, 0, v5, vcc
 ; GCN1-NEXT:    buffer_store_dword v1, v2, s[12:15], 0 offen
@@ -14976,7 +14976,7 @@ define amdgpu_kernel void @atomic_inc_i64_incr64(ptr %out, i64 %in, i64 %index)
 ; GCN2-NEXT:    v_add_u32_e32 v4, vcc, 1, v0
 ; GCN2-NEXT:    s_waitcnt vmcnt(0)
 ; GCN2-NEXT:    v_addc_u32_e32 v5, vcc, 0, v1, vcc
-; GCN2-NEXT:    v_cmp_gt_u64_e32 vcc, s[2:3], v[0:1]
+; GCN2-NEXT:    v_cmp_ge_u64_e32 vcc, s[2:3], v[0:1]
 ; GCN2-NEXT:    v_cndmask_b32_e32 v1, 0, v4, vcc
 ; GCN2-NEXT:    v_cndmask_b32_e32 v0, 0, v5, vcc
 ; GCN2-NEXT:    buffer_store_dword v1, v2, s[88:91], 0 offen
@@ -15018,7 +15018,7 @@ define amdgpu_kernel void @atomic_inc_i64_incr64(ptr %out, i64 %in, i64 %index)
 ; GFX12-NEXT:    v_add_co_u32 v2, vcc_lo, v0, 1
 ; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
 ; GFX12-NEXT:    v_add_co_ci_u32_e64 v3, null, 0, v1, vcc_lo
-; GFX12-NEXT:    v_cmp_gt_u64_e32 vcc_lo, s[2:3], v[0:1]
+; GFX12-NEXT:    v_cmp_ge_u64_e32 vcc_lo, s[2:3], v[0:1]
 ; GFX12-NEXT:    s_wait_alu 0xfffd
 ; GFX12-NEXT:    v_dual_cndmask_b32 v1, 0, v3 :: v_dual_cndmask_b32 v0, 0, v2
 ; GFX12-NEXT:    scratch_store_b64 off, v[0:1], s0
@@ -15073,7 +15073,7 @@ define amdgpu_kernel void @atomic_inc_i64_ret_incr64(ptr %out, ptr %out2, i64 %i
 ; GCN1-NEXT:    v_add_i32_e32 v4, vcc, 1, v0
 ; GCN1-NEXT:    s_waitcnt vmcnt(0)
 ; GCN1-NEXT:    v_addc_u32_e32 v5, vcc, 0, v1, vcc
-; GCN1-NEXT:    v_cmp_gt_u64_e32 vcc, s[12:13], v[0:1]
+; GCN1-NEXT:    v_cmp_ge_u64_e32 vcc, s[12:13], v[0:1]
 ; GCN1-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
 ; GCN1-NEXT:    v_cndmask_b32_e32 v5, 0, v5, vcc
 ; GCN1-NEXT:    buffer_store_dword v4, v2, s[16:19], 0 offen
@@ -15126,7 +15126,7 @@ define amdgpu_kernel void @atomic_inc_i64_ret_incr64(ptr %out, ptr %out2, i64 %i
 ; GCN2-NEXT:    v_add_u32_e32 v4, vcc, 1, v0
 ; GCN2-NEXT:    s_waitcnt vmcnt(0)
 ; GCN2-NEXT:    v_addc_u32_e32 v5, vcc, 0, v1, vcc
-; GCN2-NEXT:    v_cmp_gt_u64_e32 vcc, s[12:13], v[0:1]
+; GCN2-NEXT:    v_cmp_ge_u64_e32 vcc, s[12:13], v[0:1]
 ; GCN2-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
 ; GCN2-NEXT:    v_cndmask_b32_e32 v5, 0, v5, vcc
 ; GCN2-NEXT:    buffer_store_dword v4, v2, s[88:91], 0 offen
@@ -15168,7 +15168,7 @@ define amdgpu_kernel void @atomic_inc_i64_ret_incr64(ptr %out, ptr %out2, i64 %i
 ; GFX12-NEXT:    v_add_co_u32 v2, vcc_lo, v0, 1
 ; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
 ; GFX12-NEXT:    v_add_co_ci_u32_e64 v3, null, 0, v1, vcc_lo
-; GFX12-NEXT:    v_cmp_gt_u64_e32 vcc_lo, s[4:5], v[0:1]
+; GFX12-NEXT:    v_cmp_ge_u64_e32 vcc_lo, s[4:5], v[0:1]
 ; GFX12-NEXT:    s_wait_alu 0xfffd
 ; GFX12-NEXT:    v_dual_cndmask_b32 v3, 0, v3 :: v_dual_cndmask_b32 v2, 0, v2
 ; GFX12-NEXT:    scratch_store_b64 off, v[2:3], s0
@@ -15229,7 +15229,7 @@ define amdgpu_kernel void @atomic_dec_i64_offset(ptr %out, i64 %in) {
 ; GCN1-NEXT:    v_mov_b32_e32 v5, s2
 ; GCN1-NEXT:    s_waitcnt vmcnt(0)
 ; GCN1-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[0:1]
-; GCN1-NEXT:    v_cmp_lt_u64_e64 s[0:1], s[2:3], v[0:1]
+; GCN1-NEXT:    v_cmp_le_u64_e64 s[0:1], s[2:3], v[0:1]
 ; GCN1-NEXT:    v_add_i32_e64 v0, s[2:3], -1, v0
 ; GCN1-NEXT:    s_or_b64 vcc, vcc, s[0:1]
 ; GCN1-NEXT:    v_addc_u32_e64 v1, s[2:3], -1, v1, s[2:3]
@@ -15283,7 +15283,7 @@ define amdgpu_kernel void @atomic_dec_i64_offset(ptr %out, i64 %in) {
 ; GCN2-NEXT:    v_mov_b32_e32 v5, s2
 ; GCN2-NEXT:    s_waitcnt vmcnt(0)
 ; GCN2-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[0:1]
-; GCN2-NEXT:    v_cmp_lt_u64_e64 s[0:1], s[2:3], v[0:1]
+; GCN2-NEXT:    v_cmp_le_u64_e64 s[0:1], s[2:3], v[0:1]
 ; GCN2-NEXT:    v_add_u32_e64 v0, s[2:3], -1, v0
 ; GCN2-NEXT:    s_or_b64 vcc, vcc, s[0:1]
 ; GCN2-NEXT:    v_addc_u32_e64 v1, s[2:3], -1, v1, s[2:3]
@@ -15323,7 +15323,7 @@ define amdgpu_kernel void @atomic_dec_i64_offset(ptr %out, i64 %in) {
 ; GFX12-NEXT:    scratch_load_b64 v[0:1], off, s4
 ; GFX12-NEXT:    s_wait_loadcnt 0x0
 ; GFX12-NEXT:    v_cmp_eq_u64_e32 vcc_lo, 0, v[0:1]
-; GFX12-NEXT:    v_cmp_lt_u64_e64 s0, s[2:3], v[0:1]
+; GFX12-NEXT:    v_cmp_le_u64_e64 s0, s[2:3], v[0:1]
 ; GFX12-NEXT:    v_add_co_u32 v0, s1, v0, -1
 ; GFX12-NEXT:    s_wait_alu 0xf1ff
 ; GFX12-NEXT:    v_add_co_ci_u32_e64 v1, null, -1, v1, s1
@@ -15385,7 +15385,7 @@ define amdgpu_kernel void @atomic_dec_i64_ret_offset(ptr %out, ptr %out2, i64 %i
 ; GCN1-NEXT:    v_add_i32_e64 v6, s[2:3], -1, v0
 ; GCN1-NEXT:    s_waitcnt vmcnt(0)
 ; GCN1-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[0:1]
-; GCN1-NEXT:    v_cmp_lt_u64_e64 s[0:1], s[0:1], v[0:1]
+; GCN1-NEXT:    v_cmp_le_u64_e64 s[0:1], s[0:1], v[0:1]
 ; GCN1-NEXT:    v_addc_u32_e64 v7, s[2:3], -1, v1, s[2:3]
 ; GCN1-NEXT:    s_or_b64 vcc, vcc, s[0:1]
 ; GCN1-NEXT:    v_cndmask_b32_e32 v5, v6, v5, vcc
@@ -15442,7 +15442,7 @@ define amdgpu_kernel void @atomic_dec_i64_ret_offset(ptr %out, ptr %out2, i64 %i
 ; GCN2-NEXT:    v_add_u32_e64 v6, s[2:3], -1, v0
 ; GCN2-NEXT:    s_waitcnt vmcnt(0)
 ; GCN2-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[0:1]
-; GCN2-NEXT:    v_cmp_lt_u64_e64 s[0:1], s[0:1], v[0:1]
+; GCN2-NEXT:    v_cmp_le_u64_e64 s[0:1], s[0:1], v[0:1]
 ; GCN2-NEXT:    v_addc_u32_e64 v7, s[2:3], -1, v1, s[2:3]
 ; GCN2-NEXT:    s_or_b64 vcc, vcc, s[0:1]
 ; GCN2-NEXT:    v_cndmask_b32_e32 v5, v6, v5, vcc
@@ -15484,7 +15484,7 @@ define amdgpu_kernel void @atomic_dec_i64_ret_offset(ptr %out, ptr %out2, i64 %i
 ; GFX12-NEXT:    scratch_load_b64 v[0:1], off, s6
 ; GFX12-NEXT:    s_wait_loadcnt 0x0
 ; GFX12-NEXT:    v_cmp_eq_u64_e32 vcc_lo, 0, v[0:1]
-; GFX12-NEXT:    v_cmp_lt_u64_e64 s0, s[4:5], v[0:1]
+; GFX12-NEXT:    v_cmp_le_u64_e64 s0, s[4:5], v[0:1]
 ; GFX12-NEXT:    v_add_co_u32 v2, s1, v0, -1
 ; GFX12-NEXT:    s_wait_alu 0xf1ff
 ; GFX12-NEXT:    v_add_co_ci_u32_e64 v3, null, -1, v1, s1
@@ -15554,7 +15554,7 @@ define amdgpu_kernel void @atomic_dec_i64_decr64_offset(ptr %out, i64 %in, i64 %
 ; GCN1-NEXT:    v_mov_b32_e32 v5, s2
 ; GCN1-NEXT:    s_waitcnt vmcnt(0)
 ; GCN1-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[0:1]
-; GCN1-NEXT:    v_cmp_lt_u64_e64 s[0:1], s[2:3], v[0:1]
+; GCN1-NEXT:    v_cmp_le_u64_e64 s[0:1], s[2:3], v[0:1]
 ; GCN1-NEXT:    v_add_i32_e64 v0, s[2:3], -1, v0
 ; GCN1-NEXT:    s_or_b64 vcc, vcc, s[0:1]
 ; GCN1-NEXT:    v_addc_u32_e64 v1, s[2:3], -1, v1, s[2:3]
@@ -15612,7 +15612,7 @@ define amdgpu_kernel void @atomic_dec_i64_decr64_offset(ptr %out, i64 %in, i64 %
 ; GCN2-NEXT:    v_mov_b32_e32 v5, s2
 ; GCN2-NEXT:    s_waitcnt vmcnt(0)
 ; GCN2-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[0:1]
-; GCN2-NEXT:    v_cmp_lt_u64_e64 s[0:1], s[2:3], v[0:1]
+; GCN2-NEXT:    v_cmp_le_u64_e64 s[0:1], s[2:3], v[0:1]
 ; GCN2-NEXT:    v_add_u32_e64 v0, s[2:3], -1, v0
 ; GCN2-NEXT:    s_or_b64 vcc, vcc, s[0:1]
 ; GCN2-NEXT:    v_addc_u32_e64 v1, s[2:3], -1, v1, s[2:3]
@@ -15657,7 +15657,7 @@ define amdgpu_kernel void @atomic_dec_i64_decr64_offset(ptr %out, i64 %in, i64 %
 ; GFX12-NEXT:    scratch_load_b64 v[0:1], off, s4
 ; GFX12-NEXT:    s_wait_loadcnt 0x0
 ; GFX12-NEXT:    v_cmp_eq_u64_e32 vcc_lo, 0, v[0:1]
-; GFX12-NEXT:    v_cmp_lt_u64_e64 s0, s[2:3], v[0:1]
+; GFX12-NEXT:    v_cmp_le_u64_e64 s0, s[2:3], v[0:1]
 ; GFX12-NEXT:    v_add_co_u32 v0, s1, v0, -1
 ; GFX12-NEXT:    s_wait_alu 0xf1ff
 ; GFX12-NEXT:    v_add_co_ci_u32_e64 v1, null, -1, v1, s1
@@ -15722,7 +15722,7 @@ define amdgpu_kernel void @atomic_dec_i64_ret_decr64_offset(ptr %out, ptr %out2,
 ; GCN1-NEXT:    v_add_i32_e64 v6, s[2:3], -1, v0
 ; GCN1-NEXT:    s_waitcnt vmcnt(0)
 ; GCN1-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[0:1]
-; GCN1-NEXT:    v_cmp_lt_u64_e64 s[0:1], s[12:13], v[0:1]
+; GCN1-NEXT:    v_cmp_le_u64_e64 s[0:1], s[12:13], v[0:1]
 ; GCN1-NEXT:    v_addc_u32_e64 v7, s[2:3], -1, v1, s[2:3]
 ; GCN1-NEXT:    s_or_b64 vcc, vcc, s[0:1]
 ; GCN1-NEXT:    v_cndmask_b32_e32 v5, v6, v5, vcc
@@ -15781,7 +15781,7 @@ define amdgpu_kernel void @atomic_dec_i64_ret_decr64_offset(ptr %out, ptr %out2,
 ; GCN2-NEXT:    v_add_u32_e64 v6, s[2:3], -1, v0
 ; GCN2-NEXT:    s_waitcnt vmcnt(0)
 ; GCN2-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[0:1]
-; GCN2-NEXT:    v_cmp_lt_u64_e64 s[0:1], s[12:13], v[0:1]
+; GCN2-NEXT:    v_cmp_le_u64_e64 s[0:1], s[12:13], v[0:1]
 ; GCN2-NEXT:    v_addc_u32_e64 v7, s[2:3], -1, v1, s[2:3]
 ; GCN2-NEXT:    s_or_b64 vcc, vcc, s[0:1]
 ; GCN2-NEXT:    v_cndmask_b32_e32 v5, v6, v5, vcc
@@ -15824,7 +15824,7 @@ define amdgpu_kernel void @atomic_dec_i64_ret_decr64_offset(ptr %out, ptr %out2,
 ; GFX12-NEXT:    scratch_load_b64 v[0:1], off, s6
 ; GFX12-NEXT:    s_wait_loadcnt 0x0
 ; GFX12-NEXT:    v_cmp_eq_u64_e32 vcc_lo, 0, v[0:1]
-; GFX12-NEXT:    v_cmp_lt_u64_e64 s0, s[4:5], v[0:1]
+; GFX12-NEXT:    v_cmp_le_u64_e64 s0, s[4:5], v[0:1]
 ; GFX12-NEXT:    v_add_co_u32 v2, s1, v0, -1
 ; GFX12-NEXT:    s_wait_alu 0xf1ff
 ; GFX12-NEXT:    v_add_co_ci_u32_e64 v3, null, -1, v1, s1
@@ -15889,7 +15889,7 @@ define amdgpu_kernel void @atomic_dec_i64(ptr %out, i64 %in) {
 ; GCN1-NEXT:    v_mov_b32_e32 v5, s2
 ; GCN1-NEXT:    s_waitcnt vmcnt(0)
 ; GCN1-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[0:1]
-; GCN1-NEXT:    v_cmp_lt_u64_e64 s[0:1], s[2:3], v[0:1]
+; GCN1-NEXT:    v_cmp_le_u64_e64 s[0:1], s[2:3], v[0:1]
 ; GCN1-NEXT:    v_add_i32_e64 v0, s[2:3], -1, v0
 ; GCN1-NEXT:    s_or_b64 vcc, vcc, s[0:1]
 ; GCN1-NEXT:    v_addc_u32_e64 v1, s[2:3], -1, v1, s[2:3]
@@ -15941,7 +15941,7 @@ define amdgpu_kernel void @atomic_dec_i64(ptr %out, i64 %in) {
 ; GCN2-NEXT:    v_mov_b32_e32 v5, s2
 ; GCN2-NEXT:    s_waitcnt vmcnt(0)
 ; GCN2-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[0:1]
-; GCN2-NEXT:    v_cmp_lt_u64_e64 s[0:1], s[2:3], v[0:1]
+; GCN2-NEXT:    v_cmp_le_u64_e64 s[0:1], s[2:3], v[0:1]
 ; GCN2-NEXT:    v_add_u32_e64 v0, s[2:3], -1, v0
 ; GCN2-NEXT:    s_or_b64 vcc, vcc, s[0:1]
 ; GCN2-NEXT:    v_addc_u32_e64 v1, s[2:3], -1, v1, s[2:3]
@@ -15980,7 +15980,7 @@ define amdgpu_kernel void @atomic_dec_i64(ptr %out, i64 %in) {
 ; GFX12-NEXT:    scratch_load_b64 v[0:1], off, s4
 ; GFX12-NEXT:    s_wait_loadcnt 0x0
 ; GFX12-NEXT:    v_cmp_eq_u64_e32 vcc_lo, 0, v[0:1]
-; GFX12-NEXT:    v_cmp_lt_u64_e64 s0, s[2:3], v[0:1]
+; GFX12-NEXT:    v_cmp_le_u64_e64 s0, s[2:3], v[0:1]
 ; GFX12-NEXT:    v_add_co_u32 v0, s1, v0, -1
 ; GFX12-NEXT:    s_wait_alu 0xf1ff
 ; GFX12-NEXT:    v_add_co_ci_u32_e64 v1, null, -1, v1, s1
@@ -16039,7 +16039,7 @@ define amdgpu_kernel void @atomic_dec_i64_ret(ptr %out, ptr %out2, i64 %in) {
 ; GCN1-NEXT:    v_add_i32_e64 v6, s[2:3], -1, v0
 ; GCN1-NEXT:    s_waitcnt vmcnt(0)
 ; GCN1-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[0:1]
-; GCN1-NEXT:    v_cmp_lt_u64_e64 s[0:1], s[0:1], v[0:1]
+; GCN1-NEXT:    v_cmp_le_u64_e64 s[0:1], s[0:1], v[0:1]
 ; GCN1-NEXT:    v_addc_u32_e64 v7, s[2:3], -1, v1, s[2:3]
 ; GCN1-NEXT:    s_or_b64 vcc, vcc, s[0:1]
 ; GCN1-NEXT:    v_cndmask_b32_e32 v5, v6, v5, vcc
@@ -16094,7 +16094,7 @@ define amdgpu_kernel void @atomic_dec_i64_ret(ptr %out, ptr %out2, i64 %in) {
 ; GCN2-NEXT:    v_add_u32_e64 v6, s[2:3], -1, v0
 ; GCN2-NEXT:    s_waitcnt vmcnt(0)
 ; GCN2-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[0:1]
-; GCN2-NEXT:    v_cmp_lt_u64_e64 s[0:1], s[0:1], v[0:1]
+; GCN2-NEXT:    v_cmp_le_u64_e64 s[0:1], s[0:1], v[0:1]
 ; GCN2-NEXT:    v_addc_u32_e64 v7, s[2:3], -1, v1, s[2:3]
 ; GCN2-NEXT:    s_or_b64 vcc, vcc, s[0:1]
 ; GCN2-NEXT:    v_cndmask_b32_e32 v5, v6, v5, vcc
@@ -16135,7 +16135,7 @@ define amdgpu_kernel void @atomic_dec_i64_ret(ptr %out, ptr %out2, i64 %in) {
 ; GFX12-NEXT:    scratch_load_b64 v[0:1], off, s6
 ; GFX12-NEXT:    s_wait_loadcnt 0x0
 ; GFX12-NEXT:    v_cmp_eq_u64_e32 vcc_lo, 0, v[0:1]
-; GFX12-NEXT:    v_cmp_lt_u64_e64 s0, s[4:5], v[0:1]
+; GFX12-NEXT:    v_cmp_le_u64_e64 s0, s[4:5], v[0:1]
 ; GFX12-NEXT:    v_add_co_u32 v2, s1, v0, -1
 ; GFX12-NEXT:    s_wait_alu 0xf1ff
 ; GFX12-NEXT:    v_add_co_ci_u32_e64 v3, null, -1, v1, s1
@@ -16202,7 +16202,7 @@ define amdgpu_kernel void @atomic_dec_i64_decr64(ptr %out, i64 %in, i64 %index)
 ; GCN1-NEXT:    v_mov_b32_e32 v5, s2
 ; GCN1-NEXT:    s_waitcnt vmcnt(0)
 ; GCN1-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[0:1]
-; GCN1-NEXT:    v_cmp_lt_u64_e64 s[0:1], s[2:3], v[0:1]
+; GCN1-NEXT:    v_cmp_le_u64_e64 s[0:1], s[2:3], v[0:1]
 ; GCN1-NEXT:    v_add_i32_e64 v0, s[2:3], -1, v0
 ; GCN1-NEXT:    s_or_b64 vcc, vcc, s[0:1]
 ; GCN1-NEXT:    v_addc_u32_e64 v1, s[2:3], -1, v1, s[2:3]
@@ -16258,7 +16258,7 @@ define amdgpu_kernel void @atomic_dec_i64_decr64(ptr %out, i64 %in, i64 %index)
 ; GCN2-NEXT:    v_mov_b32_e32 v5, s2
 ; GCN2-NEXT:    s_waitcnt vmcnt(0)
 ; GCN2-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[0:1]
-; GCN2-NEXT:    v_cmp_lt_u64_e64 s[0:1], s[2:3], v[0:1]
+; GCN2-NEXT:    v_cmp_le_u64_e64 s[0:1], s[2:3], v[0:1]
 ; GCN2-NEXT:    v_add_u32_e64 v0, s[2:3], -1, v0
 ; GCN2-NEXT:    s_or_b64 vcc, vcc, s[0:1]
 ; GCN2-NEXT:    v_addc_u32_e64 v1, s[2:3], -1, v1, s[2:3]
@@ -16301,7 +16301,7 @@ define amdgpu_kernel void @atomic_dec_i64_decr64(ptr %out, i64 %in, i64 %index)
 ; GFX12-NEXT:    scratch_load_b64 v[0:1], off, s4
 ; GFX12-NEXT:    s_wait_loadcnt 0x0
 ; GFX12-NEXT:    v_cmp_eq_u64_e32 vcc_lo, 0, v[0:1]
-; GFX12-NEXT:    v_cmp_lt_u64_e64 s0, s[2:3], v[0:1]
+; GFX12-NEXT:    v_cmp_le_u64_e64 s0, s[2:3], v[0:1]
 ; GFX12-NEXT:    v_add_co_u32 v0, s1, v0, -1
 ; GFX12-NEXT:    s_wait_alu 0xf1ff
 ; GFX12-NEXT:    v_add_co_ci_u32_e64 v1, null, -1, v1, s1
@@ -16363,7 +16363,7 @@ define amdgpu_kernel void @atomic_dec_i64_ret_decr64(ptr %out, ptr %out2, i64 %i
 ; GCN1-NEXT:    v_add_i32_e64 v6, s[2:3], -1, v0
 ; GCN1-NEXT:    s_waitcnt vmcnt(0)
 ; GCN1-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[0:1]
-; GCN1-NEXT:    v_cmp_lt_u64_e64 s[0:1], s[12:13], v[0:1]
+; GCN1-NEXT:    v_cmp_le_u64_e64 s[0:1], s[12:13], v[0:1]
 ; GCN1-NEXT:    v_addc_u32_e64 v7, s[2:3], -1, v1, s[2:3]
 ; GCN1-NEXT:    s_or_b64 vcc, vcc, s[0:1]
 ; GCN1-NEXT:    v_cndmask_b32_e32 v5, v6, v5, vcc
@@ -16420,7 +16420,7 @@ define amdgpu_kernel void @atomic_dec_i64_ret_decr64(ptr %out, ptr %out2, i64 %i
 ; GCN2-NEXT:    v_add_u32_e64 v6, s[2:3], -1, v0
 ; GCN2-NEXT:    s_waitcnt vmcnt(0)
 ; GCN2-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[0:1]
-; GCN2-NEXT:    v_cmp_lt_u64_e64 s[0:1], s[12:13], v[0:1]
+; GCN2-NEXT:    v_cmp_le_u64_e64 s[0:1], s[12:13], v[0:1]
 ; GCN2-NEXT:    v_addc_u32_e64 v7, s[2:3], -1, v1, s[2:3]
 ; GCN2-NEXT:    s_or_b64 vcc, vcc, s[0:1]
 ; GCN2-NEXT:    v_cndmask_b32_e32 v5, v6, v5, vcc
@@ -16462,7 +16462,7 @@ define amdgpu_kernel void @atomic_dec_i64_ret_decr64(ptr %out, ptr %out2, i64 %i
 ; GFX12-NEXT:    scratch_load_b64 v[0:1], off, s6
 ; GFX12-NEXT:    s_wait_loadcnt 0x0
 ; GFX12-NEXT:    v_cmp_eq_u64_e32 vcc_lo, 0, v[0:1]
-; GFX12-NEXT:    v_cmp_lt_u64_e64 s0, s[4:5], v[0:1]
+; GFX12-NEXT:    v_cmp_le_u64_e64 s0, s[4:5], v[0:1]
 ; GFX12-NEXT:    v_add_co_u32 v2, s1, v0, -1
 ; GFX12-NEXT:    s_wait_alu 0xf1ff
 ; GFX12-NEXT:    v_add_co_ci_u32_e64 v3, null, -1, v1, s1
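
A note on the pattern in the regenerated checks above: every delta swaps a strict comparison for its non-strict counterpart in the same direction, with the operand order unchanged (v_cmp_lt -> v_cmp_le, v_cmp_ge -> v_cmp_gt, v_cmp_gt -> v_cmp_ge). That is the signature of a reverse-opcode table that had been conflating operand swapping with logical negation: commuting a comparison flips its direction but preserves strictness (a >= b holds iff b <= a), whereas negating it flips both (!(a >= b) is a < b), and the two candidate opcodes disagree exactly when the operands compare equal. In the min/max expansions above a tie selects between equal values, so the opcode change there happens to be result-neutral; where the equality outcome is observable, the wrong entry miscompiles. Below is a minimal standalone C++ sketch of the distinction; the Pred enum and helpers are hypothetical stand-ins for illustration, not the actual TableGen-generated tables:

  #include <cassert>
  #include <cstdint>
  #include <initializer_list>

  enum class Pred { LT, LE, GT, GE };

  // Commuting operands flips direction but preserves strictness:
  //   a >= b  <=>  b <= a
  constexpr Pred swapped(Pred P) {
    switch (P) {
    case Pred::LT: return Pred::GT;
    case Pred::LE: return Pred::GE;
    case Pred::GT: return Pred::LT;
    case Pred::GE: return Pred::LE;
    }
    return P;
  }

  // Logical negation flips direction *and* strictness:
  //   !(a >= b)  <=>  a < b
  constexpr Pred negated(Pred P) {
    switch (P) {
    case Pred::LT: return Pred::GE;
    case Pred::LE: return Pred::GT;
    case Pred::GT: return Pred::LE;
    case Pred::GE: return Pred::LT;
    }
    return P;
  }

  constexpr bool eval(Pred P, uint64_t A, uint64_t B) {
    switch (P) {
    case Pred::LT: return A < B;
    case Pred::LE: return A <= B;
    case Pred::GT: return A > B;
    case Pred::GE: return A >= B;
    }
    return false;
  }

  int main() {
    // The correct commuted opcode agrees for every input pair.
    for (uint64_t A : {3, 5, 7})
      assert(eval(Pred::GE, A, 5) == eval(swapped(Pred::GE), 5, A));
    // Using the negation as if it were the commute agrees off ties...
    assert(eval(Pred::GE, 3, 5) == eval(negated(Pred::GE), 5, 3));
    // ...but disagrees exactly when the operands are equal.
    assert(eval(negated(Pred::GE), 5, 5) != eval(swapped(Pred::GE), 5, 5));
    return 0;
  }
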
diff --git a/llvm/test/CodeGen/AMDGPU/flat_atomics_i64_system.ll b/llvm/test/CodeGen/AMDGPU/flat_atomics_i64_system.ll
index 23dfe2f70fa7e..7a7bdd5c32985 100644
--- a/llvm/test/CodeGen/AMDGPU/flat_atomics_i64_system.ll
+++ b/llvm/test/CodeGen/AMDGPU/flat_atomics_i64_system.ll
@@ -11431,7 +11431,7 @@ define amdgpu_gfx void @flat_atomic_max_i64_noret_scalar(ptr inreg %ptr, i64 inr
 ; GCN1-NEXT:  .LBB84_4: ; %atomicrmw.start
 ; GCN1-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GCN1-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN1-NEXT:    v_cmp_lt_i64_e32 vcc, s[6:7], v[2:3]
+; GCN1-NEXT:    v_cmp_le_i64_e32 vcc, s[6:7], v[2:3]
 ; GCN1-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
 ; GCN1-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
 ; GCN1-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
@@ -11458,7 +11458,7 @@ define amdgpu_gfx void @flat_atomic_max_i64_noret_scalar(ptr inreg %ptr, i64 inr
 ; GCN1-NEXT:    buffer_load_dword v1, v3, s[0:3], 0 offen
 ; GCN1-NEXT:    v_mov_b32_e32 v4, s7
 ; GCN1-NEXT:    s_waitcnt vmcnt(0)
-; GCN1-NEXT:    v_cmp_lt_i64_e32 vcc, s[6:7], v[0:1]
+; GCN1-NEXT:    v_cmp_le_i64_e32 vcc, s[6:7], v[0:1]
 ; GCN1-NEXT:    v_cndmask_b32_e32 v0, v5, v0, vcc
 ; GCN1-NEXT:    v_cndmask_b32_e32 v1, v4, v1, vcc
 ; GCN1-NEXT:    buffer_store_dword v0, v2, s[0:3], 0 offen
@@ -11497,7 +11497,7 @@ define amdgpu_gfx void @flat_atomic_max_i64_noret_scalar(ptr inreg %ptr, i64 inr
 ; GCN2-NEXT:  .LBB84_4: ; %atomicrmw.start
 ; GCN2-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GCN2-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN2-NEXT:    v_cmp_lt_i64_e32 vcc, s[6:7], v[2:3]
+; GCN2-NEXT:    v_cmp_le_i64_e32 vcc, s[6:7], v[2:3]
 ; GCN2-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
 ; GCN2-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
 ; GCN2-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
@@ -11523,7 +11523,7 @@ define amdgpu_gfx void @flat_atomic_max_i64_noret_scalar(ptr inreg %ptr, i64 inr
 ; GCN2-NEXT:    v_mov_b32_e32 v5, s6
 ; GCN2-NEXT:    v_mov_b32_e32 v4, s7
 ; GCN2-NEXT:    s_waitcnt vmcnt(0)
-; GCN2-NEXT:    v_cmp_lt_i64_e32 vcc, s[6:7], v[0:1]
+; GCN2-NEXT:    v_cmp_le_i64_e32 vcc, s[6:7], v[0:1]
 ; GCN2-NEXT:    v_cndmask_b32_e32 v0, v5, v0, vcc
 ; GCN2-NEXT:    v_cndmask_b32_e32 v1, v4, v1, vcc
 ; GCN2-NEXT:    buffer_store_dword v0, v2, s[0:3], 0 offen
@@ -11555,7 +11555,7 @@ define amdgpu_gfx void @flat_atomic_max_i64_noret_scalar(ptr inreg %ptr, i64 inr
 ; GCN3-NEXT:  .LBB84_4: ; %atomicrmw.start
 ; GCN3-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GCN3-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN3-NEXT:    v_cmp_lt_i64_e32 vcc, s[6:7], v[2:3]
+; GCN3-NEXT:    v_cmp_le_i64_e32 vcc, s[6:7], v[2:3]
 ; GCN3-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
 ; GCN3-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
 ; GCN3-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
@@ -11579,7 +11579,7 @@ define amdgpu_gfx void @flat_atomic_max_i64_noret_scalar(ptr inreg %ptr, i64 inr
 ; GCN3-NEXT:    v_mov_b32_e32 v4, s6
 ; GCN3-NEXT:    v_mov_b32_e32 v3, s7
 ; GCN3-NEXT:    s_waitcnt vmcnt(0)
-; GCN3-NEXT:    v_cmp_lt_i64_e32 vcc, s[6:7], v[0:1]
+; GCN3-NEXT:    v_cmp_le_i64_e32 vcc, s[6:7], v[0:1]
 ; GCN3-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
 ; GCN3-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
 ; GCN3-NEXT:    buffer_store_dword v0, v2, s[0:3], 0 offen
@@ -11624,7 +11624,7 @@ define amdgpu_gfx void @flat_atomic_max_i64_noret_offset_scalar(ptr inreg %out,
 ; GCN1-NEXT:  .LBB85_4: ; %atomicrmw.start
 ; GCN1-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GCN1-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN1-NEXT:    v_cmp_lt_i64_e32 vcc, s[6:7], v[2:3]
+; GCN1-NEXT:    v_cmp_le_i64_e32 vcc, s[6:7], v[2:3]
 ; GCN1-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
 ; GCN1-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
 ; GCN1-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
@@ -11651,7 +11651,7 @@ define amdgpu_gfx void @flat_atomic_max_i64_noret_offset_scalar(ptr inreg %out,
 ; GCN1-NEXT:    buffer_load_dword v1, v3, s[0:3], 0 offen
 ; GCN1-NEXT:    v_mov_b32_e32 v4, s7
 ; GCN1-NEXT:    s_waitcnt vmcnt(0)
-; GCN1-NEXT:    v_cmp_lt_i64_e32 vcc, s[6:7], v[0:1]
+; GCN1-NEXT:    v_cmp_le_i64_e32 vcc, s[6:7], v[0:1]
 ; GCN1-NEXT:    v_cndmask_b32_e32 v0, v5, v0, vcc
 ; GCN1-NEXT:    v_cndmask_b32_e32 v1, v4, v1, vcc
 ; GCN1-NEXT:    buffer_store_dword v0, v2, s[0:3], 0 offen
@@ -11692,7 +11692,7 @@ define amdgpu_gfx void @flat_atomic_max_i64_noret_offset_scalar(ptr inreg %out,
 ; GCN2-NEXT:  .LBB85_4: ; %atomicrmw.start
 ; GCN2-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GCN2-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN2-NEXT:    v_cmp_lt_i64_e32 vcc, s[6:7], v[2:3]
+; GCN2-NEXT:    v_cmp_le_i64_e32 vcc, s[6:7], v[2:3]
 ; GCN2-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
 ; GCN2-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
 ; GCN2-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
@@ -11718,7 +11718,7 @@ define amdgpu_gfx void @flat_atomic_max_i64_noret_offset_scalar(ptr inreg %out,
 ; GCN2-NEXT:    v_mov_b32_e32 v5, s6
 ; GCN2-NEXT:    v_mov_b32_e32 v4, s7
 ; GCN2-NEXT:    s_waitcnt vmcnt(0)
-; GCN2-NEXT:    v_cmp_lt_i64_e32 vcc, s[6:7], v[0:1]
+; GCN2-NEXT:    v_cmp_le_i64_e32 vcc, s[6:7], v[0:1]
 ; GCN2-NEXT:    v_cndmask_b32_e32 v0, v5, v0, vcc
 ; GCN2-NEXT:    v_cndmask_b32_e32 v1, v4, v1, vcc
 ; GCN2-NEXT:    buffer_store_dword v0, v2, s[0:3], 0 offen
@@ -11752,7 +11752,7 @@ define amdgpu_gfx void @flat_atomic_max_i64_noret_offset_scalar(ptr inreg %out,
 ; GCN3-NEXT:  .LBB85_4: ; %atomicrmw.start
 ; GCN3-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GCN3-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN3-NEXT:    v_cmp_lt_i64_e32 vcc, s[6:7], v[2:3]
+; GCN3-NEXT:    v_cmp_le_i64_e32 vcc, s[6:7], v[2:3]
 ; GCN3-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
 ; GCN3-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
 ; GCN3-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
@@ -11776,7 +11776,7 @@ define amdgpu_gfx void @flat_atomic_max_i64_noret_offset_scalar(ptr inreg %out,
 ; GCN3-NEXT:    v_mov_b32_e32 v4, s6
 ; GCN3-NEXT:    v_mov_b32_e32 v3, s7
 ; GCN3-NEXT:    s_waitcnt vmcnt(0)
-; GCN3-NEXT:    v_cmp_lt_i64_e32 vcc, s[6:7], v[0:1]
+; GCN3-NEXT:    v_cmp_le_i64_e32 vcc, s[6:7], v[0:1]
 ; GCN3-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
 ; GCN3-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
 ; GCN3-NEXT:    buffer_store_dword v0, v2, s[0:3], 0 offen
@@ -11816,7 +11816,7 @@ define amdgpu_gfx i64 @flat_atomic_max_i64_ret_scalar(ptr inreg %ptr, i64 inreg
 ; GCN1-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GCN1-NEXT:    v_mov_b32_e32 v9, v1
 ; GCN1-NEXT:    v_mov_b32_e32 v8, v0
-; GCN1-NEXT:    v_cmp_lt_i64_e32 vcc, s[6:7], v[8:9]
+; GCN1-NEXT:    v_cmp_le_i64_e32 vcc, s[6:7], v[8:9]
 ; GCN1-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
 ; GCN1-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
 ; GCN1-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[6:9] glc
@@ -11844,7 +11844,7 @@ define amdgpu_gfx i64 @flat_atomic_max_i64_ret_scalar(ptr inreg %ptr, i64 inreg
 ; GCN1-NEXT:    buffer_load_dword v1, v3, s[0:3], 0 offen
 ; GCN1-NEXT:    v_mov_b32_e32 v4, s7
 ; GCN1-NEXT:    s_waitcnt vmcnt(0)
-; GCN1-NEXT:    v_cmp_lt_i64_e32 vcc, s[6:7], v[0:1]
+; GCN1-NEXT:    v_cmp_le_i64_e32 vcc, s[6:7], v[0:1]
 ; GCN1-NEXT:    v_cndmask_b32_e32 v5, v5, v0, vcc
 ; GCN1-NEXT:    v_cndmask_b32_e32 v4, v4, v1, vcc
 ; GCN1-NEXT:    buffer_store_dword v5, v2, s[0:3], 0 offen
@@ -11880,7 +11880,7 @@ define amdgpu_gfx i64 @flat_atomic_max_i64_ret_scalar(ptr inreg %ptr, i64 inreg
 ; GCN2-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GCN2-NEXT:    v_mov_b32_e32 v9, v1
 ; GCN2-NEXT:    v_mov_b32_e32 v8, v0
-; GCN2-NEXT:    v_cmp_lt_i64_e32 vcc, s[6:7], v[8:9]
+; GCN2-NEXT:    v_cmp_le_i64_e32 vcc, s[6:7], v[8:9]
 ; GCN2-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
 ; GCN2-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
 ; GCN2-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[6:9] glc
@@ -11907,7 +11907,7 @@ define amdgpu_gfx i64 @flat_atomic_max_i64_ret_scalar(ptr inreg %ptr, i64 inreg
 ; GCN2-NEXT:    v_mov_b32_e32 v5, s6
 ; GCN2-NEXT:    v_mov_b32_e32 v4, s7
 ; GCN2-NEXT:    s_waitcnt vmcnt(0)
-; GCN2-NEXT:    v_cmp_lt_i64_e32 vcc, s[6:7], v[0:1]
+; GCN2-NEXT:    v_cmp_le_i64_e32 vcc, s[6:7], v[0:1]
 ; GCN2-NEXT:    v_cndmask_b32_e32 v5, v5, v0, vcc
 ; GCN2-NEXT:    v_cndmask_b32_e32 v4, v4, v1, vcc
 ; GCN2-NEXT:    buffer_store_dword v5, v2, s[0:3], 0 offen
@@ -11936,7 +11936,7 @@ define amdgpu_gfx i64 @flat_atomic_max_i64_ret_scalar(ptr inreg %ptr, i64 inreg
 ; GCN3-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GCN3-NEXT:    v_mov_b32_e32 v9, v1
 ; GCN3-NEXT:    v_mov_b32_e32 v8, v0
-; GCN3-NEXT:    v_cmp_lt_i64_e32 vcc, s[6:7], v[8:9]
+; GCN3-NEXT:    v_cmp_le_i64_e32 vcc, s[6:7], v[8:9]
 ; GCN3-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
 ; GCN3-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
 ; GCN3-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[6:9] glc
@@ -11961,7 +11961,7 @@ define amdgpu_gfx i64 @flat_atomic_max_i64_ret_scalar(ptr inreg %ptr, i64 inreg
 ; GCN3-NEXT:    v_mov_b32_e32 v4, s6
 ; GCN3-NEXT:    v_mov_b32_e32 v3, s7
 ; GCN3-NEXT:    s_waitcnt vmcnt(0)
-; GCN3-NEXT:    v_cmp_lt_i64_e32 vcc, s[6:7], v[0:1]
+; GCN3-NEXT:    v_cmp_le_i64_e32 vcc, s[6:7], v[0:1]
 ; GCN3-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc
 ; GCN3-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
 ; GCN3-NEXT:    buffer_store_dword v4, v2, s[0:3], 0 offen
@@ -12003,7 +12003,7 @@ define amdgpu_gfx i64 @flat_atomic_max_i64_ret_offset_scalar(ptr inreg %out, i64
 ; GCN1-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GCN1-NEXT:    v_mov_b32_e32 v9, v1
 ; GCN1-NEXT:    v_mov_b32_e32 v8, v0
-; GCN1-NEXT:    v_cmp_lt_i64_e32 vcc, s[6:7], v[8:9]
+; GCN1-NEXT:    v_cmp_le_i64_e32 vcc, s[6:7], v[8:9]
 ; GCN1-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
 ; GCN1-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
 ; GCN1-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[6:9] glc
@@ -12031,7 +12031,7 @@ define amdgpu_gfx i64 @flat_atomic_max_i64_ret_offset_scalar(ptr inreg %out, i64
 ; GCN1-NEXT:    buffer_load_dword v1, v3, s[0:3], 0 offen
 ; GCN1-NEXT:    v_mov_b32_e32 v4, s7
 ; GCN1-NEXT:    s_waitcnt vmcnt(0)
-; GCN1-NEXT:    v_cmp_lt_i64_e32 vcc, s[6:7], v[0:1]
+; GCN1-NEXT:    v_cmp_le_i64_e32 vcc, s[6:7], v[0:1]
 ; GCN1-NEXT:    v_cndmask_b32_e32 v5, v5, v0, vcc
 ; GCN1-NEXT:    v_cndmask_b32_e32 v4, v4, v1, vcc
 ; GCN1-NEXT:    buffer_store_dword v5, v2, s[0:3], 0 offen
@@ -12069,7 +12069,7 @@ define amdgpu_gfx i64 @flat_atomic_max_i64_ret_offset_scalar(ptr inreg %out, i64
 ; GCN2-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GCN2-NEXT:    v_mov_b32_e32 v9, v1
 ; GCN2-NEXT:    v_mov_b32_e32 v8, v0
-; GCN2-NEXT:    v_cmp_lt_i64_e32 vcc, s[6:7], v[8:9]
+; GCN2-NEXT:    v_cmp_le_i64_e32 vcc, s[6:7], v[8:9]
 ; GCN2-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
 ; GCN2-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
 ; GCN2-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[6:9] glc
@@ -12096,7 +12096,7 @@ define amdgpu_gfx i64 @flat_atomic_max_i64_ret_offset_scalar(ptr inreg %out, i64
 ; GCN2-NEXT:    v_mov_b32_e32 v5, s6
 ; GCN2-NEXT:    v_mov_b32_e32 v4, s7
 ; GCN2-NEXT:    s_waitcnt vmcnt(0)
-; GCN2-NEXT:    v_cmp_lt_i64_e32 vcc, s[6:7], v[0:1]
+; GCN2-NEXT:    v_cmp_le_i64_e32 vcc, s[6:7], v[0:1]
 ; GCN2-NEXT:    v_cndmask_b32_e32 v5, v5, v0, vcc
 ; GCN2-NEXT:    v_cndmask_b32_e32 v4, v4, v1, vcc
 ; GCN2-NEXT:    buffer_store_dword v5, v2, s[0:3], 0 offen
@@ -12127,7 +12127,7 @@ define amdgpu_gfx i64 @flat_atomic_max_i64_ret_offset_scalar(ptr inreg %out, i64
 ; GCN3-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GCN3-NEXT:    v_mov_b32_e32 v9, v1
 ; GCN3-NEXT:    v_mov_b32_e32 v8, v0
-; GCN3-NEXT:    v_cmp_lt_i64_e32 vcc, s[6:7], v[8:9]
+; GCN3-NEXT:    v_cmp_le_i64_e32 vcc, s[6:7], v[8:9]
 ; GCN3-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
 ; GCN3-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
 ; GCN3-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[6:9] glc
@@ -12152,7 +12152,7 @@ define amdgpu_gfx i64 @flat_atomic_max_i64_ret_offset_scalar(ptr inreg %out, i64
 ; GCN3-NEXT:    v_mov_b32_e32 v4, s6
 ; GCN3-NEXT:    v_mov_b32_e32 v3, s7
 ; GCN3-NEXT:    s_waitcnt vmcnt(0)
-; GCN3-NEXT:    v_cmp_lt_i64_e32 vcc, s[6:7], v[0:1]
+; GCN3-NEXT:    v_cmp_le_i64_e32 vcc, s[6:7], v[0:1]
 ; GCN3-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc
 ; GCN3-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
 ; GCN3-NEXT:    buffer_store_dword v4, v2, s[0:3], 0 offen
@@ -12203,7 +12203,7 @@ define amdgpu_kernel void @atomic_max_i64_addr64_offset(ptr %out, i64 %in, i64 %
 ; GCN1-NEXT:  .LBB88_4: ; %atomicrmw.start
 ; GCN1-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GCN1-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN1-NEXT:    v_cmp_lt_i64_e32 vcc, s[2:3], v[2:3]
+; GCN1-NEXT:    v_cmp_le_i64_e32 vcc, s[2:3], v[2:3]
 ; GCN1-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
 ; GCN1-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
 ; GCN1-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
@@ -12230,7 +12230,7 @@ define amdgpu_kernel void @atomic_max_i64_addr64_offset(ptr %out, i64 %in, i64 %
 ; GCN1-NEXT:    buffer_load_dword v1, v3, s[12:15], 0 offen
 ; GCN1-NEXT:    v_mov_b32_e32 v4, s3
 ; GCN1-NEXT:    s_waitcnt vmcnt(0)
-; GCN1-NEXT:    v_cmp_lt_i64_e32 vcc, s[2:3], v[0:1]
+; GCN1-NEXT:    v_cmp_le_i64_e32 vcc, s[2:3], v[0:1]
 ; GCN1-NEXT:    v_cndmask_b32_e32 v0, v5, v0, vcc
 ; GCN1-NEXT:    v_cndmask_b32_e32 v1, v4, v1, vcc
 ; GCN1-NEXT:    buffer_store_dword v0, v2, s[12:15], 0 offen
@@ -12274,7 +12274,7 @@ define amdgpu_kernel void @atomic_max_i64_addr64_offset(ptr %out, i64 %in, i64 %
 ; GCN2-NEXT:  .LBB88_4: ; %atomicrmw.start
 ; GCN2-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GCN2-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN2-NEXT:    v_cmp_lt_i64_e32 vcc, s[2:3], v[2:3]
+; GCN2-NEXT:    v_cmp_le_i64_e32 vcc, s[2:3], v[2:3]
 ; GCN2-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
 ; GCN2-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
 ; GCN2-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
@@ -12300,7 +12300,7 @@ define amdgpu_kernel void @atomic_max_i64_addr64_offset(ptr %out, i64 %in, i64 %
 ; GCN2-NEXT:    v_mov_b32_e32 v5, s2
 ; GCN2-NEXT:    v_mov_b32_e32 v4, s3
 ; GCN2-NEXT:    s_waitcnt vmcnt(0)
-; GCN2-NEXT:    v_cmp_lt_i64_e32 vcc, s[2:3], v[0:1]
+; GCN2-NEXT:    v_cmp_le_i64_e32 vcc, s[2:3], v[0:1]
 ; GCN2-NEXT:    v_cndmask_b32_e32 v0, v5, v0, vcc
 ; GCN2-NEXT:    v_cndmask_b32_e32 v1, v4, v1, vcc
 ; GCN2-NEXT:    buffer_store_dword v0, v2, s[88:91], 0 offen
@@ -12344,7 +12344,7 @@ define amdgpu_kernel void @atomic_max_i64_addr64_offset(ptr %out, i64 %in, i64 %
 ; GCN3-NEXT:  .LBB88_4: ; %atomicrmw.start
 ; GCN3-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GCN3-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN3-NEXT:    v_cmp_lt_i64_e32 vcc, s[2:3], v[2:3]
+; GCN3-NEXT:    v_cmp_le_i64_e32 vcc, s[2:3], v[2:3]
 ; GCN3-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
 ; GCN3-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
 ; GCN3-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
@@ -12368,7 +12368,7 @@ define amdgpu_kernel void @atomic_max_i64_addr64_offset(ptr %out, i64 %in, i64 %
 ; GCN3-NEXT:    v_mov_b32_e32 v4, s2
 ; GCN3-NEXT:    v_mov_b32_e32 v3, s3
 ; GCN3-NEXT:    s_waitcnt vmcnt(0)
-; GCN3-NEXT:    v_cmp_lt_i64_e32 vcc, s[2:3], v[0:1]
+; GCN3-NEXT:    v_cmp_le_i64_e32 vcc, s[2:3], v[0:1]
 ; GCN3-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
 ; GCN3-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
 ; GCN3-NEXT:    buffer_store_dword v0, v2, s[12:15], 0 offen
@@ -12414,7 +12414,7 @@ define amdgpu_kernel void @atomic_max_i64_ret_addr64_offset(ptr %out, ptr %out2,
 ; GCN1-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GCN1-NEXT:    v_mov_b32_e32 v9, v1
 ; GCN1-NEXT:    v_mov_b32_e32 v8, v0
-; GCN1-NEXT:    v_cmp_lt_i64_e32 vcc, s[12:13], v[8:9]
+; GCN1-NEXT:    v_cmp_le_i64_e32 vcc, s[12:13], v[8:9]
 ; GCN1-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
 ; GCN1-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
 ; GCN1-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[6:9] glc
@@ -12442,7 +12442,7 @@ define amdgpu_kernel void @atomic_max_i64_ret_addr64_offset(ptr %out, ptr %out2,
 ; GCN1-NEXT:    buffer_load_dword v1, v3, s[16:19], 0 offen
 ; GCN1-NEXT:    v_mov_b32_e32 v4, s13
 ; GCN1-NEXT:    s_waitcnt vmcnt(0)
-; GCN1-NEXT:    v_cmp_lt_i64_e32 vcc, s[12:13], v[0:1]
+; GCN1-NEXT:    v_cmp_le_i64_e32 vcc, s[12:13], v[0:1]
 ; GCN1-NEXT:    v_cndmask_b32_e32 v5, v5, v0, vcc
 ; GCN1-NEXT:    v_cndmask_b32_e32 v4, v4, v1, vcc
 ; GCN1-NEXT:    buffer_store_dword v5, v2, s[16:19], 0 offen
@@ -12485,7 +12485,7 @@ define amdgpu_kernel void @atomic_max_i64_ret_addr64_offset(ptr %out, ptr %out2,
 ; GCN2-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GCN2-NEXT:    v_mov_b32_e32 v9, v1
 ; GCN2-NEXT:    v_mov_b32_e32 v8, v0
-; GCN2-NEXT:    v_cmp_lt_i64_e32 vcc, s[12:13], v[8:9]
+; GCN2-NEXT:    v_cmp_le_i64_e32 vcc, s[12:13], v[8:9]
 ; GCN2-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
 ; GCN2-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
 ; GCN2-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[6:9] glc
@@ -12512,7 +12512,7 @@ define amdgpu_kernel void @atomic_max_i64_ret_addr64_offset(ptr %out, ptr %out2,
 ; GCN2-NEXT:    v_mov_b32_e32 v5, s12
 ; GCN2-NEXT:    v_mov_b32_e32 v4, s13
 ; GCN2-NEXT:    s_waitcnt vmcnt(0)
-; GCN2-NEXT:    v_cmp_lt_i64_e32 vcc, s[12:13], v[0:1]
+; GCN2-NEXT:    v_cmp_le_i64_e32 vcc, s[12:13], v[0:1]
 ; GCN2-NEXT:    v_cndmask_b32_e32 v5, v5, v0, vcc
 ; GCN2-NEXT:    v_cndmask_b32_e32 v4, v4, v1, vcc
 ; GCN2-NEXT:    buffer_store_dword v5, v2, s[88:91], 0 offen
@@ -12555,7 +12555,7 @@ define amdgpu_kernel void @atomic_max_i64_ret_addr64_offset(ptr %out, ptr %out2,
 ; GCN3-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GCN3-NEXT:    v_mov_b32_e32 v9, v1
 ; GCN3-NEXT:    v_mov_b32_e32 v8, v0
-; GCN3-NEXT:    v_cmp_lt_i64_e32 vcc, s[12:13], v[8:9]
+; GCN3-NEXT:    v_cmp_le_i64_e32 vcc, s[12:13], v[8:9]
 ; GCN3-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
 ; GCN3-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
 ; GCN3-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[6:9] glc
@@ -12580,7 +12580,7 @@ define amdgpu_kernel void @atomic_max_i64_ret_addr64_offset(ptr %out, ptr %out2,
 ; GCN3-NEXT:    v_mov_b32_e32 v4, s12
 ; GCN3-NEXT:    v_mov_b32_e32 v3, s13
 ; GCN3-NEXT:    s_waitcnt vmcnt(0)
-; GCN3-NEXT:    v_cmp_lt_i64_e32 vcc, s[12:13], v[0:1]
+; GCN3-NEXT:    v_cmp_le_i64_e32 vcc, s[12:13], v[0:1]
 ; GCN3-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc
 ; GCN3-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
 ; GCN3-NEXT:    buffer_store_dword v4, v2, s[16:19], 0 offen
@@ -12634,7 +12634,7 @@ define amdgpu_kernel void @atomic_max_i64_addr64(ptr %out, i64 %in, i64 %index)
 ; GCN1-NEXT:  .LBB90_4: ; %atomicrmw.start
 ; GCN1-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GCN1-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN1-NEXT:    v_cmp_lt_i64_e32 vcc, s[2:3], v[2:3]
+; GCN1-NEXT:    v_cmp_le_i64_e32 vcc, s[2:3], v[2:3]
 ; GCN1-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
 ; GCN1-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
 ; GCN1-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
@@ -12661,7 +12661,7 @@ define amdgpu_kernel void @atomic_max_i64_addr64(ptr %out, i64 %in, i64 %index)
 ; GCN1-NEXT:    buffer_load_dword v1, v3, s[12:15], 0 offen
 ; GCN1-NEXT:    v_mov_b32_e32 v4, s3
 ; GCN1-NEXT:    s_waitcnt vmcnt(0)
-; GCN1-NEXT:    v_cmp_lt_i64_e32 vcc, s[2:3], v[0:1]
+; GCN1-NEXT:    v_cmp_le_i64_e32 vcc, s[2:3], v[0:1]
 ; GCN1-NEXT:    v_cndmask_b32_e32 v0, v5, v0, vcc
 ; GCN1-NEXT:    v_cndmask_b32_e32 v1, v4, v1, vcc
 ; GCN1-NEXT:    buffer_store_dword v0, v2, s[12:15], 0 offen
@@ -12703,7 +12703,7 @@ define amdgpu_kernel void @atomic_max_i64_addr64(ptr %out, i64 %in, i64 %index)
 ; GCN2-NEXT:  .LBB90_4: ; %atomicrmw.start
 ; GCN2-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GCN2-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN2-NEXT:    v_cmp_lt_i64_e32 vcc, s[2:3], v[2:3]
+; GCN2-NEXT:    v_cmp_le_i64_e32 vcc, s[2:3], v[2:3]
 ; GCN2-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
 ; GCN2-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
 ; GCN2-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
@@ -12729,7 +12729,7 @@ define amdgpu_kernel void @atomic_max_i64_addr64(ptr %out, i64 %in, i64 %index)
 ; GCN2-NEXT:    v_mov_b32_e32 v5, s2
 ; GCN2-NEXT:    v_mov_b32_e32 v4, s3
 ; GCN2-NEXT:    s_waitcnt vmcnt(0)
-; GCN2-NEXT:    v_cmp_lt_i64_e32 vcc, s[2:3], v[0:1]
+; GCN2-NEXT:    v_cmp_le_i64_e32 vcc, s[2:3], v[0:1]
 ; GCN2-NEXT:    v_cndmask_b32_e32 v0, v5, v0, vcc
 ; GCN2-NEXT:    v_cndmask_b32_e32 v1, v4, v1, vcc
 ; GCN2-NEXT:    buffer_store_dword v0, v2, s[88:91], 0 offen
@@ -12771,7 +12771,7 @@ define amdgpu_kernel void @atomic_max_i64_addr64(ptr %out, i64 %in, i64 %index)
 ; GCN3-NEXT:  .LBB90_4: ; %atomicrmw.start
 ; GCN3-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GCN3-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN3-NEXT:    v_cmp_lt_i64_e32 vcc, s[2:3], v[2:3]
+; GCN3-NEXT:    v_cmp_le_i64_e32 vcc, s[2:3], v[2:3]
 ; GCN3-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
 ; GCN3-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
 ; GCN3-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
@@ -12795,7 +12795,7 @@ define amdgpu_kernel void @atomic_max_i64_addr64(ptr %out, i64 %in, i64 %index)
 ; GCN3-NEXT:    v_mov_b32_e32 v4, s2
 ; GCN3-NEXT:    v_mov_b32_e32 v3, s3
 ; GCN3-NEXT:    s_waitcnt vmcnt(0)
-; GCN3-NEXT:    v_cmp_lt_i64_e32 vcc, s[2:3], v[0:1]
+; GCN3-NEXT:    v_cmp_le_i64_e32 vcc, s[2:3], v[0:1]
 ; GCN3-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
 ; GCN3-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
 ; GCN3-NEXT:    buffer_store_dword v0, v2, s[12:15], 0 offen
@@ -12838,7 +12838,7 @@ define amdgpu_kernel void @atomic_max_i64_ret_addr64(ptr %out, ptr %out2, i64 %i
 ; GCN1-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GCN1-NEXT:    v_mov_b32_e32 v9, v1
 ; GCN1-NEXT:    v_mov_b32_e32 v8, v0
-; GCN1-NEXT:    v_cmp_lt_i64_e32 vcc, s[12:13], v[8:9]
+; GCN1-NEXT:    v_cmp_le_i64_e32 vcc, s[12:13], v[8:9]
 ; GCN1-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
 ; GCN1-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
 ; GCN1-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[6:9] glc
@@ -12866,7 +12866,7 @@ define amdgpu_kernel void @atomic_max_i64_ret_addr64(ptr %out, ptr %out2, i64 %i
 ; GCN1-NEXT:    buffer_load_dword v1, v3, s[16:19], 0 offen
 ; GCN1-NEXT:    v_mov_b32_e32 v4, s13
 ; GCN1-NEXT:    s_waitcnt vmcnt(0)
-; GCN1-NEXT:    v_cmp_lt_i64_e32 vcc, s[12:13], v[0:1]
+; GCN1-NEXT:    v_cmp_le_i64_e32 vcc, s[12:13], v[0:1]
 ; GCN1-NEXT:    v_cndmask_b32_e32 v5, v5, v0, vcc
 ; GCN1-NEXT:    v_cndmask_b32_e32 v4, v4, v1, vcc
 ; GCN1-NEXT:    buffer_store_dword v5, v2, s[16:19], 0 offen
@@ -12907,7 +12907,7 @@ define amdgpu_kernel void @atomic_max_i64_ret_addr64(ptr %out, ptr %out2, i64 %i
 ; GCN2-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GCN2-NEXT:    v_mov_b32_e32 v9, v1
 ; GCN2-NEXT:    v_mov_b32_e32 v8, v0
-; GCN2-NEXT:    v_cmp_lt_i64_e32 vcc, s[12:13], v[8:9]
+; GCN2-NEXT:    v_cmp_le_i64_e32 vcc, s[12:13], v[8:9]
 ; GCN2-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
 ; GCN2-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
 ; GCN2-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[6:9] glc
@@ -12934,7 +12934,7 @@ define amdgpu_kernel void @atomic_max_i64_ret_addr64(ptr %out, ptr %out2, i64 %i
 ; GCN2-NEXT:    v_mov_b32_e32 v5, s12
 ; GCN2-NEXT:    v_mov_b32_e32 v4, s13
 ; GCN2-NEXT:    s_waitcnt vmcnt(0)
-; GCN2-NEXT:    v_cmp_lt_i64_e32 vcc, s[12:13], v[0:1]
+; GCN2-NEXT:    v_cmp_le_i64_e32 vcc, s[12:13], v[0:1]
 ; GCN2-NEXT:    v_cndmask_b32_e32 v5, v5, v0, vcc
 ; GCN2-NEXT:    v_cndmask_b32_e32 v4, v4, v1, vcc
 ; GCN2-NEXT:    buffer_store_dword v5, v2, s[88:91], 0 offen
@@ -12975,7 +12975,7 @@ define amdgpu_kernel void @atomic_max_i64_ret_addr64(ptr %out, ptr %out2, i64 %i
 ; GCN3-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GCN3-NEXT:    v_mov_b32_e32 v9, v1
 ; GCN3-NEXT:    v_mov_b32_e32 v8, v0
-; GCN3-NEXT:    v_cmp_lt_i64_e32 vcc, s[12:13], v[8:9]
+; GCN3-NEXT:    v_cmp_le_i64_e32 vcc, s[12:13], v[8:9]
 ; GCN3-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
 ; GCN3-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
 ; GCN3-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[6:9] glc
@@ -13000,7 +13000,7 @@ define amdgpu_kernel void @atomic_max_i64_ret_addr64(ptr %out, ptr %out2, i64 %i
 ; GCN3-NEXT:    v_mov_b32_e32 v4, s12
 ; GCN3-NEXT:    v_mov_b32_e32 v3, s13
 ; GCN3-NEXT:    s_waitcnt vmcnt(0)
-; GCN3-NEXT:    v_cmp_lt_i64_e32 vcc, s[12:13], v[0:1]
+; GCN3-NEXT:    v_cmp_le_i64_e32 vcc, s[12:13], v[0:1]
 ; GCN3-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc
 ; GCN3-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
 ; GCN3-NEXT:    buffer_store_dword v4, v2, s[16:19], 0 offen
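
A note on the pattern in the max hunks above: each one flips
v_cmp_lt_i64 to v_cmp_le_i64 with the operand order unchanged. The
compare is commuted so the SGPR pair can sit in src0 of the
_e32-encoded VOPC (src1 of the 32-bit encoding must be a VGPR), and
the v_cndmask keeps the old value when vcc is set. The true
operand-swapped counterpart of "old >= in" is "in <= old" (v_cmp_le);
the "in < old" (v_cmp_lt) emitted before this fix disagrees with it
exactly on ties. A minimal C sketch of the swap-versus-negate
distinction, illustrative only and not part of the patch:

#include <assert.h>
#include <stdint.h>

int main(void) {
  for (int64_t in = -2; in <= 2; ++in) {
    for (int64_t old = -2; old <= 2; ++old) {
      /* Swapping the operands preserves the predicate: old >= in
         holds iff in <= old, which is what v_cmp_le now encodes. */
      assert((old >= in) == (in <= old));
      /* The strict form emitted before the fix differs only when
         the two operands tie. */
      if (in == old)
        assert((in <= old) != (in < old));
    }
  }
  return 0;
}

For these max expansions the selected value is the same either way on
a tie (both candidates are equal), which is why only the checked
opcode changes; the distinction matters wherever the condition bits
themselves are observed.
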
@@ -14138,7 +14138,7 @@ define amdgpu_gfx void @flat_atomic_umax_i64_noret_scalar(ptr inreg %ptr, i64 in
 ; GCN1-NEXT:  .LBB98_4: ; %atomicrmw.start
 ; GCN1-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GCN1-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN1-NEXT:    v_cmp_lt_u64_e32 vcc, s[6:7], v[2:3]
+; GCN1-NEXT:    v_cmp_le_u64_e32 vcc, s[6:7], v[2:3]
 ; GCN1-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
 ; GCN1-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
 ; GCN1-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
@@ -14165,7 +14165,7 @@ define amdgpu_gfx void @flat_atomic_umax_i64_noret_scalar(ptr inreg %ptr, i64 in
 ; GCN1-NEXT:    buffer_load_dword v1, v3, s[0:3], 0 offen
 ; GCN1-NEXT:    v_mov_b32_e32 v4, s7
 ; GCN1-NEXT:    s_waitcnt vmcnt(0)
-; GCN1-NEXT:    v_cmp_lt_u64_e32 vcc, s[6:7], v[0:1]
+; GCN1-NEXT:    v_cmp_le_u64_e32 vcc, s[6:7], v[0:1]
 ; GCN1-NEXT:    v_cndmask_b32_e32 v0, v5, v0, vcc
 ; GCN1-NEXT:    v_cndmask_b32_e32 v1, v4, v1, vcc
 ; GCN1-NEXT:    buffer_store_dword v0, v2, s[0:3], 0 offen
@@ -14204,7 +14204,7 @@ define amdgpu_gfx void @flat_atomic_umax_i64_noret_scalar(ptr inreg %ptr, i64 in
 ; GCN2-NEXT:  .LBB98_4: ; %atomicrmw.start
 ; GCN2-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GCN2-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN2-NEXT:    v_cmp_lt_u64_e32 vcc, s[6:7], v[2:3]
+; GCN2-NEXT:    v_cmp_le_u64_e32 vcc, s[6:7], v[2:3]
 ; GCN2-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
 ; GCN2-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
 ; GCN2-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
@@ -14230,7 +14230,7 @@ define amdgpu_gfx void @flat_atomic_umax_i64_noret_scalar(ptr inreg %ptr, i64 in
 ; GCN2-NEXT:    v_mov_b32_e32 v5, s6
 ; GCN2-NEXT:    v_mov_b32_e32 v4, s7
 ; GCN2-NEXT:    s_waitcnt vmcnt(0)
-; GCN2-NEXT:    v_cmp_lt_u64_e32 vcc, s[6:7], v[0:1]
+; GCN2-NEXT:    v_cmp_le_u64_e32 vcc, s[6:7], v[0:1]
 ; GCN2-NEXT:    v_cndmask_b32_e32 v0, v5, v0, vcc
 ; GCN2-NEXT:    v_cndmask_b32_e32 v1, v4, v1, vcc
 ; GCN2-NEXT:    buffer_store_dword v0, v2, s[0:3], 0 offen
@@ -14262,7 +14262,7 @@ define amdgpu_gfx void @flat_atomic_umax_i64_noret_scalar(ptr inreg %ptr, i64 in
 ; GCN3-NEXT:  .LBB98_4: ; %atomicrmw.start
 ; GCN3-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GCN3-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN3-NEXT:    v_cmp_lt_u64_e32 vcc, s[6:7], v[2:3]
+; GCN3-NEXT:    v_cmp_le_u64_e32 vcc, s[6:7], v[2:3]
 ; GCN3-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
 ; GCN3-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
 ; GCN3-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
@@ -14286,7 +14286,7 @@ define amdgpu_gfx void @flat_atomic_umax_i64_noret_scalar(ptr inreg %ptr, i64 in
 ; GCN3-NEXT:    v_mov_b32_e32 v4, s6
 ; GCN3-NEXT:    v_mov_b32_e32 v3, s7
 ; GCN3-NEXT:    s_waitcnt vmcnt(0)
-; GCN3-NEXT:    v_cmp_lt_u64_e32 vcc, s[6:7], v[0:1]
+; GCN3-NEXT:    v_cmp_le_u64_e32 vcc, s[6:7], v[0:1]
 ; GCN3-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
 ; GCN3-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
 ; GCN3-NEXT:    buffer_store_dword v0, v2, s[0:3], 0 offen
@@ -14331,7 +14331,7 @@ define amdgpu_gfx void @flat_atomic_umax_i64_noret_offset_scalar(ptr inreg %out,
 ; GCN1-NEXT:  .LBB99_4: ; %atomicrmw.start
 ; GCN1-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GCN1-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN1-NEXT:    v_cmp_lt_u64_e32 vcc, s[6:7], v[2:3]
+; GCN1-NEXT:    v_cmp_le_u64_e32 vcc, s[6:7], v[2:3]
 ; GCN1-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
 ; GCN1-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
 ; GCN1-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
@@ -14358,7 +14358,7 @@ define amdgpu_gfx void @flat_atomic_umax_i64_noret_offset_scalar(ptr inreg %out,
 ; GCN1-NEXT:    buffer_load_dword v1, v3, s[0:3], 0 offen
 ; GCN1-NEXT:    v_mov_b32_e32 v4, s7
 ; GCN1-NEXT:    s_waitcnt vmcnt(0)
-; GCN1-NEXT:    v_cmp_lt_u64_e32 vcc, s[6:7], v[0:1]
+; GCN1-NEXT:    v_cmp_le_u64_e32 vcc, s[6:7], v[0:1]
 ; GCN1-NEXT:    v_cndmask_b32_e32 v0, v5, v0, vcc
 ; GCN1-NEXT:    v_cndmask_b32_e32 v1, v4, v1, vcc
 ; GCN1-NEXT:    buffer_store_dword v0, v2, s[0:3], 0 offen
@@ -14399,7 +14399,7 @@ define amdgpu_gfx void @flat_atomic_umax_i64_noret_offset_scalar(ptr inreg %out,
 ; GCN2-NEXT:  .LBB99_4: ; %atomicrmw.start
 ; GCN2-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GCN2-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN2-NEXT:    v_cmp_lt_u64_e32 vcc, s[6:7], v[2:3]
+; GCN2-NEXT:    v_cmp_le_u64_e32 vcc, s[6:7], v[2:3]
 ; GCN2-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
 ; GCN2-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
 ; GCN2-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
@@ -14425,7 +14425,7 @@ define amdgpu_gfx void @flat_atomic_umax_i64_noret_offset_scalar(ptr inreg %out,
 ; GCN2-NEXT:    v_mov_b32_e32 v5, s6
 ; GCN2-NEXT:    v_mov_b32_e32 v4, s7
 ; GCN2-NEXT:    s_waitcnt vmcnt(0)
-; GCN2-NEXT:    v_cmp_lt_u64_e32 vcc, s[6:7], v[0:1]
+; GCN2-NEXT:    v_cmp_le_u64_e32 vcc, s[6:7], v[0:1]
 ; GCN2-NEXT:    v_cndmask_b32_e32 v0, v5, v0, vcc
 ; GCN2-NEXT:    v_cndmask_b32_e32 v1, v4, v1, vcc
 ; GCN2-NEXT:    buffer_store_dword v0, v2, s[0:3], 0 offen
@@ -14459,7 +14459,7 @@ define amdgpu_gfx void @flat_atomic_umax_i64_noret_offset_scalar(ptr inreg %out,
 ; GCN3-NEXT:  .LBB99_4: ; %atomicrmw.start
 ; GCN3-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GCN3-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN3-NEXT:    v_cmp_lt_u64_e32 vcc, s[6:7], v[2:3]
+; GCN3-NEXT:    v_cmp_le_u64_e32 vcc, s[6:7], v[2:3]
 ; GCN3-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
 ; GCN3-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
 ; GCN3-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
@@ -14483,7 +14483,7 @@ define amdgpu_gfx void @flat_atomic_umax_i64_noret_offset_scalar(ptr inreg %out,
 ; GCN3-NEXT:    v_mov_b32_e32 v4, s6
 ; GCN3-NEXT:    v_mov_b32_e32 v3, s7
 ; GCN3-NEXT:    s_waitcnt vmcnt(0)
-; GCN3-NEXT:    v_cmp_lt_u64_e32 vcc, s[6:7], v[0:1]
+; GCN3-NEXT:    v_cmp_le_u64_e32 vcc, s[6:7], v[0:1]
 ; GCN3-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
 ; GCN3-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
 ; GCN3-NEXT:    buffer_store_dword v0, v2, s[0:3], 0 offen
@@ -14523,7 +14523,7 @@ define amdgpu_gfx i64 @flat_atomic_umax_i64_ret_scalar(ptr inreg %ptr, i64 inreg
 ; GCN1-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GCN1-NEXT:    v_mov_b32_e32 v9, v1
 ; GCN1-NEXT:    v_mov_b32_e32 v8, v0
-; GCN1-NEXT:    v_cmp_lt_u64_e32 vcc, s[6:7], v[8:9]
+; GCN1-NEXT:    v_cmp_le_u64_e32 vcc, s[6:7], v[8:9]
 ; GCN1-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
 ; GCN1-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
 ; GCN1-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[6:9] glc
@@ -14551,7 +14551,7 @@ define amdgpu_gfx i64 @flat_atomic_umax_i64_ret_scalar(ptr inreg %ptr, i64 inreg
 ; GCN1-NEXT:    buffer_load_dword v1, v3, s[0:3], 0 offen
 ; GCN1-NEXT:    v_mov_b32_e32 v4, s7
 ; GCN1-NEXT:    s_waitcnt vmcnt(0)
-; GCN1-NEXT:    v_cmp_lt_u64_e32 vcc, s[6:7], v[0:1]
+; GCN1-NEXT:    v_cmp_le_u64_e32 vcc, s[6:7], v[0:1]
 ; GCN1-NEXT:    v_cndmask_b32_e32 v5, v5, v0, vcc
 ; GCN1-NEXT:    v_cndmask_b32_e32 v4, v4, v1, vcc
 ; GCN1-NEXT:    buffer_store_dword v5, v2, s[0:3], 0 offen
@@ -14587,7 +14587,7 @@ define amdgpu_gfx i64 @flat_atomic_umax_i64_ret_scalar(ptr inreg %ptr, i64 inreg
 ; GCN2-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GCN2-NEXT:    v_mov_b32_e32 v9, v1
 ; GCN2-NEXT:    v_mov_b32_e32 v8, v0
-; GCN2-NEXT:    v_cmp_lt_u64_e32 vcc, s[6:7], v[8:9]
+; GCN2-NEXT:    v_cmp_le_u64_e32 vcc, s[6:7], v[8:9]
 ; GCN2-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
 ; GCN2-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
 ; GCN2-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[6:9] glc
@@ -14614,7 +14614,7 @@ define amdgpu_gfx i64 @flat_atomic_umax_i64_ret_scalar(ptr inreg %ptr, i64 inreg
 ; GCN2-NEXT:    v_mov_b32_e32 v5, s6
 ; GCN2-NEXT:    v_mov_b32_e32 v4, s7
 ; GCN2-NEXT:    s_waitcnt vmcnt(0)
-; GCN2-NEXT:    v_cmp_lt_u64_e32 vcc, s[6:7], v[0:1]
+; GCN2-NEXT:    v_cmp_le_u64_e32 vcc, s[6:7], v[0:1]
 ; GCN2-NEXT:    v_cndmask_b32_e32 v5, v5, v0, vcc
 ; GCN2-NEXT:    v_cndmask_b32_e32 v4, v4, v1, vcc
 ; GCN2-NEXT:    buffer_store_dword v5, v2, s[0:3], 0 offen
@@ -14643,7 +14643,7 @@ define amdgpu_gfx i64 @flat_atomic_umax_i64_ret_scalar(ptr inreg %ptr, i64 inreg
 ; GCN3-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GCN3-NEXT:    v_mov_b32_e32 v9, v1
 ; GCN3-NEXT:    v_mov_b32_e32 v8, v0
-; GCN3-NEXT:    v_cmp_lt_u64_e32 vcc, s[6:7], v[8:9]
+; GCN3-NEXT:    v_cmp_le_u64_e32 vcc, s[6:7], v[8:9]
 ; GCN3-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
 ; GCN3-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
 ; GCN3-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[6:9] glc
@@ -14668,7 +14668,7 @@ define amdgpu_gfx i64 @flat_atomic_umax_i64_ret_scalar(ptr inreg %ptr, i64 inreg
 ; GCN3-NEXT:    v_mov_b32_e32 v4, s6
 ; GCN3-NEXT:    v_mov_b32_e32 v3, s7
 ; GCN3-NEXT:    s_waitcnt vmcnt(0)
-; GCN3-NEXT:    v_cmp_lt_u64_e32 vcc, s[6:7], v[0:1]
+; GCN3-NEXT:    v_cmp_le_u64_e32 vcc, s[6:7], v[0:1]
 ; GCN3-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc
 ; GCN3-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
 ; GCN3-NEXT:    buffer_store_dword v4, v2, s[0:3], 0 offen
@@ -14710,7 +14710,7 @@ define amdgpu_gfx i64 @flat_atomic_umax_i64_ret_offset_scalar(ptr inreg %out, i6
 ; GCN1-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GCN1-NEXT:    v_mov_b32_e32 v9, v1
 ; GCN1-NEXT:    v_mov_b32_e32 v8, v0
-; GCN1-NEXT:    v_cmp_lt_u64_e32 vcc, s[6:7], v[8:9]
+; GCN1-NEXT:    v_cmp_le_u64_e32 vcc, s[6:7], v[8:9]
 ; GCN1-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
 ; GCN1-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
 ; GCN1-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[6:9] glc
@@ -14738,7 +14738,7 @@ define amdgpu_gfx i64 @flat_atomic_umax_i64_ret_offset_scalar(ptr inreg %out, i6
 ; GCN1-NEXT:    buffer_load_dword v1, v3, s[0:3], 0 offen
 ; GCN1-NEXT:    v_mov_b32_e32 v4, s7
 ; GCN1-NEXT:    s_waitcnt vmcnt(0)
-; GCN1-NEXT:    v_cmp_lt_u64_e32 vcc, s[6:7], v[0:1]
+; GCN1-NEXT:    v_cmp_le_u64_e32 vcc, s[6:7], v[0:1]
 ; GCN1-NEXT:    v_cndmask_b32_e32 v5, v5, v0, vcc
 ; GCN1-NEXT:    v_cndmask_b32_e32 v4, v4, v1, vcc
 ; GCN1-NEXT:    buffer_store_dword v5, v2, s[0:3], 0 offen
@@ -14776,7 +14776,7 @@ define amdgpu_gfx i64 @flat_atomic_umax_i64_ret_offset_scalar(ptr inreg %out, i6
 ; GCN2-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GCN2-NEXT:    v_mov_b32_e32 v9, v1
 ; GCN2-NEXT:    v_mov_b32_e32 v8, v0
-; GCN2-NEXT:    v_cmp_lt_u64_e32 vcc, s[6:7], v[8:9]
+; GCN2-NEXT:    v_cmp_le_u64_e32 vcc, s[6:7], v[8:9]
 ; GCN2-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
 ; GCN2-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
 ; GCN2-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[6:9] glc
@@ -14803,7 +14803,7 @@ define amdgpu_gfx i64 @flat_atomic_umax_i64_ret_offset_scalar(ptr inreg %out, i6
 ; GCN2-NEXT:    v_mov_b32_e32 v5, s6
 ; GCN2-NEXT:    v_mov_b32_e32 v4, s7
 ; GCN2-NEXT:    s_waitcnt vmcnt(0)
-; GCN2-NEXT:    v_cmp_lt_u64_e32 vcc, s[6:7], v[0:1]
+; GCN2-NEXT:    v_cmp_le_u64_e32 vcc, s[6:7], v[0:1]
 ; GCN2-NEXT:    v_cndmask_b32_e32 v5, v5, v0, vcc
 ; GCN2-NEXT:    v_cndmask_b32_e32 v4, v4, v1, vcc
 ; GCN2-NEXT:    buffer_store_dword v5, v2, s[0:3], 0 offen
@@ -14834,7 +14834,7 @@ define amdgpu_gfx i64 @flat_atomic_umax_i64_ret_offset_scalar(ptr inreg %out, i6
 ; GCN3-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GCN3-NEXT:    v_mov_b32_e32 v9, v1
 ; GCN3-NEXT:    v_mov_b32_e32 v8, v0
-; GCN3-NEXT:    v_cmp_lt_u64_e32 vcc, s[6:7], v[8:9]
+; GCN3-NEXT:    v_cmp_le_u64_e32 vcc, s[6:7], v[8:9]
 ; GCN3-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
 ; GCN3-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
 ; GCN3-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[6:9] glc
@@ -14859,7 +14859,7 @@ define amdgpu_gfx i64 @flat_atomic_umax_i64_ret_offset_scalar(ptr inreg %out, i6
 ; GCN3-NEXT:    v_mov_b32_e32 v4, s6
 ; GCN3-NEXT:    v_mov_b32_e32 v3, s7
 ; GCN3-NEXT:    s_waitcnt vmcnt(0)
-; GCN3-NEXT:    v_cmp_lt_u64_e32 vcc, s[6:7], v[0:1]
+; GCN3-NEXT:    v_cmp_le_u64_e32 vcc, s[6:7], v[0:1]
 ; GCN3-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc
 ; GCN3-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
 ; GCN3-NEXT:    buffer_store_dword v4, v2, s[0:3], 0 offen
@@ -14910,7 +14910,7 @@ define amdgpu_kernel void @atomic_umax_i64_addr64_offset(ptr %out, i64 %in, i64
 ; GCN1-NEXT:  .LBB102_4: ; %atomicrmw.start
 ; GCN1-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GCN1-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN1-NEXT:    v_cmp_lt_u64_e32 vcc, s[2:3], v[2:3]
+; GCN1-NEXT:    v_cmp_le_u64_e32 vcc, s[2:3], v[2:3]
 ; GCN1-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
 ; GCN1-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
 ; GCN1-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
@@ -14937,7 +14937,7 @@ define amdgpu_kernel void @atomic_umax_i64_addr64_offset(ptr %out, i64 %in, i64
 ; GCN1-NEXT:    buffer_load_dword v1, v3, s[12:15], 0 offen
 ; GCN1-NEXT:    v_mov_b32_e32 v4, s3
 ; GCN1-NEXT:    s_waitcnt vmcnt(0)
-; GCN1-NEXT:    v_cmp_lt_u64_e32 vcc, s[2:3], v[0:1]
+; GCN1-NEXT:    v_cmp_le_u64_e32 vcc, s[2:3], v[0:1]
 ; GCN1-NEXT:    v_cndmask_b32_e32 v0, v5, v0, vcc
 ; GCN1-NEXT:    v_cndmask_b32_e32 v1, v4, v1, vcc
 ; GCN1-NEXT:    buffer_store_dword v0, v2, s[12:15], 0 offen
@@ -14981,7 +14981,7 @@ define amdgpu_kernel void @atomic_umax_i64_addr64_offset(ptr %out, i64 %in, i64
 ; GCN2-NEXT:  .LBB102_4: ; %atomicrmw.start
 ; GCN2-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GCN2-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN2-NEXT:    v_cmp_lt_u64_e32 vcc, s[2:3], v[2:3]
+; GCN2-NEXT:    v_cmp_le_u64_e32 vcc, s[2:3], v[2:3]
 ; GCN2-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
 ; GCN2-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
 ; GCN2-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
@@ -15007,7 +15007,7 @@ define amdgpu_kernel void @atomic_umax_i64_addr64_offset(ptr %out, i64 %in, i64
 ; GCN2-NEXT:    v_mov_b32_e32 v5, s2
 ; GCN2-NEXT:    v_mov_b32_e32 v4, s3
 ; GCN2-NEXT:    s_waitcnt vmcnt(0)
-; GCN2-NEXT:    v_cmp_lt_u64_e32 vcc, s[2:3], v[0:1]
+; GCN2-NEXT:    v_cmp_le_u64_e32 vcc, s[2:3], v[0:1]
 ; GCN2-NEXT:    v_cndmask_b32_e32 v0, v5, v0, vcc
 ; GCN2-NEXT:    v_cndmask_b32_e32 v1, v4, v1, vcc
 ; GCN2-NEXT:    buffer_store_dword v0, v2, s[88:91], 0 offen
@@ -15051,7 +15051,7 @@ define amdgpu_kernel void @atomic_umax_i64_addr64_offset(ptr %out, i64 %in, i64
 ; GCN3-NEXT:  .LBB102_4: ; %atomicrmw.start
 ; GCN3-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GCN3-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN3-NEXT:    v_cmp_lt_u64_e32 vcc, s[2:3], v[2:3]
+; GCN3-NEXT:    v_cmp_le_u64_e32 vcc, s[2:3], v[2:3]
 ; GCN3-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
 ; GCN3-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
 ; GCN3-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
@@ -15075,7 +15075,7 @@ define amdgpu_kernel void @atomic_umax_i64_addr64_offset(ptr %out, i64 %in, i64
 ; GCN3-NEXT:    v_mov_b32_e32 v4, s2
 ; GCN3-NEXT:    v_mov_b32_e32 v3, s3
 ; GCN3-NEXT:    s_waitcnt vmcnt(0)
-; GCN3-NEXT:    v_cmp_lt_u64_e32 vcc, s[2:3], v[0:1]
+; GCN3-NEXT:    v_cmp_le_u64_e32 vcc, s[2:3], v[0:1]
 ; GCN3-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
 ; GCN3-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
 ; GCN3-NEXT:    buffer_store_dword v0, v2, s[12:15], 0 offen
@@ -15121,7 +15121,7 @@ define amdgpu_kernel void @atomic_umax_i64_ret_addr64_offset(ptr %out, ptr %out2
 ; GCN1-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GCN1-NEXT:    v_mov_b32_e32 v9, v1
 ; GCN1-NEXT:    v_mov_b32_e32 v8, v0
-; GCN1-NEXT:    v_cmp_lt_u64_e32 vcc, s[12:13], v[8:9]
+; GCN1-NEXT:    v_cmp_le_u64_e32 vcc, s[12:13], v[8:9]
 ; GCN1-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
 ; GCN1-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
 ; GCN1-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[6:9] glc
@@ -15149,7 +15149,7 @@ define amdgpu_kernel void @atomic_umax_i64_ret_addr64_offset(ptr %out, ptr %out2
 ; GCN1-NEXT:    buffer_load_dword v1, v3, s[16:19], 0 offen
 ; GCN1-NEXT:    v_mov_b32_e32 v4, s13
 ; GCN1-NEXT:    s_waitcnt vmcnt(0)
-; GCN1-NEXT:    v_cmp_lt_u64_e32 vcc, s[12:13], v[0:1]
+; GCN1-NEXT:    v_cmp_le_u64_e32 vcc, s[12:13], v[0:1]
 ; GCN1-NEXT:    v_cndmask_b32_e32 v5, v5, v0, vcc
 ; GCN1-NEXT:    v_cndmask_b32_e32 v4, v4, v1, vcc
 ; GCN1-NEXT:    buffer_store_dword v5, v2, s[16:19], 0 offen
@@ -15192,7 +15192,7 @@ define amdgpu_kernel void @atomic_umax_i64_ret_addr64_offset(ptr %out, ptr %out2
 ; GCN2-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GCN2-NEXT:    v_mov_b32_e32 v9, v1
 ; GCN2-NEXT:    v_mov_b32_e32 v8, v0
-; GCN2-NEXT:    v_cmp_lt_u64_e32 vcc, s[12:13], v[8:9]
+; GCN2-NEXT:    v_cmp_le_u64_e32 vcc, s[12:13], v[8:9]
 ; GCN2-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
 ; GCN2-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
 ; GCN2-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[6:9] glc
@@ -15219,7 +15219,7 @@ define amdgpu_kernel void @atomic_umax_i64_ret_addr64_offset(ptr %out, ptr %out2
 ; GCN2-NEXT:    v_mov_b32_e32 v5, s12
 ; GCN2-NEXT:    v_mov_b32_e32 v4, s13
 ; GCN2-NEXT:    s_waitcnt vmcnt(0)
-; GCN2-NEXT:    v_cmp_lt_u64_e32 vcc, s[12:13], v[0:1]
+; GCN2-NEXT:    v_cmp_le_u64_e32 vcc, s[12:13], v[0:1]
 ; GCN2-NEXT:    v_cndmask_b32_e32 v5, v5, v0, vcc
 ; GCN2-NEXT:    v_cndmask_b32_e32 v4, v4, v1, vcc
 ; GCN2-NEXT:    buffer_store_dword v5, v2, s[88:91], 0 offen
@@ -15262,7 +15262,7 @@ define amdgpu_kernel void @atomic_umax_i64_ret_addr64_offset(ptr %out, ptr %out2
 ; GCN3-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GCN3-NEXT:    v_mov_b32_e32 v9, v1
 ; GCN3-NEXT:    v_mov_b32_e32 v8, v0
-; GCN3-NEXT:    v_cmp_lt_u64_e32 vcc, s[12:13], v[8:9]
+; GCN3-NEXT:    v_cmp_le_u64_e32 vcc, s[12:13], v[8:9]
 ; GCN3-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
 ; GCN3-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
 ; GCN3-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[6:9] glc
@@ -15287,7 +15287,7 @@ define amdgpu_kernel void @atomic_umax_i64_ret_addr64_offset(ptr %out, ptr %out2
 ; GCN3-NEXT:    v_mov_b32_e32 v4, s12
 ; GCN3-NEXT:    v_mov_b32_e32 v3, s13
 ; GCN3-NEXT:    s_waitcnt vmcnt(0)
-; GCN3-NEXT:    v_cmp_lt_u64_e32 vcc, s[12:13], v[0:1]
+; GCN3-NEXT:    v_cmp_le_u64_e32 vcc, s[12:13], v[0:1]
 ; GCN3-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc
 ; GCN3-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
 ; GCN3-NEXT:    buffer_store_dword v4, v2, s[16:19], 0 offen
@@ -15336,7 +15336,7 @@ define amdgpu_kernel void @atomic_umax_i64_ret_addr64(ptr %out, ptr %out2, i64 %
 ; GCN1-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GCN1-NEXT:    v_mov_b32_e32 v9, v1
 ; GCN1-NEXT:    v_mov_b32_e32 v8, v0
-; GCN1-NEXT:    v_cmp_lt_u64_e32 vcc, s[12:13], v[8:9]
+; GCN1-NEXT:    v_cmp_le_u64_e32 vcc, s[12:13], v[8:9]
 ; GCN1-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
 ; GCN1-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
 ; GCN1-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[6:9] glc
@@ -15364,7 +15364,7 @@ define amdgpu_kernel void @atomic_umax_i64_ret_addr64(ptr %out, ptr %out2, i64 %
 ; GCN1-NEXT:    buffer_load_dword v1, v3, s[16:19], 0 offen
 ; GCN1-NEXT:    v_mov_b32_e32 v4, s13
 ; GCN1-NEXT:    s_waitcnt vmcnt(0)
-; GCN1-NEXT:    v_cmp_lt_u64_e32 vcc, s[12:13], v[0:1]
+; GCN1-NEXT:    v_cmp_le_u64_e32 vcc, s[12:13], v[0:1]
 ; GCN1-NEXT:    v_cndmask_b32_e32 v5, v5, v0, vcc
 ; GCN1-NEXT:    v_cndmask_b32_e32 v4, v4, v1, vcc
 ; GCN1-NEXT:    buffer_store_dword v5, v2, s[16:19], 0 offen
@@ -15405,7 +15405,7 @@ define amdgpu_kernel void @atomic_umax_i64_ret_addr64(ptr %out, ptr %out2, i64 %
 ; GCN2-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GCN2-NEXT:    v_mov_b32_e32 v9, v1
 ; GCN2-NEXT:    v_mov_b32_e32 v8, v0
-; GCN2-NEXT:    v_cmp_lt_u64_e32 vcc, s[12:13], v[8:9]
+; GCN2-NEXT:    v_cmp_le_u64_e32 vcc, s[12:13], v[8:9]
 ; GCN2-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
 ; GCN2-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
 ; GCN2-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[6:9] glc
@@ -15432,7 +15432,7 @@ define amdgpu_kernel void @atomic_umax_i64_ret_addr64(ptr %out, ptr %out2, i64 %
 ; GCN2-NEXT:    v_mov_b32_e32 v5, s12
 ; GCN2-NEXT:    v_mov_b32_e32 v4, s13
 ; GCN2-NEXT:    s_waitcnt vmcnt(0)
-; GCN2-NEXT:    v_cmp_lt_u64_e32 vcc, s[12:13], v[0:1]
+; GCN2-NEXT:    v_cmp_le_u64_e32 vcc, s[12:13], v[0:1]
 ; GCN2-NEXT:    v_cndmask_b32_e32 v5, v5, v0, vcc
 ; GCN2-NEXT:    v_cndmask_b32_e32 v4, v4, v1, vcc
 ; GCN2-NEXT:    buffer_store_dword v5, v2, s[88:91], 0 offen
@@ -15473,7 +15473,7 @@ define amdgpu_kernel void @atomic_umax_i64_ret_addr64(ptr %out, ptr %out2, i64 %
 ; GCN3-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GCN3-NEXT:    v_mov_b32_e32 v9, v1
 ; GCN3-NEXT:    v_mov_b32_e32 v8, v0
-; GCN3-NEXT:    v_cmp_lt_u64_e32 vcc, s[12:13], v[8:9]
+; GCN3-NEXT:    v_cmp_le_u64_e32 vcc, s[12:13], v[8:9]
 ; GCN3-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
 ; GCN3-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
 ; GCN3-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[6:9] glc
@@ -15498,7 +15498,7 @@ define amdgpu_kernel void @atomic_umax_i64_ret_addr64(ptr %out, ptr %out2, i64 %
 ; GCN3-NEXT:    v_mov_b32_e32 v4, s12
 ; GCN3-NEXT:    v_mov_b32_e32 v3, s13
 ; GCN3-NEXT:    s_waitcnt vmcnt(0)
-; GCN3-NEXT:    v_cmp_lt_u64_e32 vcc, s[12:13], v[0:1]
+; GCN3-NEXT:    v_cmp_le_u64_e32 vcc, s[12:13], v[0:1]
 ; GCN3-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc
 ; GCN3-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
 ; GCN3-NEXT:    buffer_store_dword v4, v2, s[16:19], 0 offen
@@ -16636,7 +16636,7 @@ define amdgpu_gfx void @flat_atomic_umin_i64_noret_scalar(ptr inreg %ptr, i64 in
 ; GCN1-NEXT:  .LBB111_4: ; %atomicrmw.start
 ; GCN1-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GCN1-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN1-NEXT:    v_cmp_ge_u64_e32 vcc, s[6:7], v[2:3]
+; GCN1-NEXT:    v_cmp_gt_u64_e32 vcc, s[6:7], v[2:3]
 ; GCN1-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
 ; GCN1-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
 ; GCN1-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
@@ -16663,7 +16663,7 @@ define amdgpu_gfx void @flat_atomic_umin_i64_noret_scalar(ptr inreg %ptr, i64 in
 ; GCN1-NEXT:    buffer_load_dword v1, v3, s[0:3], 0 offen
 ; GCN1-NEXT:    v_mov_b32_e32 v4, s7
 ; GCN1-NEXT:    s_waitcnt vmcnt(0)
-; GCN1-NEXT:    v_cmp_ge_u64_e32 vcc, s[6:7], v[0:1]
+; GCN1-NEXT:    v_cmp_gt_u64_e32 vcc, s[6:7], v[0:1]
 ; GCN1-NEXT:    v_cndmask_b32_e32 v0, v5, v0, vcc
 ; GCN1-NEXT:    v_cndmask_b32_e32 v1, v4, v1, vcc
 ; GCN1-NEXT:    buffer_store_dword v0, v2, s[0:3], 0 offen
@@ -16702,7 +16702,7 @@ define amdgpu_gfx void @flat_atomic_umin_i64_noret_scalar(ptr inreg %ptr, i64 in
 ; GCN2-NEXT:  .LBB111_4: ; %atomicrmw.start
 ; GCN2-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GCN2-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN2-NEXT:    v_cmp_ge_u64_e32 vcc, s[6:7], v[2:3]
+; GCN2-NEXT:    v_cmp_gt_u64_e32 vcc, s[6:7], v[2:3]
 ; GCN2-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
 ; GCN2-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
 ; GCN2-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
@@ -16728,7 +16728,7 @@ define amdgpu_gfx void @flat_atomic_umin_i64_noret_scalar(ptr inreg %ptr, i64 in
 ; GCN2-NEXT:    v_mov_b32_e32 v5, s6
 ; GCN2-NEXT:    v_mov_b32_e32 v4, s7
 ; GCN2-NEXT:    s_waitcnt vmcnt(0)
-; GCN2-NEXT:    v_cmp_ge_u64_e32 vcc, s[6:7], v[0:1]
+; GCN2-NEXT:    v_cmp_gt_u64_e32 vcc, s[6:7], v[0:1]
 ; GCN2-NEXT:    v_cndmask_b32_e32 v0, v5, v0, vcc
 ; GCN2-NEXT:    v_cndmask_b32_e32 v1, v4, v1, vcc
 ; GCN2-NEXT:    buffer_store_dword v0, v2, s[0:3], 0 offen
@@ -16760,7 +16760,7 @@ define amdgpu_gfx void @flat_atomic_umin_i64_noret_scalar(ptr inreg %ptr, i64 in
 ; GCN3-NEXT:  .LBB111_4: ; %atomicrmw.start
 ; GCN3-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GCN3-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN3-NEXT:    v_cmp_ge_u64_e32 vcc, s[6:7], v[2:3]
+; GCN3-NEXT:    v_cmp_gt_u64_e32 vcc, s[6:7], v[2:3]
 ; GCN3-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
 ; GCN3-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
 ; GCN3-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
@@ -16784,7 +16784,7 @@ define amdgpu_gfx void @flat_atomic_umin_i64_noret_scalar(ptr inreg %ptr, i64 in
 ; GCN3-NEXT:    v_mov_b32_e32 v4, s6
 ; GCN3-NEXT:    v_mov_b32_e32 v3, s7
 ; GCN3-NEXT:    s_waitcnt vmcnt(0)
-; GCN3-NEXT:    v_cmp_ge_u64_e32 vcc, s[6:7], v[0:1]
+; GCN3-NEXT:    v_cmp_gt_u64_e32 vcc, s[6:7], v[0:1]
 ; GCN3-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
 ; GCN3-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
 ; GCN3-NEXT:    buffer_store_dword v0, v2, s[0:3], 0 offen
@@ -16829,7 +16829,7 @@ define amdgpu_gfx void @flat_atomic_umin_i64_noret_offset_scalar(ptr inreg %out,
 ; GCN1-NEXT:  .LBB112_4: ; %atomicrmw.start
 ; GCN1-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GCN1-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN1-NEXT:    v_cmp_ge_u64_e32 vcc, s[6:7], v[2:3]
+; GCN1-NEXT:    v_cmp_gt_u64_e32 vcc, s[6:7], v[2:3]
 ; GCN1-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
 ; GCN1-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
 ; GCN1-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
@@ -16856,7 +16856,7 @@ define amdgpu_gfx void @flat_atomic_umin_i64_noret_offset_scalar(ptr inreg %out,
 ; GCN1-NEXT:    buffer_load_dword v1, v3, s[0:3], 0 offen
 ; GCN1-NEXT:    v_mov_b32_e32 v4, s7
 ; GCN1-NEXT:    s_waitcnt vmcnt(0)
-; GCN1-NEXT:    v_cmp_ge_u64_e32 vcc, s[6:7], v[0:1]
+; GCN1-NEXT:    v_cmp_gt_u64_e32 vcc, s[6:7], v[0:1]
 ; GCN1-NEXT:    v_cndmask_b32_e32 v0, v5, v0, vcc
 ; GCN1-NEXT:    v_cndmask_b32_e32 v1, v4, v1, vcc
 ; GCN1-NEXT:    buffer_store_dword v0, v2, s[0:3], 0 offen
@@ -16897,7 +16897,7 @@ define amdgpu_gfx void @flat_atomic_umin_i64_noret_offset_scalar(ptr inreg %out,
 ; GCN2-NEXT:  .LBB112_4: ; %atomicrmw.start
 ; GCN2-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GCN2-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN2-NEXT:    v_cmp_ge_u64_e32 vcc, s[6:7], v[2:3]
+; GCN2-NEXT:    v_cmp_gt_u64_e32 vcc, s[6:7], v[2:3]
 ; GCN2-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
 ; GCN2-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
 ; GCN2-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
@@ -16923,7 +16923,7 @@ define amdgpu_gfx void @flat_atomic_umin_i64_noret_offset_scalar(ptr inreg %out,
 ; GCN2-NEXT:    v_mov_b32_e32 v5, s6
 ; GCN2-NEXT:    v_mov_b32_e32 v4, s7
 ; GCN2-NEXT:    s_waitcnt vmcnt(0)
-; GCN2-NEXT:    v_cmp_ge_u64_e32 vcc, s[6:7], v[0:1]
+; GCN2-NEXT:    v_cmp_gt_u64_e32 vcc, s[6:7], v[0:1]
 ; GCN2-NEXT:    v_cndmask_b32_e32 v0, v5, v0, vcc
 ; GCN2-NEXT:    v_cndmask_b32_e32 v1, v4, v1, vcc
 ; GCN2-NEXT:    buffer_store_dword v0, v2, s[0:3], 0 offen
@@ -16957,7 +16957,7 @@ define amdgpu_gfx void @flat_atomic_umin_i64_noret_offset_scalar(ptr inreg %out,
 ; GCN3-NEXT:  .LBB112_4: ; %atomicrmw.start
 ; GCN3-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GCN3-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN3-NEXT:    v_cmp_ge_u64_e32 vcc, s[6:7], v[2:3]
+; GCN3-NEXT:    v_cmp_gt_u64_e32 vcc, s[6:7], v[2:3]
 ; GCN3-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
 ; GCN3-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
 ; GCN3-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
@@ -16981,7 +16981,7 @@ define amdgpu_gfx void @flat_atomic_umin_i64_noret_offset_scalar(ptr inreg %out,
 ; GCN3-NEXT:    v_mov_b32_e32 v4, s6
 ; GCN3-NEXT:    v_mov_b32_e32 v3, s7
 ; GCN3-NEXT:    s_waitcnt vmcnt(0)
-; GCN3-NEXT:    v_cmp_ge_u64_e32 vcc, s[6:7], v[0:1]
+; GCN3-NEXT:    v_cmp_gt_u64_e32 vcc, s[6:7], v[0:1]
 ; GCN3-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
 ; GCN3-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
 ; GCN3-NEXT:    buffer_store_dword v0, v2, s[0:3], 0 offen
@@ -17021,7 +17021,7 @@ define amdgpu_gfx i64 @flat_atomic_umin_i64_ret_scalar(ptr inreg %ptr, i64 inreg
 ; GCN1-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GCN1-NEXT:    v_mov_b32_e32 v9, v1
 ; GCN1-NEXT:    v_mov_b32_e32 v8, v0
-; GCN1-NEXT:    v_cmp_ge_u64_e32 vcc, s[6:7], v[8:9]
+; GCN1-NEXT:    v_cmp_gt_u64_e32 vcc, s[6:7], v[8:9]
 ; GCN1-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
 ; GCN1-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
 ; GCN1-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[6:9] glc
@@ -17049,7 +17049,7 @@ define amdgpu_gfx i64 @flat_atomic_umin_i64_ret_scalar(ptr inreg %ptr, i64 inreg
 ; GCN1-NEXT:    buffer_load_dword v1, v3, s[0:3], 0 offen
 ; GCN1-NEXT:    v_mov_b32_e32 v4, s7
 ; GCN1-NEXT:    s_waitcnt vmcnt(0)
-; GCN1-NEXT:    v_cmp_ge_u64_e32 vcc, s[6:7], v[0:1]
+; GCN1-NEXT:    v_cmp_gt_u64_e32 vcc, s[6:7], v[0:1]
 ; GCN1-NEXT:    v_cndmask_b32_e32 v5, v5, v0, vcc
 ; GCN1-NEXT:    v_cndmask_b32_e32 v4, v4, v1, vcc
 ; GCN1-NEXT:    buffer_store_dword v5, v2, s[0:3], 0 offen
@@ -17085,7 +17085,7 @@ define amdgpu_gfx i64 @flat_atomic_umin_i64_ret_scalar(ptr inreg %ptr, i64 inreg
 ; GCN2-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GCN2-NEXT:    v_mov_b32_e32 v9, v1
 ; GCN2-NEXT:    v_mov_b32_e32 v8, v0
-; GCN2-NEXT:    v_cmp_ge_u64_e32 vcc, s[6:7], v[8:9]
+; GCN2-NEXT:    v_cmp_gt_u64_e32 vcc, s[6:7], v[8:9]
 ; GCN2-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
 ; GCN2-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
 ; GCN2-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[6:9] glc
@@ -17112,7 +17112,7 @@ define amdgpu_gfx i64 @flat_atomic_umin_i64_ret_scalar(ptr inreg %ptr, i64 inreg
 ; GCN2-NEXT:    v_mov_b32_e32 v5, s6
 ; GCN2-NEXT:    v_mov_b32_e32 v4, s7
 ; GCN2-NEXT:    s_waitcnt vmcnt(0)
-; GCN2-NEXT:    v_cmp_ge_u64_e32 vcc, s[6:7], v[0:1]
+; GCN2-NEXT:    v_cmp_gt_u64_e32 vcc, s[6:7], v[0:1]
 ; GCN2-NEXT:    v_cndmask_b32_e32 v5, v5, v0, vcc
 ; GCN2-NEXT:    v_cndmask_b32_e32 v4, v4, v1, vcc
 ; GCN2-NEXT:    buffer_store_dword v5, v2, s[0:3], 0 offen
@@ -17141,7 +17141,7 @@ define amdgpu_gfx i64 @flat_atomic_umin_i64_ret_scalar(ptr inreg %ptr, i64 inreg
 ; GCN3-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GCN3-NEXT:    v_mov_b32_e32 v9, v1
 ; GCN3-NEXT:    v_mov_b32_e32 v8, v0
-; GCN3-NEXT:    v_cmp_ge_u64_e32 vcc, s[6:7], v[8:9]
+; GCN3-NEXT:    v_cmp_gt_u64_e32 vcc, s[6:7], v[8:9]
 ; GCN3-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
 ; GCN3-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
 ; GCN3-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[6:9] glc
@@ -17166,7 +17166,7 @@ define amdgpu_gfx i64 @flat_atomic_umin_i64_ret_scalar(ptr inreg %ptr, i64 inreg
 ; GCN3-NEXT:    v_mov_b32_e32 v4, s6
 ; GCN3-NEXT:    v_mov_b32_e32 v3, s7
 ; GCN3-NEXT:    s_waitcnt vmcnt(0)
-; GCN3-NEXT:    v_cmp_ge_u64_e32 vcc, s[6:7], v[0:1]
+; GCN3-NEXT:    v_cmp_gt_u64_e32 vcc, s[6:7], v[0:1]
 ; GCN3-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc
 ; GCN3-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
 ; GCN3-NEXT:    buffer_store_dword v4, v2, s[0:3], 0 offen
@@ -17208,7 +17208,7 @@ define amdgpu_gfx i64 @flat_atomic_umin_i64_ret_offset_scalar(ptr inreg %out, i6
 ; GCN1-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GCN1-NEXT:    v_mov_b32_e32 v9, v1
 ; GCN1-NEXT:    v_mov_b32_e32 v8, v0
-; GCN1-NEXT:    v_cmp_ge_u64_e32 vcc, s[6:7], v[8:9]
+; GCN1-NEXT:    v_cmp_gt_u64_e32 vcc, s[6:7], v[8:9]
 ; GCN1-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
 ; GCN1-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
 ; GCN1-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[6:9] glc
@@ -17236,7 +17236,7 @@ define amdgpu_gfx i64 @flat_atomic_umin_i64_ret_offset_scalar(ptr inreg %out, i6
 ; GCN1-NEXT:    buffer_load_dword v1, v3, s[0:3], 0 offen
 ; GCN1-NEXT:    v_mov_b32_e32 v4, s7
 ; GCN1-NEXT:    s_waitcnt vmcnt(0)
-; GCN1-NEXT:    v_cmp_ge_u64_e32 vcc, s[6:7], v[0:1]
+; GCN1-NEXT:    v_cmp_gt_u64_e32 vcc, s[6:7], v[0:1]
 ; GCN1-NEXT:    v_cndmask_b32_e32 v5, v5, v0, vcc
 ; GCN1-NEXT:    v_cndmask_b32_e32 v4, v4, v1, vcc
 ; GCN1-NEXT:    buffer_store_dword v5, v2, s[0:3], 0 offen
@@ -17274,7 +17274,7 @@ define amdgpu_gfx i64 @flat_atomic_umin_i64_ret_offset_scalar(ptr inreg %out, i6
 ; GCN2-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GCN2-NEXT:    v_mov_b32_e32 v9, v1
 ; GCN2-NEXT:    v_mov_b32_e32 v8, v0
-; GCN2-NEXT:    v_cmp_ge_u64_e32 vcc, s[6:7], v[8:9]
+; GCN2-NEXT:    v_cmp_gt_u64_e32 vcc, s[6:7], v[8:9]
 ; GCN2-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
 ; GCN2-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
 ; GCN2-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[6:9] glc
@@ -17301,7 +17301,7 @@ define amdgpu_gfx i64 @flat_atomic_umin_i64_ret_offset_scalar(ptr inreg %out, i6
 ; GCN2-NEXT:    v_mov_b32_e32 v5, s6
 ; GCN2-NEXT:    v_mov_b32_e32 v4, s7
 ; GCN2-NEXT:    s_waitcnt vmcnt(0)
-; GCN2-NEXT:    v_cmp_ge_u64_e32 vcc, s[6:7], v[0:1]
+; GCN2-NEXT:    v_cmp_gt_u64_e32 vcc, s[6:7], v[0:1]
 ; GCN2-NEXT:    v_cndmask_b32_e32 v5, v5, v0, vcc
 ; GCN2-NEXT:    v_cndmask_b32_e32 v4, v4, v1, vcc
 ; GCN2-NEXT:    buffer_store_dword v5, v2, s[0:3], 0 offen
@@ -17332,7 +17332,7 @@ define amdgpu_gfx i64 @flat_atomic_umin_i64_ret_offset_scalar(ptr inreg %out, i6
 ; GCN3-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GCN3-NEXT:    v_mov_b32_e32 v9, v1
 ; GCN3-NEXT:    v_mov_b32_e32 v8, v0
-; GCN3-NEXT:    v_cmp_ge_u64_e32 vcc, s[6:7], v[8:9]
+; GCN3-NEXT:    v_cmp_gt_u64_e32 vcc, s[6:7], v[8:9]
 ; GCN3-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
 ; GCN3-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
 ; GCN3-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[6:9] glc
@@ -17357,7 +17357,7 @@ define amdgpu_gfx i64 @flat_atomic_umin_i64_ret_offset_scalar(ptr inreg %out, i6
 ; GCN3-NEXT:    v_mov_b32_e32 v4, s6
 ; GCN3-NEXT:    v_mov_b32_e32 v3, s7
 ; GCN3-NEXT:    s_waitcnt vmcnt(0)
-; GCN3-NEXT:    v_cmp_ge_u64_e32 vcc, s[6:7], v[0:1]
+; GCN3-NEXT:    v_cmp_gt_u64_e32 vcc, s[6:7], v[0:1]
 ; GCN3-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc
 ; GCN3-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
 ; GCN3-NEXT:    buffer_store_dword v4, v2, s[0:3], 0 offen
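
The umin hunks above, and the signed-min hunks below, are the mirror
image: v_cmp_ge_* becomes v_cmp_gt_*. Here the expansion keeps the old
value when "in > old", and the operand-swapped counterpart of
"old < in" is "in > old" (v_cmp_gt); the non-strict "in >= old"
(v_cmp_ge) emitted before the fix again differs only on ties. This is
the same swap-versus-negate distinction as in the sketch above, with
the strict and non-strict roles exchanged.
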
@@ -18491,7 +18491,7 @@ define amdgpu_gfx void @flat_atomic_min_i64_noret_scalar(ptr inreg %ptr, i64 inr
 ; GCN1-NEXT:  .LBB121_4: ; %atomicrmw.start
 ; GCN1-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GCN1-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN1-NEXT:    v_cmp_ge_i64_e32 vcc, s[6:7], v[2:3]
+; GCN1-NEXT:    v_cmp_gt_i64_e32 vcc, s[6:7], v[2:3]
 ; GCN1-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
 ; GCN1-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
 ; GCN1-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
@@ -18518,7 +18518,7 @@ define amdgpu_gfx void @flat_atomic_min_i64_noret_scalar(ptr inreg %ptr, i64 inr
 ; GCN1-NEXT:    buffer_load_dword v1, v3, s[0:3], 0 offen
 ; GCN1-NEXT:    v_mov_b32_e32 v4, s7
 ; GCN1-NEXT:    s_waitcnt vmcnt(0)
-; GCN1-NEXT:    v_cmp_ge_i64_e32 vcc, s[6:7], v[0:1]
+; GCN1-NEXT:    v_cmp_gt_i64_e32 vcc, s[6:7], v[0:1]
 ; GCN1-NEXT:    v_cndmask_b32_e32 v0, v5, v0, vcc
 ; GCN1-NEXT:    v_cndmask_b32_e32 v1, v4, v1, vcc
 ; GCN1-NEXT:    buffer_store_dword v0, v2, s[0:3], 0 offen
@@ -18557,7 +18557,7 @@ define amdgpu_gfx void @flat_atomic_min_i64_noret_scalar(ptr inreg %ptr, i64 inr
 ; GCN2-NEXT:  .LBB121_4: ; %atomicrmw.start
 ; GCN2-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GCN2-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN2-NEXT:    v_cmp_ge_i64_e32 vcc, s[6:7], v[2:3]
+; GCN2-NEXT:    v_cmp_gt_i64_e32 vcc, s[6:7], v[2:3]
 ; GCN2-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
 ; GCN2-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
 ; GCN2-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
@@ -18583,7 +18583,7 @@ define amdgpu_gfx void @flat_atomic_min_i64_noret_scalar(ptr inreg %ptr, i64 inr
 ; GCN2-NEXT:    v_mov_b32_e32 v5, s6
 ; GCN2-NEXT:    v_mov_b32_e32 v4, s7
 ; GCN2-NEXT:    s_waitcnt vmcnt(0)
-; GCN2-NEXT:    v_cmp_ge_i64_e32 vcc, s[6:7], v[0:1]
+; GCN2-NEXT:    v_cmp_gt_i64_e32 vcc, s[6:7], v[0:1]
 ; GCN2-NEXT:    v_cndmask_b32_e32 v0, v5, v0, vcc
 ; GCN2-NEXT:    v_cndmask_b32_e32 v1, v4, v1, vcc
 ; GCN2-NEXT:    buffer_store_dword v0, v2, s[0:3], 0 offen
@@ -18615,7 +18615,7 @@ define amdgpu_gfx void @flat_atomic_min_i64_noret_scalar(ptr inreg %ptr, i64 inr
 ; GCN3-NEXT:  .LBB121_4: ; %atomicrmw.start
 ; GCN3-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GCN3-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN3-NEXT:    v_cmp_ge_i64_e32 vcc, s[6:7], v[2:3]
+; GCN3-NEXT:    v_cmp_gt_i64_e32 vcc, s[6:7], v[2:3]
 ; GCN3-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
 ; GCN3-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
 ; GCN3-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
@@ -18639,7 +18639,7 @@ define amdgpu_gfx void @flat_atomic_min_i64_noret_scalar(ptr inreg %ptr, i64 inr
 ; GCN3-NEXT:    v_mov_b32_e32 v4, s6
 ; GCN3-NEXT:    v_mov_b32_e32 v3, s7
 ; GCN3-NEXT:    s_waitcnt vmcnt(0)
-; GCN3-NEXT:    v_cmp_ge_i64_e32 vcc, s[6:7], v[0:1]
+; GCN3-NEXT:    v_cmp_gt_i64_e32 vcc, s[6:7], v[0:1]
 ; GCN3-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
 ; GCN3-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
 ; GCN3-NEXT:    buffer_store_dword v0, v2, s[0:3], 0 offen
@@ -18684,7 +18684,7 @@ define amdgpu_gfx void @flat_atomic_min_i64_noret_offset_scalar(ptr inreg %out,
 ; GCN1-NEXT:  .LBB122_4: ; %atomicrmw.start
 ; GCN1-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GCN1-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN1-NEXT:    v_cmp_ge_i64_e32 vcc, s[6:7], v[2:3]
+; GCN1-NEXT:    v_cmp_gt_i64_e32 vcc, s[6:7], v[2:3]
 ; GCN1-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
 ; GCN1-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
 ; GCN1-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
@@ -18711,7 +18711,7 @@ define amdgpu_gfx void @flat_atomic_min_i64_noret_offset_scalar(ptr inreg %out,
 ; GCN1-NEXT:    buffer_load_dword v1, v3, s[0:3], 0 offen
 ; GCN1-NEXT:    v_mov_b32_e32 v4, s7
 ; GCN1-NEXT:    s_waitcnt vmcnt(0)
-; GCN1-NEXT:    v_cmp_ge_i64_e32 vcc, s[6:7], v[0:1]
+; GCN1-NEXT:    v_cmp_gt_i64_e32 vcc, s[6:7], v[0:1]
 ; GCN1-NEXT:    v_cndmask_b32_e32 v0, v5, v0, vcc
 ; GCN1-NEXT:    v_cndmask_b32_e32 v1, v4, v1, vcc
 ; GCN1-NEXT:    buffer_store_dword v0, v2, s[0:3], 0 offen
@@ -18752,7 +18752,7 @@ define amdgpu_gfx void @flat_atomic_min_i64_noret_offset_scalar(ptr inreg %out,
 ; GCN2-NEXT:  .LBB122_4: ; %atomicrmw.start
 ; GCN2-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GCN2-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN2-NEXT:    v_cmp_ge_i64_e32 vcc, s[6:7], v[2:3]
+; GCN2-NEXT:    v_cmp_gt_i64_e32 vcc, s[6:7], v[2:3]
 ; GCN2-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
 ; GCN2-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
 ; GCN2-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
@@ -18778,7 +18778,7 @@ define amdgpu_gfx void @flat_atomic_min_i64_noret_offset_scalar(ptr inreg %out,
 ; GCN2-NEXT:    v_mov_b32_e32 v5, s6
 ; GCN2-NEXT:    v_mov_b32_e32 v4, s7
 ; GCN2-NEXT:    s_waitcnt vmcnt(0)
-; GCN2-NEXT:    v_cmp_ge_i64_e32 vcc, s[6:7], v[0:1]
+; GCN2-NEXT:    v_cmp_gt_i64_e32 vcc, s[6:7], v[0:1]
 ; GCN2-NEXT:    v_cndmask_b32_e32 v0, v5, v0, vcc
 ; GCN2-NEXT:    v_cndmask_b32_e32 v1, v4, v1, vcc
 ; GCN2-NEXT:    buffer_store_dword v0, v2, s[0:3], 0 offen
@@ -18812,7 +18812,7 @@ define amdgpu_gfx void @flat_atomic_min_i64_noret_offset_scalar(ptr inreg %out,
 ; GCN3-NEXT:  .LBB122_4: ; %atomicrmw.start
 ; GCN3-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GCN3-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN3-NEXT:    v_cmp_ge_i64_e32 vcc, s[6:7], v[2:3]
+; GCN3-NEXT:    v_cmp_gt_i64_e32 vcc, s[6:7], v[2:3]
 ; GCN3-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
 ; GCN3-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
 ; GCN3-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
@@ -18836,7 +18836,7 @@ define amdgpu_gfx void @flat_atomic_min_i64_noret_offset_scalar(ptr inreg %out,
 ; GCN3-NEXT:    v_mov_b32_e32 v4, s6
 ; GCN3-NEXT:    v_mov_b32_e32 v3, s7
 ; GCN3-NEXT:    s_waitcnt vmcnt(0)
-; GCN3-NEXT:    v_cmp_ge_i64_e32 vcc, s[6:7], v[0:1]
+; GCN3-NEXT:    v_cmp_gt_i64_e32 vcc, s[6:7], v[0:1]
 ; GCN3-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
 ; GCN3-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
 ; GCN3-NEXT:    buffer_store_dword v0, v2, s[0:3], 0 offen
@@ -18876,7 +18876,7 @@ define amdgpu_gfx i64 @flat_atomic_min_i64_ret_scalar(ptr inreg %ptr, i64 inreg
 ; GCN1-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GCN1-NEXT:    v_mov_b32_e32 v9, v1
 ; GCN1-NEXT:    v_mov_b32_e32 v8, v0
-; GCN1-NEXT:    v_cmp_ge_i64_e32 vcc, s[6:7], v[8:9]
+; GCN1-NEXT:    v_cmp_gt_i64_e32 vcc, s[6:7], v[8:9]
 ; GCN1-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
 ; GCN1-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
 ; GCN1-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[6:9] glc
@@ -18904,7 +18904,7 @@ define amdgpu_gfx i64 @flat_atomic_min_i64_ret_scalar(ptr inreg %ptr, i64 inreg
 ; GCN1-NEXT:    buffer_load_dword v1, v3, s[0:3], 0 offen
 ; GCN1-NEXT:    v_mov_b32_e32 v4, s7
 ; GCN1-NEXT:    s_waitcnt vmcnt(0)
-; GCN1-NEXT:    v_cmp_ge_i64_e32 vcc, s[6:7], v[0:1]
+; GCN1-NEXT:    v_cmp_gt_i64_e32 vcc, s[6:7], v[0:1]
 ; GCN1-NEXT:    v_cndmask_b32_e32 v5, v5, v0, vcc
 ; GCN1-NEXT:    v_cndmask_b32_e32 v4, v4, v1, vcc
 ; GCN1-NEXT:    buffer_store_dword v5, v2, s[0:3], 0 offen
@@ -18940,7 +18940,7 @@ define amdgpu_gfx i64 @flat_atomic_min_i64_ret_scalar(ptr inreg %ptr, i64 inreg
 ; GCN2-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GCN2-NEXT:    v_mov_b32_e32 v9, v1
 ; GCN2-NEXT:    v_mov_b32_e32 v8, v0
-; GCN2-NEXT:    v_cmp_ge_i64_e32 vcc, s[6:7], v[8:9]
+; GCN2-NEXT:    v_cmp_gt_i64_e32 vcc, s[6:7], v[8:9]
 ; GCN2-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
 ; GCN2-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
 ; GCN2-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[6:9] glc
@@ -18967,7 +18967,7 @@ define amdgpu_gfx i64 @flat_atomic_min_i64_ret_scalar(ptr inreg %ptr, i64 inreg
 ; GCN2-NEXT:    v_mov_b32_e32 v5, s6
 ; GCN2-NEXT:    v_mov_b32_e32 v4, s7
 ; GCN2-NEXT:    s_waitcnt vmcnt(0)
-; GCN2-NEXT:    v_cmp_ge_i64_e32 vcc, s[6:7], v[0:1]
+; GCN2-NEXT:    v_cmp_gt_i64_e32 vcc, s[6:7], v[0:1]
 ; GCN2-NEXT:    v_cndmask_b32_e32 v5, v5, v0, vcc
 ; GCN2-NEXT:    v_cndmask_b32_e32 v4, v4, v1, vcc
 ; GCN2-NEXT:    buffer_store_dword v5, v2, s[0:3], 0 offen
@@ -18996,7 +18996,7 @@ define amdgpu_gfx i64 @flat_atomic_min_i64_ret_scalar(ptr inreg %ptr, i64 inreg
 ; GCN3-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GCN3-NEXT:    v_mov_b32_e32 v9, v1
 ; GCN3-NEXT:    v_mov_b32_e32 v8, v0
-; GCN3-NEXT:    v_cmp_ge_i64_e32 vcc, s[6:7], v[8:9]
+; GCN3-NEXT:    v_cmp_gt_i64_e32 vcc, s[6:7], v[8:9]
 ; GCN3-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
 ; GCN3-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
 ; GCN3-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[6:9] glc
@@ -19021,7 +19021,7 @@ define amdgpu_gfx i64 @flat_atomic_min_i64_ret_scalar(ptr inreg %ptr, i64 inreg
 ; GCN3-NEXT:    v_mov_b32_e32 v4, s6
 ; GCN3-NEXT:    v_mov_b32_e32 v3, s7
 ; GCN3-NEXT:    s_waitcnt vmcnt(0)
-; GCN3-NEXT:    v_cmp_ge_i64_e32 vcc, s[6:7], v[0:1]
+; GCN3-NEXT:    v_cmp_gt_i64_e32 vcc, s[6:7], v[0:1]
 ; GCN3-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc
 ; GCN3-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
 ; GCN3-NEXT:    buffer_store_dword v4, v2, s[0:3], 0 offen
@@ -19063,7 +19063,7 @@ define amdgpu_gfx i64 @flat_atomic_min_i64_ret_offset_scalar(ptr inreg %out, i64
 ; GCN1-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GCN1-NEXT:    v_mov_b32_e32 v9, v1
 ; GCN1-NEXT:    v_mov_b32_e32 v8, v0
-; GCN1-NEXT:    v_cmp_ge_i64_e32 vcc, s[6:7], v[8:9]
+; GCN1-NEXT:    v_cmp_gt_i64_e32 vcc, s[6:7], v[8:9]
 ; GCN1-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
 ; GCN1-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
 ; GCN1-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[6:9] glc
@@ -19091,7 +19091,7 @@ define amdgpu_gfx i64 @flat_atomic_min_i64_ret_offset_scalar(ptr inreg %out, i64
 ; GCN1-NEXT:    buffer_load_dword v1, v3, s[0:3], 0 offen
 ; GCN1-NEXT:    v_mov_b32_e32 v4, s7
 ; GCN1-NEXT:    s_waitcnt vmcnt(0)
-; GCN1-NEXT:    v_cmp_ge_i64_e32 vcc, s[6:7], v[0:1]
+; GCN1-NEXT:    v_cmp_gt_i64_e32 vcc, s[6:7], v[0:1]
 ; GCN1-NEXT:    v_cndmask_b32_e32 v5, v5, v0, vcc
 ; GCN1-NEXT:    v_cndmask_b32_e32 v4, v4, v1, vcc
 ; GCN1-NEXT:    buffer_store_dword v5, v2, s[0:3], 0 offen
@@ -19129,7 +19129,7 @@ define amdgpu_gfx i64 @flat_atomic_min_i64_ret_offset_scalar(ptr inreg %out, i64
 ; GCN2-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GCN2-NEXT:    v_mov_b32_e32 v9, v1
 ; GCN2-NEXT:    v_mov_b32_e32 v8, v0
-; GCN2-NEXT:    v_cmp_ge_i64_e32 vcc, s[6:7], v[8:9]
+; GCN2-NEXT:    v_cmp_gt_i64_e32 vcc, s[6:7], v[8:9]
 ; GCN2-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
 ; GCN2-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
 ; GCN2-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[6:9] glc
@@ -19156,7 +19156,7 @@ define amdgpu_gfx i64 @flat_atomic_min_i64_ret_offset_scalar(ptr inreg %out, i64
 ; GCN2-NEXT:    v_mov_b32_e32 v5, s6
 ; GCN2-NEXT:    v_mov_b32_e32 v4, s7
 ; GCN2-NEXT:    s_waitcnt vmcnt(0)
-; GCN2-NEXT:    v_cmp_ge_i64_e32 vcc, s[6:7], v[0:1]
+; GCN2-NEXT:    v_cmp_gt_i64_e32 vcc, s[6:7], v[0:1]
 ; GCN2-NEXT:    v_cndmask_b32_e32 v5, v5, v0, vcc
 ; GCN2-NEXT:    v_cndmask_b32_e32 v4, v4, v1, vcc
 ; GCN2-NEXT:    buffer_store_dword v5, v2, s[0:3], 0 offen
@@ -19187,7 +19187,7 @@ define amdgpu_gfx i64 @flat_atomic_min_i64_ret_offset_scalar(ptr inreg %out, i64
 ; GCN3-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GCN3-NEXT:    v_mov_b32_e32 v9, v1
 ; GCN3-NEXT:    v_mov_b32_e32 v8, v0
-; GCN3-NEXT:    v_cmp_ge_i64_e32 vcc, s[6:7], v[8:9]
+; GCN3-NEXT:    v_cmp_gt_i64_e32 vcc, s[6:7], v[8:9]
 ; GCN3-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
 ; GCN3-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
 ; GCN3-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[6:9] glc
@@ -19212,7 +19212,7 @@ define amdgpu_gfx i64 @flat_atomic_min_i64_ret_offset_scalar(ptr inreg %out, i64
 ; GCN3-NEXT:    v_mov_b32_e32 v4, s6
 ; GCN3-NEXT:    v_mov_b32_e32 v3, s7
 ; GCN3-NEXT:    s_waitcnt vmcnt(0)
-; GCN3-NEXT:    v_cmp_ge_i64_e32 vcc, s[6:7], v[0:1]
+; GCN3-NEXT:    v_cmp_gt_i64_e32 vcc, s[6:7], v[0:1]
 ; GCN3-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc
 ; GCN3-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
 ; GCN3-NEXT:    buffer_store_dword v4, v2, s[0:3], 0 offen
@@ -19263,7 +19263,7 @@ define amdgpu_kernel void @atomic_min_i64_addr64_offset(ptr %out, i64 %in, i64 %
 ; GCN1-NEXT:  .LBB125_4: ; %atomicrmw.start
 ; GCN1-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GCN1-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN1-NEXT:    v_cmp_ge_i64_e32 vcc, s[2:3], v[2:3]
+; GCN1-NEXT:    v_cmp_gt_i64_e32 vcc, s[2:3], v[2:3]
 ; GCN1-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
 ; GCN1-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
 ; GCN1-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
@@ -19290,7 +19290,7 @@ define amdgpu_kernel void @atomic_min_i64_addr64_offset(ptr %out, i64 %in, i64 %
 ; GCN1-NEXT:    buffer_load_dword v1, v3, s[12:15], 0 offen
 ; GCN1-NEXT:    v_mov_b32_e32 v4, s3
 ; GCN1-NEXT:    s_waitcnt vmcnt(0)
-; GCN1-NEXT:    v_cmp_ge_i64_e32 vcc, s[2:3], v[0:1]
+; GCN1-NEXT:    v_cmp_gt_i64_e32 vcc, s[2:3], v[0:1]
 ; GCN1-NEXT:    v_cndmask_b32_e32 v0, v5, v0, vcc
 ; GCN1-NEXT:    v_cndmask_b32_e32 v1, v4, v1, vcc
 ; GCN1-NEXT:    buffer_store_dword v0, v2, s[12:15], 0 offen
@@ -19334,7 +19334,7 @@ define amdgpu_kernel void @atomic_min_i64_addr64_offset(ptr %out, i64 %in, i64 %
 ; GCN2-NEXT:  .LBB125_4: ; %atomicrmw.start
 ; GCN2-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GCN2-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN2-NEXT:    v_cmp_ge_i64_e32 vcc, s[2:3], v[2:3]
+; GCN2-NEXT:    v_cmp_gt_i64_e32 vcc, s[2:3], v[2:3]
 ; GCN2-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
 ; GCN2-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
 ; GCN2-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
@@ -19360,7 +19360,7 @@ define amdgpu_kernel void @atomic_min_i64_addr64_offset(ptr %out, i64 %in, i64 %
 ; GCN2-NEXT:    v_mov_b32_e32 v5, s2
 ; GCN2-NEXT:    v_mov_b32_e32 v4, s3
 ; GCN2-NEXT:    s_waitcnt vmcnt(0)
-; GCN2-NEXT:    v_cmp_ge_i64_e32 vcc, s[2:3], v[0:1]
+; GCN2-NEXT:    v_cmp_gt_i64_e32 vcc, s[2:3], v[0:1]
 ; GCN2-NEXT:    v_cndmask_b32_e32 v0, v5, v0, vcc
 ; GCN2-NEXT:    v_cndmask_b32_e32 v1, v4, v1, vcc
 ; GCN2-NEXT:    buffer_store_dword v0, v2, s[88:91], 0 offen
@@ -19404,7 +19404,7 @@ define amdgpu_kernel void @atomic_min_i64_addr64_offset(ptr %out, i64 %in, i64 %
 ; GCN3-NEXT:  .LBB125_4: ; %atomicrmw.start
 ; GCN3-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GCN3-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN3-NEXT:    v_cmp_ge_i64_e32 vcc, s[2:3], v[2:3]
+; GCN3-NEXT:    v_cmp_gt_i64_e32 vcc, s[2:3], v[2:3]
 ; GCN3-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
 ; GCN3-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
 ; GCN3-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
@@ -19428,7 +19428,7 @@ define amdgpu_kernel void @atomic_min_i64_addr64_offset(ptr %out, i64 %in, i64 %
 ; GCN3-NEXT:    v_mov_b32_e32 v4, s2
 ; GCN3-NEXT:    v_mov_b32_e32 v3, s3
 ; GCN3-NEXT:    s_waitcnt vmcnt(0)
-; GCN3-NEXT:    v_cmp_ge_i64_e32 vcc, s[2:3], v[0:1]
+; GCN3-NEXT:    v_cmp_gt_i64_e32 vcc, s[2:3], v[0:1]
 ; GCN3-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
 ; GCN3-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
 ; GCN3-NEXT:    buffer_store_dword v0, v2, s[12:15], 0 offen
@@ -19474,7 +19474,7 @@ define amdgpu_kernel void @atomic_min_i64_ret_addr64_offset(ptr %out, ptr %out2,
 ; GCN1-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GCN1-NEXT:    v_mov_b32_e32 v9, v1
 ; GCN1-NEXT:    v_mov_b32_e32 v8, v0
-; GCN1-NEXT:    v_cmp_ge_i64_e32 vcc, s[12:13], v[8:9]
+; GCN1-NEXT:    v_cmp_gt_i64_e32 vcc, s[12:13], v[8:9]
 ; GCN1-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
 ; GCN1-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
 ; GCN1-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[6:9] glc
@@ -19502,7 +19502,7 @@ define amdgpu_kernel void @atomic_min_i64_ret_addr64_offset(ptr %out, ptr %out2,
 ; GCN1-NEXT:    buffer_load_dword v1, v3, s[16:19], 0 offen
 ; GCN1-NEXT:    v_mov_b32_e32 v4, s13
 ; GCN1-NEXT:    s_waitcnt vmcnt(0)
-; GCN1-NEXT:    v_cmp_ge_i64_e32 vcc, s[12:13], v[0:1]
+; GCN1-NEXT:    v_cmp_gt_i64_e32 vcc, s[12:13], v[0:1]
 ; GCN1-NEXT:    v_cndmask_b32_e32 v5, v5, v0, vcc
 ; GCN1-NEXT:    v_cndmask_b32_e32 v4, v4, v1, vcc
 ; GCN1-NEXT:    buffer_store_dword v5, v2, s[16:19], 0 offen
@@ -19545,7 +19545,7 @@ define amdgpu_kernel void @atomic_min_i64_ret_addr64_offset(ptr %out, ptr %out2,
 ; GCN2-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GCN2-NEXT:    v_mov_b32_e32 v9, v1
 ; GCN2-NEXT:    v_mov_b32_e32 v8, v0
-; GCN2-NEXT:    v_cmp_ge_i64_e32 vcc, s[12:13], v[8:9]
+; GCN2-NEXT:    v_cmp_gt_i64_e32 vcc, s[12:13], v[8:9]
 ; GCN2-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
 ; GCN2-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
 ; GCN2-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[6:9] glc
@@ -19572,7 +19572,7 @@ define amdgpu_kernel void @atomic_min_i64_ret_addr64_offset(ptr %out, ptr %out2,
 ; GCN2-NEXT:    v_mov_b32_e32 v5, s12
 ; GCN2-NEXT:    v_mov_b32_e32 v4, s13
 ; GCN2-NEXT:    s_waitcnt vmcnt(0)
-; GCN2-NEXT:    v_cmp_ge_i64_e32 vcc, s[12:13], v[0:1]
+; GCN2-NEXT:    v_cmp_gt_i64_e32 vcc, s[12:13], v[0:1]
 ; GCN2-NEXT:    v_cndmask_b32_e32 v5, v5, v0, vcc
 ; GCN2-NEXT:    v_cndmask_b32_e32 v4, v4, v1, vcc
 ; GCN2-NEXT:    buffer_store_dword v5, v2, s[88:91], 0 offen
@@ -19615,7 +19615,7 @@ define amdgpu_kernel void @atomic_min_i64_ret_addr64_offset(ptr %out, ptr %out2,
 ; GCN3-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GCN3-NEXT:    v_mov_b32_e32 v9, v1
 ; GCN3-NEXT:    v_mov_b32_e32 v8, v0
-; GCN3-NEXT:    v_cmp_ge_i64_e32 vcc, s[12:13], v[8:9]
+; GCN3-NEXT:    v_cmp_gt_i64_e32 vcc, s[12:13], v[8:9]
 ; GCN3-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
 ; GCN3-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
 ; GCN3-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[6:9] glc
@@ -19640,7 +19640,7 @@ define amdgpu_kernel void @atomic_min_i64_ret_addr64_offset(ptr %out, ptr %out2,
 ; GCN3-NEXT:    v_mov_b32_e32 v4, s12
 ; GCN3-NEXT:    v_mov_b32_e32 v3, s13
 ; GCN3-NEXT:    s_waitcnt vmcnt(0)
-; GCN3-NEXT:    v_cmp_ge_i64_e32 vcc, s[12:13], v[0:1]
+; GCN3-NEXT:    v_cmp_gt_i64_e32 vcc, s[12:13], v[0:1]
 ; GCN3-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc
 ; GCN3-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
 ; GCN3-NEXT:    buffer_store_dword v4, v2, s[16:19], 0 offen
@@ -19690,7 +19690,7 @@ define amdgpu_kernel void @atomic_min_i64(ptr %out, i64 %in) {
 ; GCN1-NEXT:  .LBB127_4: ; %atomicrmw.start
 ; GCN1-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GCN1-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN1-NEXT:    v_cmp_ge_i64_e32 vcc, s[2:3], v[2:3]
+; GCN1-NEXT:    v_cmp_gt_i64_e32 vcc, s[2:3], v[2:3]
 ; GCN1-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
 ; GCN1-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
 ; GCN1-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
@@ -19717,7 +19717,7 @@ define amdgpu_kernel void @atomic_min_i64(ptr %out, i64 %in) {
 ; GCN1-NEXT:    buffer_load_dword v1, v3, s[12:15], 0 offen
 ; GCN1-NEXT:    v_mov_b32_e32 v4, s3
 ; GCN1-NEXT:    s_waitcnt vmcnt(0)
-; GCN1-NEXT:    v_cmp_ge_i64_e32 vcc, s[2:3], v[0:1]
+; GCN1-NEXT:    v_cmp_gt_i64_e32 vcc, s[2:3], v[0:1]
 ; GCN1-NEXT:    v_cndmask_b32_e32 v0, v5, v0, vcc
 ; GCN1-NEXT:    v_cndmask_b32_e32 v1, v4, v1, vcc
 ; GCN1-NEXT:    buffer_store_dword v0, v2, s[12:15], 0 offen
@@ -19755,7 +19755,7 @@ define amdgpu_kernel void @atomic_min_i64(ptr %out, i64 %in) {
 ; GCN2-NEXT:  .LBB127_4: ; %atomicrmw.start
 ; GCN2-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GCN2-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN2-NEXT:    v_cmp_ge_i64_e32 vcc, s[2:3], v[2:3]
+; GCN2-NEXT:    v_cmp_gt_i64_e32 vcc, s[2:3], v[2:3]
 ; GCN2-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
 ; GCN2-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
 ; GCN2-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
@@ -19781,7 +19781,7 @@ define amdgpu_kernel void @atomic_min_i64(ptr %out, i64 %in) {
 ; GCN2-NEXT:    v_mov_b32_e32 v5, s2
 ; GCN2-NEXT:    v_mov_b32_e32 v4, s3
 ; GCN2-NEXT:    s_waitcnt vmcnt(0)
-; GCN2-NEXT:    v_cmp_ge_i64_e32 vcc, s[2:3], v[0:1]
+; GCN2-NEXT:    v_cmp_gt_i64_e32 vcc, s[2:3], v[0:1]
 ; GCN2-NEXT:    v_cndmask_b32_e32 v0, v5, v0, vcc
 ; GCN2-NEXT:    v_cndmask_b32_e32 v1, v4, v1, vcc
 ; GCN2-NEXT:    buffer_store_dword v0, v2, s[88:91], 0 offen
@@ -19819,7 +19819,7 @@ define amdgpu_kernel void @atomic_min_i64(ptr %out, i64 %in) {
 ; GCN3-NEXT:  .LBB127_4: ; %atomicrmw.start
 ; GCN3-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GCN3-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN3-NEXT:    v_cmp_ge_i64_e32 vcc, s[2:3], v[2:3]
+; GCN3-NEXT:    v_cmp_gt_i64_e32 vcc, s[2:3], v[2:3]
 ; GCN3-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
 ; GCN3-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
 ; GCN3-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
@@ -19843,7 +19843,7 @@ define amdgpu_kernel void @atomic_min_i64(ptr %out, i64 %in) {
 ; GCN3-NEXT:    v_mov_b32_e32 v4, s2
 ; GCN3-NEXT:    v_mov_b32_e32 v3, s3
 ; GCN3-NEXT:    s_waitcnt vmcnt(0)
-; GCN3-NEXT:    v_cmp_ge_i64_e32 vcc, s[2:3], v[0:1]
+; GCN3-NEXT:    v_cmp_gt_i64_e32 vcc, s[2:3], v[0:1]
 ; GCN3-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
 ; GCN3-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
 ; GCN3-NEXT:    buffer_store_dword v0, v2, s[12:15], 0 offen
@@ -19885,7 +19885,7 @@ define amdgpu_kernel void @atomic_min_i64_ret_addr64(ptr %out, ptr %out2, i64 %i
 ; GCN1-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GCN1-NEXT:    v_mov_b32_e32 v9, v1
 ; GCN1-NEXT:    v_mov_b32_e32 v8, v0
-; GCN1-NEXT:    v_cmp_ge_i64_e32 vcc, s[12:13], v[8:9]
+; GCN1-NEXT:    v_cmp_gt_i64_e32 vcc, s[12:13], v[8:9]
 ; GCN1-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
 ; GCN1-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
 ; GCN1-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[6:9] glc
@@ -19913,7 +19913,7 @@ define amdgpu_kernel void @atomic_min_i64_ret_addr64(ptr %out, ptr %out2, i64 %i
 ; GCN1-NEXT:    buffer_load_dword v1, v3, s[16:19], 0 offen
 ; GCN1-NEXT:    v_mov_b32_e32 v4, s13
 ; GCN1-NEXT:    s_waitcnt vmcnt(0)
-; GCN1-NEXT:    v_cmp_ge_i64_e32 vcc, s[12:13], v[0:1]
+; GCN1-NEXT:    v_cmp_gt_i64_e32 vcc, s[12:13], v[0:1]
 ; GCN1-NEXT:    v_cndmask_b32_e32 v5, v5, v0, vcc
 ; GCN1-NEXT:    v_cndmask_b32_e32 v4, v4, v1, vcc
 ; GCN1-NEXT:    buffer_store_dword v5, v2, s[16:19], 0 offen
@@ -19954,7 +19954,7 @@ define amdgpu_kernel void @atomic_min_i64_ret_addr64(ptr %out, ptr %out2, i64 %i
 ; GCN2-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GCN2-NEXT:    v_mov_b32_e32 v9, v1
 ; GCN2-NEXT:    v_mov_b32_e32 v8, v0
-; GCN2-NEXT:    v_cmp_ge_i64_e32 vcc, s[12:13], v[8:9]
+; GCN2-NEXT:    v_cmp_gt_i64_e32 vcc, s[12:13], v[8:9]
 ; GCN2-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
 ; GCN2-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
 ; GCN2-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[6:9] glc
@@ -19981,7 +19981,7 @@ define amdgpu_kernel void @atomic_min_i64_ret_addr64(ptr %out, ptr %out2, i64 %i
 ; GCN2-NEXT:    v_mov_b32_e32 v5, s12
 ; GCN2-NEXT:    v_mov_b32_e32 v4, s13
 ; GCN2-NEXT:    s_waitcnt vmcnt(0)
-; GCN2-NEXT:    v_cmp_ge_i64_e32 vcc, s[12:13], v[0:1]
+; GCN2-NEXT:    v_cmp_gt_i64_e32 vcc, s[12:13], v[0:1]
 ; GCN2-NEXT:    v_cndmask_b32_e32 v5, v5, v0, vcc
 ; GCN2-NEXT:    v_cndmask_b32_e32 v4, v4, v1, vcc
 ; GCN2-NEXT:    buffer_store_dword v5, v2, s[88:91], 0 offen
@@ -20022,7 +20022,7 @@ define amdgpu_kernel void @atomic_min_i64_ret_addr64(ptr %out, ptr %out2, i64 %i
 ; GCN3-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GCN3-NEXT:    v_mov_b32_e32 v9, v1
 ; GCN3-NEXT:    v_mov_b32_e32 v8, v0
-; GCN3-NEXT:    v_cmp_ge_i64_e32 vcc, s[12:13], v[8:9]
+; GCN3-NEXT:    v_cmp_gt_i64_e32 vcc, s[12:13], v[8:9]
 ; GCN3-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
 ; GCN3-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
 ; GCN3-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[6:9] glc
@@ -20047,7 +20047,7 @@ define amdgpu_kernel void @atomic_min_i64_ret_addr64(ptr %out, ptr %out2, i64 %i
 ; GCN3-NEXT:    v_mov_b32_e32 v4, s12
 ; GCN3-NEXT:    v_mov_b32_e32 v3, s13
 ; GCN3-NEXT:    s_waitcnt vmcnt(0)
-; GCN3-NEXT:    v_cmp_ge_i64_e32 vcc, s[12:13], v[0:1]
+; GCN3-NEXT:    v_cmp_gt_i64_e32 vcc, s[12:13], v[0:1]
 ; GCN3-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc
 ; GCN3-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
 ; GCN3-NEXT:    buffer_store_dword v4, v2, s[16:19], 0 offen
@@ -21020,7 +21020,7 @@ define amdgpu_gfx void @flat_atomic_uinc_wrap_i64_noret_scalar(ptr inreg %ptr, i
 ; GCN1-NEXT:    v_add_i32_e32 v4, vcc, 1, v0
 ; GCN1-NEXT:    s_waitcnt vmcnt(0)
 ; GCN1-NEXT:    v_addc_u32_e32 v5, vcc, 0, v1, vcc
-; GCN1-NEXT:    v_cmp_gt_u64_e32 vcc, s[6:7], v[0:1]
+; GCN1-NEXT:    v_cmp_ge_u64_e32 vcc, s[6:7], v[0:1]
 ; GCN1-NEXT:    v_cndmask_b32_e32 v1, 0, v4, vcc
 ; GCN1-NEXT:    v_cndmask_b32_e32 v0, 0, v5, vcc
 ; GCN1-NEXT:    buffer_store_dword v1, v2, s[0:3], 0 offen
@@ -21065,7 +21065,7 @@ define amdgpu_gfx void @flat_atomic_uinc_wrap_i64_noret_scalar(ptr inreg %ptr, i
 ; GCN2-NEXT:    v_add_u32_e32 v4, vcc, 1, v0
 ; GCN2-NEXT:    s_waitcnt vmcnt(0)
 ; GCN2-NEXT:    v_addc_u32_e32 v5, vcc, 0, v1, vcc
-; GCN2-NEXT:    v_cmp_gt_u64_e32 vcc, s[6:7], v[0:1]
+; GCN2-NEXT:    v_cmp_ge_u64_e32 vcc, s[6:7], v[0:1]
 ; GCN2-NEXT:    v_cndmask_b32_e32 v1, 0, v4, vcc
 ; GCN2-NEXT:    v_cndmask_b32_e32 v0, 0, v5, vcc
 ; GCN2-NEXT:    buffer_store_dword v1, v2, s[0:3], 0 offen
@@ -21106,7 +21106,7 @@ define amdgpu_gfx void @flat_atomic_uinc_wrap_i64_noret_scalar(ptr inreg %ptr, i
 ; GCN3-NEXT:    v_add_co_u32_e32 v3, vcc, 1, v0
 ; GCN3-NEXT:    s_waitcnt vmcnt(0)
 ; GCN3-NEXT:    v_addc_co_u32_e32 v4, vcc, 0, v1, vcc
-; GCN3-NEXT:    v_cmp_gt_u64_e32 vcc, s[6:7], v[0:1]
+; GCN3-NEXT:    v_cmp_ge_u64_e32 vcc, s[6:7], v[0:1]
 ; GCN3-NEXT:    v_cndmask_b32_e32 v1, 0, v3, vcc
 ; GCN3-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc
 ; GCN3-NEXT:    buffer_store_dword v1, v2, s[0:3], 0 offen
@@ -21158,7 +21158,7 @@ define amdgpu_gfx void @flat_atomic_uinc_wrap_i64_noret_offset_scalar(ptr inreg
 ; GCN1-NEXT:    v_add_i32_e32 v4, vcc, 1, v0
 ; GCN1-NEXT:    s_waitcnt vmcnt(0)
 ; GCN1-NEXT:    v_addc_u32_e32 v5, vcc, 0, v1, vcc
-; GCN1-NEXT:    v_cmp_gt_u64_e32 vcc, s[6:7], v[0:1]
+; GCN1-NEXT:    v_cmp_ge_u64_e32 vcc, s[6:7], v[0:1]
 ; GCN1-NEXT:    v_cndmask_b32_e32 v1, 0, v4, vcc
 ; GCN1-NEXT:    v_cndmask_b32_e32 v0, 0, v5, vcc
 ; GCN1-NEXT:    buffer_store_dword v1, v2, s[0:3], 0 offen
@@ -21205,7 +21205,7 @@ define amdgpu_gfx void @flat_atomic_uinc_wrap_i64_noret_offset_scalar(ptr inreg
 ; GCN2-NEXT:    v_add_u32_e32 v4, vcc, 1, v0
 ; GCN2-NEXT:    s_waitcnt vmcnt(0)
 ; GCN2-NEXT:    v_addc_u32_e32 v5, vcc, 0, v1, vcc
-; GCN2-NEXT:    v_cmp_gt_u64_e32 vcc, s[6:7], v[0:1]
+; GCN2-NEXT:    v_cmp_ge_u64_e32 vcc, s[6:7], v[0:1]
 ; GCN2-NEXT:    v_cndmask_b32_e32 v1, 0, v4, vcc
 ; GCN2-NEXT:    v_cndmask_b32_e32 v0, 0, v5, vcc
 ; GCN2-NEXT:    buffer_store_dword v1, v2, s[0:3], 0 offen
@@ -21248,7 +21248,7 @@ define amdgpu_gfx void @flat_atomic_uinc_wrap_i64_noret_offset_scalar(ptr inreg
 ; GCN3-NEXT:    v_add_co_u32_e32 v3, vcc, 1, v0
 ; GCN3-NEXT:    s_waitcnt vmcnt(0)
 ; GCN3-NEXT:    v_addc_co_u32_e32 v4, vcc, 0, v1, vcc
-; GCN3-NEXT:    v_cmp_gt_u64_e32 vcc, s[6:7], v[0:1]
+; GCN3-NEXT:    v_cmp_ge_u64_e32 vcc, s[6:7], v[0:1]
 ; GCN3-NEXT:    v_cndmask_b32_e32 v1, 0, v3, vcc
 ; GCN3-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc
 ; GCN3-NEXT:    buffer_store_dword v1, v2, s[0:3], 0 offen
@@ -21296,7 +21296,7 @@ define amdgpu_gfx i64 @flat_atomic_uinc_wrap_i64_ret_scalar(ptr inreg %ptr, i64
 ; GCN1-NEXT:    v_add_i32_e32 v4, vcc, 1, v0
 ; GCN1-NEXT:    s_waitcnt vmcnt(0)
 ; GCN1-NEXT:    v_addc_u32_e32 v5, vcc, 0, v1, vcc
-; GCN1-NEXT:    v_cmp_gt_u64_e32 vcc, s[6:7], v[0:1]
+; GCN1-NEXT:    v_cmp_ge_u64_e32 vcc, s[6:7], v[0:1]
 ; GCN1-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
 ; GCN1-NEXT:    v_cndmask_b32_e32 v5, 0, v5, vcc
 ; GCN1-NEXT:    buffer_store_dword v4, v2, s[0:3], 0 offen
@@ -21339,7 +21339,7 @@ define amdgpu_gfx i64 @flat_atomic_uinc_wrap_i64_ret_scalar(ptr inreg %ptr, i64
 ; GCN2-NEXT:    v_add_u32_e32 v4, vcc, 1, v0
 ; GCN2-NEXT:    s_waitcnt vmcnt(0)
 ; GCN2-NEXT:    v_addc_u32_e32 v5, vcc, 0, v1, vcc
-; GCN2-NEXT:    v_cmp_gt_u64_e32 vcc, s[6:7], v[0:1]
+; GCN2-NEXT:    v_cmp_ge_u64_e32 vcc, s[6:7], v[0:1]
 ; GCN2-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
 ; GCN2-NEXT:    v_cndmask_b32_e32 v5, 0, v5, vcc
 ; GCN2-NEXT:    buffer_store_dword v4, v2, s[0:3], 0 offen
@@ -21378,7 +21378,7 @@ define amdgpu_gfx i64 @flat_atomic_uinc_wrap_i64_ret_scalar(ptr inreg %ptr, i64
 ; GCN3-NEXT:    v_add_co_u32_e32 v3, vcc, 1, v0
 ; GCN3-NEXT:    s_waitcnt vmcnt(0)
 ; GCN3-NEXT:    v_addc_co_u32_e32 v4, vcc, 0, v1, vcc
-; GCN3-NEXT:    v_cmp_gt_u64_e32 vcc, s[6:7], v[0:1]
+; GCN3-NEXT:    v_cmp_ge_u64_e32 vcc, s[6:7], v[0:1]
 ; GCN3-NEXT:    v_cndmask_b32_e32 v3, 0, v3, vcc
 ; GCN3-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
 ; GCN3-NEXT:    buffer_store_dword v3, v2, s[0:3], 0 offen
@@ -21428,7 +21428,7 @@ define amdgpu_gfx i64 @flat_atomic_uinc_wrap_i64_ret_offset_scalar(ptr inreg %ou
 ; GCN1-NEXT:    v_add_i32_e32 v4, vcc, 1, v0
 ; GCN1-NEXT:    s_waitcnt vmcnt(0)
 ; GCN1-NEXT:    v_addc_u32_e32 v5, vcc, 0, v1, vcc
-; GCN1-NEXT:    v_cmp_gt_u64_e32 vcc, s[6:7], v[0:1]
+; GCN1-NEXT:    v_cmp_ge_u64_e32 vcc, s[6:7], v[0:1]
 ; GCN1-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
 ; GCN1-NEXT:    v_cndmask_b32_e32 v5, 0, v5, vcc
 ; GCN1-NEXT:    buffer_store_dword v4, v2, s[0:3], 0 offen
@@ -21473,7 +21473,7 @@ define amdgpu_gfx i64 @flat_atomic_uinc_wrap_i64_ret_offset_scalar(ptr inreg %ou
 ; GCN2-NEXT:    v_add_u32_e32 v4, vcc, 1, v0
 ; GCN2-NEXT:    s_waitcnt vmcnt(0)
 ; GCN2-NEXT:    v_addc_u32_e32 v5, vcc, 0, v1, vcc
-; GCN2-NEXT:    v_cmp_gt_u64_e32 vcc, s[6:7], v[0:1]
+; GCN2-NEXT:    v_cmp_ge_u64_e32 vcc, s[6:7], v[0:1]
 ; GCN2-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
 ; GCN2-NEXT:    v_cndmask_b32_e32 v5, 0, v5, vcc
 ; GCN2-NEXT:    buffer_store_dword v4, v2, s[0:3], 0 offen
@@ -21514,7 +21514,7 @@ define amdgpu_gfx i64 @flat_atomic_uinc_wrap_i64_ret_offset_scalar(ptr inreg %ou
 ; GCN3-NEXT:    v_add_co_u32_e32 v3, vcc, 1, v0
 ; GCN3-NEXT:    s_waitcnt vmcnt(0)
 ; GCN3-NEXT:    v_addc_co_u32_e32 v4, vcc, 0, v1, vcc
-; GCN3-NEXT:    v_cmp_gt_u64_e32 vcc, s[6:7], v[0:1]
+; GCN3-NEXT:    v_cmp_ge_u64_e32 vcc, s[6:7], v[0:1]
 ; GCN3-NEXT:    v_cndmask_b32_e32 v3, 0, v3, vcc
 ; GCN3-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
 ; GCN3-NEXT:    buffer_store_dword v3, v2, s[0:3], 0 offen
@@ -22411,7 +22411,7 @@ define amdgpu_gfx void @flat_atomic_udec_wrap_i64_noret_scalar(ptr inreg %ptr, i
 ; GCN1-NEXT:    v_mov_b32_e32 v4, s7
 ; GCN1-NEXT:    s_waitcnt vmcnt(0)
 ; GCN1-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[0:1]
-; GCN1-NEXT:    v_cmp_lt_u64_e64 s[34:35], s[6:7], v[0:1]
+; GCN1-NEXT:    v_cmp_le_u64_e64 s[34:35], s[6:7], v[0:1]
 ; GCN1-NEXT:    v_add_i32_e64 v0, s[36:37], -1, v0
 ; GCN1-NEXT:    s_or_b64 vcc, vcc, s[34:35]
 ; GCN1-NEXT:    v_addc_u32_e64 v1, s[36:37], -1, v1, s[36:37]
@@ -22459,7 +22459,7 @@ define amdgpu_gfx void @flat_atomic_udec_wrap_i64_noret_scalar(ptr inreg %ptr, i
 ; GCN2-NEXT:    v_mov_b32_e32 v4, s7
 ; GCN2-NEXT:    s_waitcnt vmcnt(0)
 ; GCN2-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[0:1]
-; GCN2-NEXT:    v_cmp_lt_u64_e64 s[34:35], s[6:7], v[0:1]
+; GCN2-NEXT:    v_cmp_le_u64_e64 s[34:35], s[6:7], v[0:1]
 ; GCN2-NEXT:    v_add_u32_e64 v0, s[36:37], -1, v0
 ; GCN2-NEXT:    s_or_b64 vcc, vcc, s[34:35]
 ; GCN2-NEXT:    v_addc_u32_e64 v1, s[36:37], -1, v1, s[36:37]
@@ -22503,7 +22503,7 @@ define amdgpu_gfx void @flat_atomic_udec_wrap_i64_noret_scalar(ptr inreg %ptr, i
 ; GCN3-NEXT:    v_mov_b32_e32 v3, s7
 ; GCN3-NEXT:    s_waitcnt vmcnt(0)
 ; GCN3-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[0:1]
-; GCN3-NEXT:    v_cmp_lt_u64_e64 s[34:35], s[6:7], v[0:1]
+; GCN3-NEXT:    v_cmp_le_u64_e64 s[34:35], s[6:7], v[0:1]
 ; GCN3-NEXT:    v_add_co_u32_e64 v0, s[36:37], -1, v0
 ; GCN3-NEXT:    s_or_b64 vcc, vcc, s[34:35]
 ; GCN3-NEXT:    v_addc_co_u32_e64 v1, s[36:37], -1, v1, s[36:37]
@@ -22558,7 +22558,7 @@ define amdgpu_gfx void @flat_atomic_udec_wrap_i64_noret_offset_scalar(ptr inreg
 ; GCN1-NEXT:    v_mov_b32_e32 v4, s7
 ; GCN1-NEXT:    s_waitcnt vmcnt(0)
 ; GCN1-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[0:1]
-; GCN1-NEXT:    v_cmp_lt_u64_e64 s[34:35], s[6:7], v[0:1]
+; GCN1-NEXT:    v_cmp_le_u64_e64 s[34:35], s[6:7], v[0:1]
 ; GCN1-NEXT:    v_add_i32_e64 v0, s[36:37], -1, v0
 ; GCN1-NEXT:    s_or_b64 vcc, vcc, s[34:35]
 ; GCN1-NEXT:    v_addc_u32_e64 v1, s[36:37], -1, v1, s[36:37]
@@ -22608,7 +22608,7 @@ define amdgpu_gfx void @flat_atomic_udec_wrap_i64_noret_offset_scalar(ptr inreg
 ; GCN2-NEXT:    v_mov_b32_e32 v4, s7
 ; GCN2-NEXT:    s_waitcnt vmcnt(0)
 ; GCN2-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[0:1]
-; GCN2-NEXT:    v_cmp_lt_u64_e64 s[34:35], s[6:7], v[0:1]
+; GCN2-NEXT:    v_cmp_le_u64_e64 s[34:35], s[6:7], v[0:1]
 ; GCN2-NEXT:    v_add_u32_e64 v0, s[36:37], -1, v0
 ; GCN2-NEXT:    s_or_b64 vcc, vcc, s[34:35]
 ; GCN2-NEXT:    v_addc_u32_e64 v1, s[36:37], -1, v1, s[36:37]
@@ -22654,7 +22654,7 @@ define amdgpu_gfx void @flat_atomic_udec_wrap_i64_noret_offset_scalar(ptr inreg
 ; GCN3-NEXT:    v_mov_b32_e32 v3, s7
 ; GCN3-NEXT:    s_waitcnt vmcnt(0)
 ; GCN3-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[0:1]
-; GCN3-NEXT:    v_cmp_lt_u64_e64 s[34:35], s[6:7], v[0:1]
+; GCN3-NEXT:    v_cmp_le_u64_e64 s[34:35], s[6:7], v[0:1]
 ; GCN3-NEXT:    v_add_co_u32_e64 v0, s[36:37], -1, v0
 ; GCN3-NEXT:    s_or_b64 vcc, vcc, s[34:35]
 ; GCN3-NEXT:    v_addc_co_u32_e64 v1, s[36:37], -1, v1, s[36:37]
@@ -22707,7 +22707,7 @@ define amdgpu_gfx i64 @flat_atomic_udec_wrap_i64_ret_scalar(ptr inreg %ptr, i64
 ; GCN1-NEXT:    v_add_i32_e64 v6, s[36:37], -1, v0
 ; GCN1-NEXT:    s_waitcnt vmcnt(0)
 ; GCN1-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[0:1]
-; GCN1-NEXT:    v_cmp_lt_u64_e64 s[34:35], s[6:7], v[0:1]
+; GCN1-NEXT:    v_cmp_le_u64_e64 s[34:35], s[6:7], v[0:1]
 ; GCN1-NEXT:    v_addc_u32_e64 v7, s[36:37], -1, v1, s[36:37]
 ; GCN1-NEXT:    s_or_b64 vcc, vcc, s[34:35]
 ; GCN1-NEXT:    v_cndmask_b32_e32 v5, v6, v5, vcc
@@ -22754,7 +22754,7 @@ define amdgpu_gfx i64 @flat_atomic_udec_wrap_i64_ret_scalar(ptr inreg %ptr, i64
 ; GCN2-NEXT:    v_add_u32_e64 v6, s[36:37], -1, v0
 ; GCN2-NEXT:    s_waitcnt vmcnt(0)
 ; GCN2-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[0:1]
-; GCN2-NEXT:    v_cmp_lt_u64_e64 s[34:35], s[6:7], v[0:1]
+; GCN2-NEXT:    v_cmp_le_u64_e64 s[34:35], s[6:7], v[0:1]
 ; GCN2-NEXT:    v_addc_u32_e64 v7, s[36:37], -1, v1, s[36:37]
 ; GCN2-NEXT:    s_or_b64 vcc, vcc, s[34:35]
 ; GCN2-NEXT:    v_cndmask_b32_e32 v5, v6, v5, vcc
@@ -22797,7 +22797,7 @@ define amdgpu_gfx i64 @flat_atomic_udec_wrap_i64_ret_scalar(ptr inreg %ptr, i64
 ; GCN3-NEXT:    v_add_co_u32_e64 v5, s[36:37], -1, v0
 ; GCN3-NEXT:    s_waitcnt vmcnt(0)
 ; GCN3-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[0:1]
-; GCN3-NEXT:    v_cmp_lt_u64_e64 s[34:35], s[6:7], v[0:1]
+; GCN3-NEXT:    v_cmp_le_u64_e64 s[34:35], s[6:7], v[0:1]
 ; GCN3-NEXT:    v_addc_co_u32_e64 v6, s[36:37], -1, v1, s[36:37]
 ; GCN3-NEXT:    s_or_b64 vcc, vcc, s[34:35]
 ; GCN3-NEXT:    v_cndmask_b32_e32 v4, v5, v4, vcc
@@ -22851,7 +22851,7 @@ define amdgpu_gfx i64 @flat_atomic_udec_wrap_i64_ret_offset_scalar(ptr inreg %ou
 ; GCN1-NEXT:    v_add_i32_e64 v6, s[36:37], -1, v0
 ; GCN1-NEXT:    s_waitcnt vmcnt(0)
 ; GCN1-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[0:1]
-; GCN1-NEXT:    v_cmp_lt_u64_e64 s[34:35], s[6:7], v[0:1]
+; GCN1-NEXT:    v_cmp_le_u64_e64 s[34:35], s[6:7], v[0:1]
 ; GCN1-NEXT:    v_addc_u32_e64 v7, s[36:37], -1, v1, s[36:37]
 ; GCN1-NEXT:    s_or_b64 vcc, vcc, s[34:35]
 ; GCN1-NEXT:    v_cndmask_b32_e32 v5, v6, v5, vcc
@@ -22900,7 +22900,7 @@ define amdgpu_gfx i64 @flat_atomic_udec_wrap_i64_ret_offset_scalar(ptr inreg %ou
 ; GCN2-NEXT:    v_add_u32_e64 v6, s[36:37], -1, v0
 ; GCN2-NEXT:    s_waitcnt vmcnt(0)
 ; GCN2-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[0:1]
-; GCN2-NEXT:    v_cmp_lt_u64_e64 s[34:35], s[6:7], v[0:1]
+; GCN2-NEXT:    v_cmp_le_u64_e64 s[34:35], s[6:7], v[0:1]
 ; GCN2-NEXT:    v_addc_u32_e64 v7, s[36:37], -1, v1, s[36:37]
 ; GCN2-NEXT:    s_or_b64 vcc, vcc, s[34:35]
 ; GCN2-NEXT:    v_cndmask_b32_e32 v5, v6, v5, vcc
@@ -22945,7 +22945,7 @@ define amdgpu_gfx i64 @flat_atomic_udec_wrap_i64_ret_offset_scalar(ptr inreg %ou
 ; GCN3-NEXT:    v_add_co_u32_e64 v5, s[36:37], -1, v0
 ; GCN3-NEXT:    s_waitcnt vmcnt(0)
 ; GCN3-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[0:1]
-; GCN3-NEXT:    v_cmp_lt_u64_e64 s[34:35], s[6:7], v[0:1]
+; GCN3-NEXT:    v_cmp_le_u64_e64 s[34:35], s[6:7], v[0:1]
 ; GCN3-NEXT:    v_addc_co_u32_e64 v6, s[36:37], -1, v1, s[36:37]
 ; GCN3-NEXT:    s_or_b64 vcc, vcc, s[34:35]
 ; GCN3-NEXT:    v_cndmask_b32_e32 v4, v5, v4, vcc
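
All of the predicate swaps above reduce to one identity: reversing the operands of an ordered comparison preserves strictness (v < s exactly when s > v, and v <= s exactly when s >= v), whereas negating it flips strictness (not (v < s) is v >= s). The old assertions paired each strict compare with its non-strict counterpart, or vice versa, and the two forms disagree only when the operands are equal. A minimal standalone C sketch (not part of the patch) that checks this over a small range:

#include <assert.h>
#include <stdint.h>

int main(void) {
  for (int64_t s = -3; s <= 3; ++s) {
    for (int64_t v = -3; v <= 3; ++v) {
      /* Swapping operands preserves strictness: these are the correct
         reversed forms, matching e.g. v_cmp_gt(s, v) for v < s. */
      assert((v < s) == (s > v));
      assert((v <= s) == (s >= v));
      /* Confusing strict with non-strict (v_cmp_ge where v_cmp_gt is
         required, or vice versa) diverges exactly when s == v. */
      assert(((s > v) != (s >= v)) == (s == v));
    }
  }
  return 0;
}

For min and max the two select candidates are equal whenever the inputs are, so those loops could still return a correct value in the equal case; the updated checks now expect the predicate that an operand swap actually yields.
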
diff --git a/llvm/test/CodeGen/AMDGPU/flat_atomics_i64_system_noprivate.ll b/llvm/test/CodeGen/AMDGPU/flat_atomics_i64_system_noprivate.ll
index fe47461ebf956..517b34d48a154 100644
--- a/llvm/test/CodeGen/AMDGPU/flat_atomics_i64_system_noprivate.ll
+++ b/llvm/test/CodeGen/AMDGPU/flat_atomics_i64_system_noprivate.ll
@@ -3857,7 +3857,7 @@ define amdgpu_gfx void @flat_atomic_max_i64_noret_scalar(ptr inreg %ptr, i64 inr
 ; GFX7-NEXT:  .LBB84_1: ; %atomicrmw.start
 ; GFX7-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX7-NEXT:    v_cmp_lt_i64_e32 vcc, s[6:7], v[2:3]
+; GFX7-NEXT:    v_cmp_le_i64_e32 vcc, s[6:7], v[2:3]
 ; GFX7-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
 ; GFX7-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
 ; GFX7-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
@@ -3892,7 +3892,7 @@ define amdgpu_gfx void @flat_atomic_max_i64_noret_scalar(ptr inreg %ptr, i64 inr
 ; GFX8-NEXT:  .LBB84_1: ; %atomicrmw.start
 ; GFX8-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_cmp_lt_i64_e32 vcc, s[6:7], v[2:3]
+; GFX8-NEXT:    v_cmp_le_i64_e32 vcc, s[6:7], v[2:3]
 ; GFX8-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
 ; GFX8-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
 ; GFX8-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
@@ -3922,7 +3922,7 @@ define amdgpu_gfx void @flat_atomic_max_i64_noret_scalar(ptr inreg %ptr, i64 inr
 ; GFX9-NEXT:  .LBB84_1: ; %atomicrmw.start
 ; GFX9-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_cmp_lt_i64_e32 vcc, s[6:7], v[2:3]
+; GFX9-NEXT:    v_cmp_le_i64_e32 vcc, s[6:7], v[2:3]
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
 ; GFX9-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
@@ -3961,7 +3961,7 @@ define amdgpu_gfx void @flat_atomic_max_i64_noret_offset_scalar(ptr inreg %out,
 ; GFX7-NEXT:  .LBB85_1: ; %atomicrmw.start
 ; GFX7-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX7-NEXT:    v_cmp_lt_i64_e32 vcc, s[6:7], v[2:3]
+; GFX7-NEXT:    v_cmp_le_i64_e32 vcc, s[6:7], v[2:3]
 ; GFX7-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
 ; GFX7-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
 ; GFX7-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
@@ -3996,7 +3996,7 @@ define amdgpu_gfx void @flat_atomic_max_i64_noret_offset_scalar(ptr inreg %out,
 ; GFX8-NEXT:  .LBB85_1: ; %atomicrmw.start
 ; GFX8-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_cmp_lt_i64_e32 vcc, s[6:7], v[2:3]
+; GFX8-NEXT:    v_cmp_le_i64_e32 vcc, s[6:7], v[2:3]
 ; GFX8-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
 ; GFX8-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
 ; GFX8-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
@@ -4026,7 +4026,7 @@ define amdgpu_gfx void @flat_atomic_max_i64_noret_offset_scalar(ptr inreg %out,
 ; GFX9-NEXT:  .LBB85_1: ; %atomicrmw.start
 ; GFX9-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_cmp_lt_i64_e32 vcc, s[6:7], v[2:3]
+; GFX9-NEXT:    v_cmp_le_i64_e32 vcc, s[6:7], v[2:3]
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
 ; GFX9-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] offset:32 glc
@@ -4068,7 +4068,7 @@ define amdgpu_gfx i64 @flat_atomic_max_i64_ret_scalar(ptr inreg %ptr, i64 inreg
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX7-NEXT:    v_mov_b32_e32 v9, v1
 ; GFX7-NEXT:    v_mov_b32_e32 v8, v0
-; GFX7-NEXT:    v_cmp_lt_i64_e32 vcc, s[6:7], v[8:9]
+; GFX7-NEXT:    v_cmp_le_i64_e32 vcc, s[6:7], v[8:9]
 ; GFX7-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
 ; GFX7-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
 ; GFX7-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[6:9] glc
@@ -4103,7 +4103,7 @@ define amdgpu_gfx i64 @flat_atomic_max_i64_ret_scalar(ptr inreg %ptr, i64 inreg
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_mov_b32_e32 v9, v1
 ; GFX8-NEXT:    v_mov_b32_e32 v8, v0
-; GFX8-NEXT:    v_cmp_lt_i64_e32 vcc, s[6:7], v[8:9]
+; GFX8-NEXT:    v_cmp_le_i64_e32 vcc, s[6:7], v[8:9]
 ; GFX8-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
 ; GFX8-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
 ; GFX8-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[6:9] glc
@@ -4133,7 +4133,7 @@ define amdgpu_gfx i64 @flat_atomic_max_i64_ret_scalar(ptr inreg %ptr, i64 inreg
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    v_mov_b32_e32 v9, v1
 ; GFX9-NEXT:    v_mov_b32_e32 v8, v0
-; GFX9-NEXT:    v_cmp_lt_i64_e32 vcc, s[6:7], v[8:9]
+; GFX9-NEXT:    v_cmp_le_i64_e32 vcc, s[6:7], v[8:9]
 ; GFX9-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
 ; GFX9-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[6:9] glc
@@ -4172,7 +4172,7 @@ define amdgpu_gfx i64 @flat_atomic_max_i64_ret_offset_scalar(ptr inreg %out, i64
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX7-NEXT:    v_mov_b32_e32 v9, v1
 ; GFX7-NEXT:    v_mov_b32_e32 v8, v0
-; GFX7-NEXT:    v_cmp_lt_i64_e32 vcc, s[6:7], v[8:9]
+; GFX7-NEXT:    v_cmp_le_i64_e32 vcc, s[6:7], v[8:9]
 ; GFX7-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
 ; GFX7-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
 ; GFX7-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[6:9] glc
@@ -4207,7 +4207,7 @@ define amdgpu_gfx i64 @flat_atomic_max_i64_ret_offset_scalar(ptr inreg %out, i64
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_mov_b32_e32 v9, v1
 ; GFX8-NEXT:    v_mov_b32_e32 v8, v0
-; GFX8-NEXT:    v_cmp_lt_i64_e32 vcc, s[6:7], v[8:9]
+; GFX8-NEXT:    v_cmp_le_i64_e32 vcc, s[6:7], v[8:9]
 ; GFX8-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
 ; GFX8-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
 ; GFX8-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[6:9] glc
@@ -4237,7 +4237,7 @@ define amdgpu_gfx i64 @flat_atomic_max_i64_ret_offset_scalar(ptr inreg %out, i64
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    v_mov_b32_e32 v9, v1
 ; GFX9-NEXT:    v_mov_b32_e32 v8, v0
-; GFX9-NEXT:    v_cmp_lt_i64_e32 vcc, s[6:7], v[8:9]
+; GFX9-NEXT:    v_cmp_le_i64_e32 vcc, s[6:7], v[8:9]
 ; GFX9-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
 ; GFX9-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[6:9] offset:32 glc
@@ -4275,7 +4275,7 @@ define amdgpu_kernel void @atomic_max_i64_addr64_offset(ptr %out, i64 %in, i64 %
 ; GFX7-NEXT:  .LBB88_1: ; %atomicrmw.start
 ; GFX7-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX7-NEXT:    v_cmp_lt_i64_e32 vcc, s[2:3], v[2:3]
+; GFX7-NEXT:    v_cmp_le_i64_e32 vcc, s[2:3], v[2:3]
 ; GFX7-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
 ; GFX7-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
 ; GFX7-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
@@ -4309,7 +4309,7 @@ define amdgpu_kernel void @atomic_max_i64_addr64_offset(ptr %out, i64 %in, i64 %
 ; GFX8-NEXT:  .LBB88_1: ; %atomicrmw.start
 ; GFX8-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_cmp_lt_i64_e32 vcc, s[2:3], v[2:3]
+; GFX8-NEXT:    v_cmp_le_i64_e32 vcc, s[2:3], v[2:3]
 ; GFX8-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
 ; GFX8-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
 ; GFX8-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
@@ -4341,7 +4341,7 @@ define amdgpu_kernel void @atomic_max_i64_addr64_offset(ptr %out, i64 %in, i64 %
 ; GFX9-NEXT:  .LBB88_1: ; %atomicrmw.start
 ; GFX9-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_cmp_lt_i64_e32 vcc, s[2:3], v[2:3]
+; GFX9-NEXT:    v_cmp_le_i64_e32 vcc, s[2:3], v[2:3]
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
 ; GFX9-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] offset:32 glc
@@ -4383,7 +4383,7 @@ define amdgpu_kernel void @atomic_max_i64_ret_addr64_offset(ptr %out, ptr %out2,
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX7-NEXT:    v_mov_b32_e32 v9, v3
 ; GFX7-NEXT:    v_mov_b32_e32 v8, v2
-; GFX7-NEXT:    v_cmp_lt_i64_e32 vcc, s[4:5], v[8:9]
+; GFX7-NEXT:    v_cmp_le_i64_e32 vcc, s[4:5], v[8:9]
 ; GFX7-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
 ; GFX7-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
 ; GFX7-NEXT:    flat_atomic_cmpswap_x2 v[2:3], v[0:1], v[6:9] glc
@@ -4420,7 +4420,7 @@ define amdgpu_kernel void @atomic_max_i64_ret_addr64_offset(ptr %out, ptr %out2,
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_mov_b32_e32 v9, v3
 ; GFX8-NEXT:    v_mov_b32_e32 v8, v2
-; GFX8-NEXT:    v_cmp_lt_i64_e32 vcc, s[4:5], v[8:9]
+; GFX8-NEXT:    v_cmp_le_i64_e32 vcc, s[4:5], v[8:9]
 ; GFX8-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
 ; GFX8-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
 ; GFX8-NEXT:    flat_atomic_cmpswap_x2 v[2:3], v[0:1], v[6:9] glc
@@ -4455,7 +4455,7 @@ define amdgpu_kernel void @atomic_max_i64_ret_addr64_offset(ptr %out, ptr %out2,
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    v_mov_b32_e32 v9, v3
 ; GFX9-NEXT:    v_mov_b32_e32 v8, v2
-; GFX9-NEXT:    v_cmp_lt_i64_e32 vcc, s[12:13], v[8:9]
+; GFX9-NEXT:    v_cmp_le_i64_e32 vcc, s[12:13], v[8:9]
 ; GFX9-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
 ; GFX9-NEXT:    flat_atomic_cmpswap_x2 v[2:3], v[0:1], v[6:9] offset:32 glc
@@ -4497,7 +4497,7 @@ define amdgpu_kernel void @atomic_max_i64_addr64(ptr %out, i64 %in, i64 %index)
 ; GFX7-NEXT:  .LBB90_1: ; %atomicrmw.start
 ; GFX7-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX7-NEXT:    v_cmp_lt_i64_e32 vcc, s[2:3], v[2:3]
+; GFX7-NEXT:    v_cmp_le_i64_e32 vcc, s[2:3], v[2:3]
 ; GFX7-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
 ; GFX7-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
 ; GFX7-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
@@ -4529,7 +4529,7 @@ define amdgpu_kernel void @atomic_max_i64_addr64(ptr %out, i64 %in, i64 %index)
 ; GFX8-NEXT:  .LBB90_1: ; %atomicrmw.start
 ; GFX8-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_cmp_lt_i64_e32 vcc, s[2:3], v[2:3]
+; GFX8-NEXT:    v_cmp_le_i64_e32 vcc, s[2:3], v[2:3]
 ; GFX8-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
 ; GFX8-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
 ; GFX8-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
@@ -4561,7 +4561,7 @@ define amdgpu_kernel void @atomic_max_i64_addr64(ptr %out, i64 %in, i64 %index)
 ; GFX9-NEXT:  .LBB90_1: ; %atomicrmw.start
 ; GFX9-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_cmp_lt_i64_e32 vcc, s[2:3], v[2:3]
+; GFX9-NEXT:    v_cmp_le_i64_e32 vcc, s[2:3], v[2:3]
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
 ; GFX9-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
@@ -4600,7 +4600,7 @@ define amdgpu_kernel void @atomic_max_i64_ret_addr64(ptr %out, ptr %out2, i64 %i
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX7-NEXT:    v_mov_b32_e32 v9, v3
 ; GFX7-NEXT:    v_mov_b32_e32 v8, v2
-; GFX7-NEXT:    v_cmp_lt_i64_e32 vcc, s[4:5], v[8:9]
+; GFX7-NEXT:    v_cmp_le_i64_e32 vcc, s[4:5], v[8:9]
 ; GFX7-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
 ; GFX7-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
 ; GFX7-NEXT:    flat_atomic_cmpswap_x2 v[2:3], v[0:1], v[6:9] glc
@@ -4635,7 +4635,7 @@ define amdgpu_kernel void @atomic_max_i64_ret_addr64(ptr %out, ptr %out2, i64 %i
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_mov_b32_e32 v9, v3
 ; GFX8-NEXT:    v_mov_b32_e32 v8, v2
-; GFX8-NEXT:    v_cmp_lt_i64_e32 vcc, s[4:5], v[8:9]
+; GFX8-NEXT:    v_cmp_le_i64_e32 vcc, s[4:5], v[8:9]
 ; GFX8-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
 ; GFX8-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
 ; GFX8-NEXT:    flat_atomic_cmpswap_x2 v[2:3], v[0:1], v[6:9] glc
@@ -4670,7 +4670,7 @@ define amdgpu_kernel void @atomic_max_i64_ret_addr64(ptr %out, ptr %out2, i64 %i
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    v_mov_b32_e32 v9, v3
 ; GFX9-NEXT:    v_mov_b32_e32 v8, v2
-; GFX9-NEXT:    v_cmp_lt_i64_e32 vcc, s[12:13], v[8:9]
+; GFX9-NEXT:    v_cmp_le_i64_e32 vcc, s[12:13], v[8:9]
 ; GFX9-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
 ; GFX9-NEXT:    flat_atomic_cmpswap_x2 v[2:3], v[0:1], v[6:9] glc
@@ -5239,7 +5239,7 @@ define amdgpu_gfx void @flat_atomic_umax_i64_noret_scalar(ptr inreg %ptr, i64 in
 ; GFX7-NEXT:  .LBB98_1: ; %atomicrmw.start
 ; GFX7-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX7-NEXT:    v_cmp_lt_u64_e32 vcc, s[6:7], v[2:3]
+; GFX7-NEXT:    v_cmp_le_u64_e32 vcc, s[6:7], v[2:3]
 ; GFX7-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
 ; GFX7-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
 ; GFX7-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
@@ -5274,7 +5274,7 @@ define amdgpu_gfx void @flat_atomic_umax_i64_noret_scalar(ptr inreg %ptr, i64 in
 ; GFX8-NEXT:  .LBB98_1: ; %atomicrmw.start
 ; GFX8-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_cmp_lt_u64_e32 vcc, s[6:7], v[2:3]
+; GFX8-NEXT:    v_cmp_le_u64_e32 vcc, s[6:7], v[2:3]
 ; GFX8-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
 ; GFX8-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
 ; GFX8-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
@@ -5304,7 +5304,7 @@ define amdgpu_gfx void @flat_atomic_umax_i64_noret_scalar(ptr inreg %ptr, i64 in
 ; GFX9-NEXT:  .LBB98_1: ; %atomicrmw.start
 ; GFX9-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_cmp_lt_u64_e32 vcc, s[6:7], v[2:3]
+; GFX9-NEXT:    v_cmp_le_u64_e32 vcc, s[6:7], v[2:3]
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
 ; GFX9-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
@@ -5343,7 +5343,7 @@ define amdgpu_gfx void @flat_atomic_umax_i64_noret_offset_scalar(ptr inreg %out,
 ; GFX7-NEXT:  .LBB99_1: ; %atomicrmw.start
 ; GFX7-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX7-NEXT:    v_cmp_lt_u64_e32 vcc, s[6:7], v[2:3]
+; GFX7-NEXT:    v_cmp_le_u64_e32 vcc, s[6:7], v[2:3]
 ; GFX7-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
 ; GFX7-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
 ; GFX7-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
@@ -5378,7 +5378,7 @@ define amdgpu_gfx void @flat_atomic_umax_i64_noret_offset_scalar(ptr inreg %out,
 ; GFX8-NEXT:  .LBB99_1: ; %atomicrmw.start
 ; GFX8-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_cmp_lt_u64_e32 vcc, s[6:7], v[2:3]
+; GFX8-NEXT:    v_cmp_le_u64_e32 vcc, s[6:7], v[2:3]
 ; GFX8-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
 ; GFX8-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
 ; GFX8-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
@@ -5408,7 +5408,7 @@ define amdgpu_gfx void @flat_atomic_umax_i64_noret_offset_scalar(ptr inreg %out,
 ; GFX9-NEXT:  .LBB99_1: ; %atomicrmw.start
 ; GFX9-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_cmp_lt_u64_e32 vcc, s[6:7], v[2:3]
+; GFX9-NEXT:    v_cmp_le_u64_e32 vcc, s[6:7], v[2:3]
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
 ; GFX9-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] offset:32 glc
@@ -5450,7 +5450,7 @@ define amdgpu_gfx i64 @flat_atomic_umax_i64_ret_scalar(ptr inreg %ptr, i64 inreg
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX7-NEXT:    v_mov_b32_e32 v9, v1
 ; GFX7-NEXT:    v_mov_b32_e32 v8, v0
-; GFX7-NEXT:    v_cmp_lt_u64_e32 vcc, s[6:7], v[8:9]
+; GFX7-NEXT:    v_cmp_le_u64_e32 vcc, s[6:7], v[8:9]
 ; GFX7-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
 ; GFX7-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
 ; GFX7-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[6:9] glc
@@ -5485,7 +5485,7 @@ define amdgpu_gfx i64 @flat_atomic_umax_i64_ret_scalar(ptr inreg %ptr, i64 inreg
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_mov_b32_e32 v9, v1
 ; GFX8-NEXT:    v_mov_b32_e32 v8, v0
-; GFX8-NEXT:    v_cmp_lt_u64_e32 vcc, s[6:7], v[8:9]
+; GFX8-NEXT:    v_cmp_le_u64_e32 vcc, s[6:7], v[8:9]
 ; GFX8-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
 ; GFX8-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
 ; GFX8-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[6:9] glc
@@ -5515,7 +5515,7 @@ define amdgpu_gfx i64 @flat_atomic_umax_i64_ret_scalar(ptr inreg %ptr, i64 inreg
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    v_mov_b32_e32 v9, v1
 ; GFX9-NEXT:    v_mov_b32_e32 v8, v0
-; GFX9-NEXT:    v_cmp_lt_u64_e32 vcc, s[6:7], v[8:9]
+; GFX9-NEXT:    v_cmp_le_u64_e32 vcc, s[6:7], v[8:9]
 ; GFX9-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
 ; GFX9-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[6:9] glc
@@ -5554,7 +5554,7 @@ define amdgpu_gfx i64 @flat_atomic_umax_i64_ret_offset_scalar(ptr inreg %out, i6
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX7-NEXT:    v_mov_b32_e32 v9, v1
 ; GFX7-NEXT:    v_mov_b32_e32 v8, v0
-; GFX7-NEXT:    v_cmp_lt_u64_e32 vcc, s[6:7], v[8:9]
+; GFX7-NEXT:    v_cmp_le_u64_e32 vcc, s[6:7], v[8:9]
 ; GFX7-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
 ; GFX7-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
 ; GFX7-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[6:9] glc
@@ -5589,7 +5589,7 @@ define amdgpu_gfx i64 @flat_atomic_umax_i64_ret_offset_scalar(ptr inreg %out, i6
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_mov_b32_e32 v9, v1
 ; GFX8-NEXT:    v_mov_b32_e32 v8, v0
-; GFX8-NEXT:    v_cmp_lt_u64_e32 vcc, s[6:7], v[8:9]
+; GFX8-NEXT:    v_cmp_le_u64_e32 vcc, s[6:7], v[8:9]
 ; GFX8-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
 ; GFX8-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
 ; GFX8-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[6:9] glc
@@ -5619,7 +5619,7 @@ define amdgpu_gfx i64 @flat_atomic_umax_i64_ret_offset_scalar(ptr inreg %out, i6
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    v_mov_b32_e32 v9, v1
 ; GFX9-NEXT:    v_mov_b32_e32 v8, v0
-; GFX9-NEXT:    v_cmp_lt_u64_e32 vcc, s[6:7], v[8:9]
+; GFX9-NEXT:    v_cmp_le_u64_e32 vcc, s[6:7], v[8:9]
 ; GFX9-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
 ; GFX9-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[6:9] offset:32 glc
@@ -5657,7 +5657,7 @@ define amdgpu_kernel void @atomic_umax_i64_addr64_offset(ptr %out, i64 %in, i64
 ; GFX7-NEXT:  .LBB102_1: ; %atomicrmw.start
 ; GFX7-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX7-NEXT:    v_cmp_lt_u64_e32 vcc, s[2:3], v[2:3]
+; GFX7-NEXT:    v_cmp_le_u64_e32 vcc, s[2:3], v[2:3]
 ; GFX7-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
 ; GFX7-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
 ; GFX7-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
@@ -5691,7 +5691,7 @@ define amdgpu_kernel void @atomic_umax_i64_addr64_offset(ptr %out, i64 %in, i64
 ; GFX8-NEXT:  .LBB102_1: ; %atomicrmw.start
 ; GFX8-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_cmp_lt_u64_e32 vcc, s[2:3], v[2:3]
+; GFX8-NEXT:    v_cmp_le_u64_e32 vcc, s[2:3], v[2:3]
 ; GFX8-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
 ; GFX8-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
 ; GFX8-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
@@ -5723,7 +5723,7 @@ define amdgpu_kernel void @atomic_umax_i64_addr64_offset(ptr %out, i64 %in, i64
 ; GFX9-NEXT:  .LBB102_1: ; %atomicrmw.start
 ; GFX9-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_cmp_lt_u64_e32 vcc, s[2:3], v[2:3]
+; GFX9-NEXT:    v_cmp_le_u64_e32 vcc, s[2:3], v[2:3]
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
 ; GFX9-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] offset:32 glc
@@ -5765,7 +5765,7 @@ define amdgpu_kernel void @atomic_umax_i64_ret_addr64_offset(ptr %out, ptr %out2
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX7-NEXT:    v_mov_b32_e32 v9, v3
 ; GFX7-NEXT:    v_mov_b32_e32 v8, v2
-; GFX7-NEXT:    v_cmp_lt_u64_e32 vcc, s[4:5], v[8:9]
+; GFX7-NEXT:    v_cmp_le_u64_e32 vcc, s[4:5], v[8:9]
 ; GFX7-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
 ; GFX7-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
 ; GFX7-NEXT:    flat_atomic_cmpswap_x2 v[2:3], v[0:1], v[6:9] glc
@@ -5802,7 +5802,7 @@ define amdgpu_kernel void @atomic_umax_i64_ret_addr64_offset(ptr %out, ptr %out2
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_mov_b32_e32 v9, v3
 ; GFX8-NEXT:    v_mov_b32_e32 v8, v2
-; GFX8-NEXT:    v_cmp_lt_u64_e32 vcc, s[4:5], v[8:9]
+; GFX8-NEXT:    v_cmp_le_u64_e32 vcc, s[4:5], v[8:9]
 ; GFX8-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
 ; GFX8-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
 ; GFX8-NEXT:    flat_atomic_cmpswap_x2 v[2:3], v[0:1], v[6:9] glc
@@ -5837,7 +5837,7 @@ define amdgpu_kernel void @atomic_umax_i64_ret_addr64_offset(ptr %out, ptr %out2
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    v_mov_b32_e32 v9, v3
 ; GFX9-NEXT:    v_mov_b32_e32 v8, v2
-; GFX9-NEXT:    v_cmp_lt_u64_e32 vcc, s[12:13], v[8:9]
+; GFX9-NEXT:    v_cmp_le_u64_e32 vcc, s[12:13], v[8:9]
 ; GFX9-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
 ; GFX9-NEXT:    flat_atomic_cmpswap_x2 v[2:3], v[0:1], v[6:9] offset:32 glc
@@ -5880,7 +5880,7 @@ define amdgpu_kernel void @atomic_umax_i64_ret_addr64(ptr %out, ptr %out2, i64 %
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX7-NEXT:    v_mov_b32_e32 v9, v3
 ; GFX7-NEXT:    v_mov_b32_e32 v8, v2
-; GFX7-NEXT:    v_cmp_lt_u64_e32 vcc, s[4:5], v[8:9]
+; GFX7-NEXT:    v_cmp_le_u64_e32 vcc, s[4:5], v[8:9]
 ; GFX7-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
 ; GFX7-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
 ; GFX7-NEXT:    flat_atomic_cmpswap_x2 v[2:3], v[0:1], v[6:9] glc
@@ -5915,7 +5915,7 @@ define amdgpu_kernel void @atomic_umax_i64_ret_addr64(ptr %out, ptr %out2, i64 %
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_mov_b32_e32 v9, v3
 ; GFX8-NEXT:    v_mov_b32_e32 v8, v2
-; GFX8-NEXT:    v_cmp_lt_u64_e32 vcc, s[4:5], v[8:9]
+; GFX8-NEXT:    v_cmp_le_u64_e32 vcc, s[4:5], v[8:9]
 ; GFX8-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
 ; GFX8-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
 ; GFX8-NEXT:    flat_atomic_cmpswap_x2 v[2:3], v[0:1], v[6:9] glc
@@ -5950,7 +5950,7 @@ define amdgpu_kernel void @atomic_umax_i64_ret_addr64(ptr %out, ptr %out2, i64 %
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    v_mov_b32_e32 v9, v3
 ; GFX9-NEXT:    v_mov_b32_e32 v8, v2
-; GFX9-NEXT:    v_cmp_lt_u64_e32 vcc, s[12:13], v[8:9]
+; GFX9-NEXT:    v_cmp_le_u64_e32 vcc, s[12:13], v[8:9]
 ; GFX9-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
 ; GFX9-NEXT:    flat_atomic_cmpswap_x2 v[2:3], v[0:1], v[6:9] glc
@@ -6519,7 +6519,7 @@ define amdgpu_gfx void @flat_atomic_umin_i64_noret_scalar(ptr inreg %ptr, i64 in
 ; GFX7-NEXT:  .LBB111_1: ; %atomicrmw.start
 ; GFX7-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX7-NEXT:    v_cmp_ge_u64_e32 vcc, s[6:7], v[2:3]
+; GFX7-NEXT:    v_cmp_gt_u64_e32 vcc, s[6:7], v[2:3]
 ; GFX7-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
 ; GFX7-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
 ; GFX7-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
@@ -6554,7 +6554,7 @@ define amdgpu_gfx void @flat_atomic_umin_i64_noret_scalar(ptr inreg %ptr, i64 in
 ; GFX8-NEXT:  .LBB111_1: ; %atomicrmw.start
 ; GFX8-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_cmp_ge_u64_e32 vcc, s[6:7], v[2:3]
+; GFX8-NEXT:    v_cmp_gt_u64_e32 vcc, s[6:7], v[2:3]
 ; GFX8-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
 ; GFX8-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
 ; GFX8-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
@@ -6584,7 +6584,7 @@ define amdgpu_gfx void @flat_atomic_umin_i64_noret_scalar(ptr inreg %ptr, i64 in
 ; GFX9-NEXT:  .LBB111_1: ; %atomicrmw.start
 ; GFX9-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_cmp_ge_u64_e32 vcc, s[6:7], v[2:3]
+; GFX9-NEXT:    v_cmp_gt_u64_e32 vcc, s[6:7], v[2:3]
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
 ; GFX9-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
@@ -6623,7 +6623,7 @@ define amdgpu_gfx void @flat_atomic_umin_i64_noret_offset_scalar(ptr inreg %out,
 ; GFX7-NEXT:  .LBB112_1: ; %atomicrmw.start
 ; GFX7-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX7-NEXT:    v_cmp_ge_u64_e32 vcc, s[6:7], v[2:3]
+; GFX7-NEXT:    v_cmp_gt_u64_e32 vcc, s[6:7], v[2:3]
 ; GFX7-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
 ; GFX7-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
 ; GFX7-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
@@ -6658,7 +6658,7 @@ define amdgpu_gfx void @flat_atomic_umin_i64_noret_offset_scalar(ptr inreg %out,
 ; GFX8-NEXT:  .LBB112_1: ; %atomicrmw.start
 ; GFX8-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_cmp_ge_u64_e32 vcc, s[6:7], v[2:3]
+; GFX8-NEXT:    v_cmp_gt_u64_e32 vcc, s[6:7], v[2:3]
 ; GFX8-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
 ; GFX8-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
 ; GFX8-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
@@ -6688,7 +6688,7 @@ define amdgpu_gfx void @flat_atomic_umin_i64_noret_offset_scalar(ptr inreg %out,
 ; GFX9-NEXT:  .LBB112_1: ; %atomicrmw.start
 ; GFX9-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_cmp_ge_u64_e32 vcc, s[6:7], v[2:3]
+; GFX9-NEXT:    v_cmp_gt_u64_e32 vcc, s[6:7], v[2:3]
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
 ; GFX9-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] offset:32 glc
@@ -6730,7 +6730,7 @@ define amdgpu_gfx i64 @flat_atomic_umin_i64_ret_scalar(ptr inreg %ptr, i64 inreg
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX7-NEXT:    v_mov_b32_e32 v9, v1
 ; GFX7-NEXT:    v_mov_b32_e32 v8, v0
-; GFX7-NEXT:    v_cmp_ge_u64_e32 vcc, s[6:7], v[8:9]
+; GFX7-NEXT:    v_cmp_gt_u64_e32 vcc, s[6:7], v[8:9]
 ; GFX7-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
 ; GFX7-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
 ; GFX7-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[6:9] glc
@@ -6765,7 +6765,7 @@ define amdgpu_gfx i64 @flat_atomic_umin_i64_ret_scalar(ptr inreg %ptr, i64 inreg
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_mov_b32_e32 v9, v1
 ; GFX8-NEXT:    v_mov_b32_e32 v8, v0
-; GFX8-NEXT:    v_cmp_ge_u64_e32 vcc, s[6:7], v[8:9]
+; GFX8-NEXT:    v_cmp_gt_u64_e32 vcc, s[6:7], v[8:9]
 ; GFX8-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
 ; GFX8-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
 ; GFX8-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[6:9] glc
@@ -6795,7 +6795,7 @@ define amdgpu_gfx i64 @flat_atomic_umin_i64_ret_scalar(ptr inreg %ptr, i64 inreg
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    v_mov_b32_e32 v9, v1
 ; GFX9-NEXT:    v_mov_b32_e32 v8, v0
-; GFX9-NEXT:    v_cmp_ge_u64_e32 vcc, s[6:7], v[8:9]
+; GFX9-NEXT:    v_cmp_gt_u64_e32 vcc, s[6:7], v[8:9]
 ; GFX9-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
 ; GFX9-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[6:9] glc
@@ -6834,7 +6834,7 @@ define amdgpu_gfx i64 @flat_atomic_umin_i64_ret_offset_scalar(ptr inreg %out, i6
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX7-NEXT:    v_mov_b32_e32 v9, v1
 ; GFX7-NEXT:    v_mov_b32_e32 v8, v0
-; GFX7-NEXT:    v_cmp_ge_u64_e32 vcc, s[6:7], v[8:9]
+; GFX7-NEXT:    v_cmp_gt_u64_e32 vcc, s[6:7], v[8:9]
 ; GFX7-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
 ; GFX7-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
 ; GFX7-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[6:9] glc
@@ -6869,7 +6869,7 @@ define amdgpu_gfx i64 @flat_atomic_umin_i64_ret_offset_scalar(ptr inreg %out, i6
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_mov_b32_e32 v9, v1
 ; GFX8-NEXT:    v_mov_b32_e32 v8, v0
-; GFX8-NEXT:    v_cmp_ge_u64_e32 vcc, s[6:7], v[8:9]
+; GFX8-NEXT:    v_cmp_gt_u64_e32 vcc, s[6:7], v[8:9]
 ; GFX8-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
 ; GFX8-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
 ; GFX8-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[6:9] glc
@@ -6899,7 +6899,7 @@ define amdgpu_gfx i64 @flat_atomic_umin_i64_ret_offset_scalar(ptr inreg %out, i6
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    v_mov_b32_e32 v9, v1
 ; GFX9-NEXT:    v_mov_b32_e32 v8, v0
-; GFX9-NEXT:    v_cmp_ge_u64_e32 vcc, s[6:7], v[8:9]
+; GFX9-NEXT:    v_cmp_gt_u64_e32 vcc, s[6:7], v[8:9]
 ; GFX9-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
 ; GFX9-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[6:9] offset:32 glc
@@ -7463,7 +7463,7 @@ define amdgpu_gfx void @flat_atomic_min_i64_noret_scalar(ptr inreg %ptr, i64 inr
 ; GFX7-NEXT:  .LBB121_1: ; %atomicrmw.start
 ; GFX7-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX7-NEXT:    v_cmp_ge_i64_e32 vcc, s[6:7], v[2:3]
+; GFX7-NEXT:    v_cmp_gt_i64_e32 vcc, s[6:7], v[2:3]
 ; GFX7-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
 ; GFX7-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
 ; GFX7-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
@@ -7498,7 +7498,7 @@ define amdgpu_gfx void @flat_atomic_min_i64_noret_scalar(ptr inreg %ptr, i64 inr
 ; GFX8-NEXT:  .LBB121_1: ; %atomicrmw.start
 ; GFX8-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_cmp_ge_i64_e32 vcc, s[6:7], v[2:3]
+; GFX8-NEXT:    v_cmp_gt_i64_e32 vcc, s[6:7], v[2:3]
 ; GFX8-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
 ; GFX8-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
 ; GFX8-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
@@ -7528,7 +7528,7 @@ define amdgpu_gfx void @flat_atomic_min_i64_noret_scalar(ptr inreg %ptr, i64 inr
 ; GFX9-NEXT:  .LBB121_1: ; %atomicrmw.start
 ; GFX9-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_cmp_ge_i64_e32 vcc, s[6:7], v[2:3]
+; GFX9-NEXT:    v_cmp_gt_i64_e32 vcc, s[6:7], v[2:3]
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
 ; GFX9-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
@@ -7567,7 +7567,7 @@ define amdgpu_gfx void @flat_atomic_min_i64_noret_offset_scalar(ptr inreg %out,
 ; GFX7-NEXT:  .LBB122_1: ; %atomicrmw.start
 ; GFX7-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX7-NEXT:    v_cmp_ge_i64_e32 vcc, s[6:7], v[2:3]
+; GFX7-NEXT:    v_cmp_gt_i64_e32 vcc, s[6:7], v[2:3]
 ; GFX7-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
 ; GFX7-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
 ; GFX7-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
@@ -7602,7 +7602,7 @@ define amdgpu_gfx void @flat_atomic_min_i64_noret_offset_scalar(ptr inreg %out,
 ; GFX8-NEXT:  .LBB122_1: ; %atomicrmw.start
 ; GFX8-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_cmp_ge_i64_e32 vcc, s[6:7], v[2:3]
+; GFX8-NEXT:    v_cmp_gt_i64_e32 vcc, s[6:7], v[2:3]
 ; GFX8-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
 ; GFX8-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
 ; GFX8-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
@@ -7632,7 +7632,7 @@ define amdgpu_gfx void @flat_atomic_min_i64_noret_offset_scalar(ptr inreg %out,
 ; GFX9-NEXT:  .LBB122_1: ; %atomicrmw.start
 ; GFX9-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_cmp_ge_i64_e32 vcc, s[6:7], v[2:3]
+; GFX9-NEXT:    v_cmp_gt_i64_e32 vcc, s[6:7], v[2:3]
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
 ; GFX9-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] offset:32 glc
@@ -7674,7 +7674,7 @@ define amdgpu_gfx i64 @flat_atomic_min_i64_ret_scalar(ptr inreg %ptr, i64 inreg
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX7-NEXT:    v_mov_b32_e32 v9, v1
 ; GFX7-NEXT:    v_mov_b32_e32 v8, v0
-; GFX7-NEXT:    v_cmp_ge_i64_e32 vcc, s[6:7], v[8:9]
+; GFX7-NEXT:    v_cmp_gt_i64_e32 vcc, s[6:7], v[8:9]
 ; GFX7-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
 ; GFX7-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
 ; GFX7-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[6:9] glc
@@ -7709,7 +7709,7 @@ define amdgpu_gfx i64 @flat_atomic_min_i64_ret_scalar(ptr inreg %ptr, i64 inreg
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_mov_b32_e32 v9, v1
 ; GFX8-NEXT:    v_mov_b32_e32 v8, v0
-; GFX8-NEXT:    v_cmp_ge_i64_e32 vcc, s[6:7], v[8:9]
+; GFX8-NEXT:    v_cmp_gt_i64_e32 vcc, s[6:7], v[8:9]
 ; GFX8-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
 ; GFX8-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
 ; GFX8-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[6:9] glc
@@ -7739,7 +7739,7 @@ define amdgpu_gfx i64 @flat_atomic_min_i64_ret_scalar(ptr inreg %ptr, i64 inreg
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    v_mov_b32_e32 v9, v1
 ; GFX9-NEXT:    v_mov_b32_e32 v8, v0
-; GFX9-NEXT:    v_cmp_ge_i64_e32 vcc, s[6:7], v[8:9]
+; GFX9-NEXT:    v_cmp_gt_i64_e32 vcc, s[6:7], v[8:9]
 ; GFX9-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
 ; GFX9-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[6:9] glc
@@ -7778,7 +7778,7 @@ define amdgpu_gfx i64 @flat_atomic_min_i64_ret_offset_scalar(ptr inreg %out, i64
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX7-NEXT:    v_mov_b32_e32 v9, v1
 ; GFX7-NEXT:    v_mov_b32_e32 v8, v0
-; GFX7-NEXT:    v_cmp_ge_i64_e32 vcc, s[6:7], v[8:9]
+; GFX7-NEXT:    v_cmp_gt_i64_e32 vcc, s[6:7], v[8:9]
 ; GFX7-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
 ; GFX7-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
 ; GFX7-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[6:9] glc
@@ -7813,7 +7813,7 @@ define amdgpu_gfx i64 @flat_atomic_min_i64_ret_offset_scalar(ptr inreg %out, i64
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_mov_b32_e32 v9, v1
 ; GFX8-NEXT:    v_mov_b32_e32 v8, v0
-; GFX8-NEXT:    v_cmp_ge_i64_e32 vcc, s[6:7], v[8:9]
+; GFX8-NEXT:    v_cmp_gt_i64_e32 vcc, s[6:7], v[8:9]
 ; GFX8-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
 ; GFX8-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
 ; GFX8-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[6:9] glc
@@ -7843,7 +7843,7 @@ define amdgpu_gfx i64 @flat_atomic_min_i64_ret_offset_scalar(ptr inreg %out, i64
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    v_mov_b32_e32 v9, v1
 ; GFX9-NEXT:    v_mov_b32_e32 v8, v0
-; GFX9-NEXT:    v_cmp_ge_i64_e32 vcc, s[6:7], v[8:9]
+; GFX9-NEXT:    v_cmp_gt_i64_e32 vcc, s[6:7], v[8:9]
 ; GFX9-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
 ; GFX9-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[6:9] offset:32 glc
@@ -7881,7 +7881,7 @@ define amdgpu_kernel void @atomic_min_i64_addr64_offset(ptr %out, i64 %in, i64 %
 ; GFX7-NEXT:  .LBB125_1: ; %atomicrmw.start
 ; GFX7-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX7-NEXT:    v_cmp_ge_i64_e32 vcc, s[2:3], v[2:3]
+; GFX7-NEXT:    v_cmp_gt_i64_e32 vcc, s[2:3], v[2:3]
 ; GFX7-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
 ; GFX7-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
 ; GFX7-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
@@ -7915,7 +7915,7 @@ define amdgpu_kernel void @atomic_min_i64_addr64_offset(ptr %out, i64 %in, i64 %
 ; GFX8-NEXT:  .LBB125_1: ; %atomicrmw.start
 ; GFX8-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_cmp_ge_i64_e32 vcc, s[2:3], v[2:3]
+; GFX8-NEXT:    v_cmp_gt_i64_e32 vcc, s[2:3], v[2:3]
 ; GFX8-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
 ; GFX8-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
 ; GFX8-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
@@ -7947,7 +7947,7 @@ define amdgpu_kernel void @atomic_min_i64_addr64_offset(ptr %out, i64 %in, i64 %
 ; GFX9-NEXT:  .LBB125_1: ; %atomicrmw.start
 ; GFX9-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_cmp_ge_i64_e32 vcc, s[2:3], v[2:3]
+; GFX9-NEXT:    v_cmp_gt_i64_e32 vcc, s[2:3], v[2:3]
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
 ; GFX9-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] offset:32 glc
@@ -7989,7 +7989,7 @@ define amdgpu_kernel void @atomic_min_i64_ret_addr64_offset(ptr %out, ptr %out2,
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX7-NEXT:    v_mov_b32_e32 v9, v3
 ; GFX7-NEXT:    v_mov_b32_e32 v8, v2
-; GFX7-NEXT:    v_cmp_ge_i64_e32 vcc, s[4:5], v[8:9]
+; GFX7-NEXT:    v_cmp_gt_i64_e32 vcc, s[4:5], v[8:9]
 ; GFX7-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
 ; GFX7-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
 ; GFX7-NEXT:    flat_atomic_cmpswap_x2 v[2:3], v[0:1], v[6:9] glc
@@ -8026,7 +8026,7 @@ define amdgpu_kernel void @atomic_min_i64_ret_addr64_offset(ptr %out, ptr %out2,
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_mov_b32_e32 v9, v3
 ; GFX8-NEXT:    v_mov_b32_e32 v8, v2
-; GFX8-NEXT:    v_cmp_ge_i64_e32 vcc, s[4:5], v[8:9]
+; GFX8-NEXT:    v_cmp_gt_i64_e32 vcc, s[4:5], v[8:9]
 ; GFX8-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
 ; GFX8-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
 ; GFX8-NEXT:    flat_atomic_cmpswap_x2 v[2:3], v[0:1], v[6:9] glc
@@ -8061,7 +8061,7 @@ define amdgpu_kernel void @atomic_min_i64_ret_addr64_offset(ptr %out, ptr %out2,
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    v_mov_b32_e32 v9, v3
 ; GFX9-NEXT:    v_mov_b32_e32 v8, v2
-; GFX9-NEXT:    v_cmp_ge_i64_e32 vcc, s[12:13], v[8:9]
+; GFX9-NEXT:    v_cmp_gt_i64_e32 vcc, s[12:13], v[8:9]
 ; GFX9-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
 ; GFX9-NEXT:    flat_atomic_cmpswap_x2 v[2:3], v[0:1], v[6:9] offset:32 glc
@@ -8101,7 +8101,7 @@ define amdgpu_kernel void @atomic_min_i64(ptr %out, i64 %in) {
 ; GFX7-NEXT:  .LBB127_1: ; %atomicrmw.start
 ; GFX7-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX7-NEXT:    v_cmp_ge_i64_e32 vcc, s[2:3], v[2:3]
+; GFX7-NEXT:    v_cmp_gt_i64_e32 vcc, s[2:3], v[2:3]
 ; GFX7-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
 ; GFX7-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
 ; GFX7-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
@@ -8131,7 +8131,7 @@ define amdgpu_kernel void @atomic_min_i64(ptr %out, i64 %in) {
 ; GFX8-NEXT:  .LBB127_1: ; %atomicrmw.start
 ; GFX8-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_cmp_ge_i64_e32 vcc, s[2:3], v[2:3]
+; GFX8-NEXT:    v_cmp_gt_i64_e32 vcc, s[2:3], v[2:3]
 ; GFX8-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
 ; GFX8-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
 ; GFX8-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
@@ -8161,7 +8161,7 @@ define amdgpu_kernel void @atomic_min_i64(ptr %out, i64 %in) {
 ; GFX9-NEXT:  .LBB127_1: ; %atomicrmw.start
 ; GFX9-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_cmp_ge_i64_e32 vcc, s[2:3], v[2:3]
+; GFX9-NEXT:    v_cmp_gt_i64_e32 vcc, s[2:3], v[2:3]
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
 ; GFX9-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
@@ -8199,7 +8199,7 @@ define amdgpu_kernel void @atomic_min_i64_ret_addr64(ptr %out, ptr %out2, i64 %i
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX7-NEXT:    v_mov_b32_e32 v9, v3
 ; GFX7-NEXT:    v_mov_b32_e32 v8, v2
-; GFX7-NEXT:    v_cmp_ge_i64_e32 vcc, s[4:5], v[8:9]
+; GFX7-NEXT:    v_cmp_gt_i64_e32 vcc, s[4:5], v[8:9]
 ; GFX7-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
 ; GFX7-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
 ; GFX7-NEXT:    flat_atomic_cmpswap_x2 v[2:3], v[0:1], v[6:9] glc
@@ -8234,7 +8234,7 @@ define amdgpu_kernel void @atomic_min_i64_ret_addr64(ptr %out, ptr %out2, i64 %i
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_mov_b32_e32 v9, v3
 ; GFX8-NEXT:    v_mov_b32_e32 v8, v2
-; GFX8-NEXT:    v_cmp_ge_i64_e32 vcc, s[4:5], v[8:9]
+; GFX8-NEXT:    v_cmp_gt_i64_e32 vcc, s[4:5], v[8:9]
 ; GFX8-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
 ; GFX8-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
 ; GFX8-NEXT:    flat_atomic_cmpswap_x2 v[2:3], v[0:1], v[6:9] glc
@@ -8269,7 +8269,7 @@ define amdgpu_kernel void @atomic_min_i64_ret_addr64(ptr %out, ptr %out2, i64 %i
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    v_mov_b32_e32 v9, v3
 ; GFX9-NEXT:    v_mov_b32_e32 v8, v2
-; GFX9-NEXT:    v_cmp_ge_i64_e32 vcc, s[12:13], v[8:9]
+; GFX9-NEXT:    v_cmp_gt_i64_e32 vcc, s[12:13], v[8:9]
 ; GFX9-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
 ; GFX9-NEXT:    flat_atomic_cmpswap_x2 v[2:3], v[0:1], v[6:9] glc
diff --git a/llvm/test/CodeGen/AMDGPU/fneg-combines.f16.ll b/llvm/test/CodeGen/AMDGPU/fneg-combines.f16.ll
index 462d7748b86cd..ec4baac0c285c 100644
--- a/llvm/test/CodeGen/AMDGPU/fneg-combines.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fneg-combines.f16.ll
@@ -3961,10 +3961,10 @@ define half @v_fneg_fp_round_f64_to_f16(double %a) #0 {
 ; SI-NEXT:    v_lshlrev_b32_e32 v4, 12, v3
 ; SI-NEXT:    v_or_b32_e32 v2, v5, v2
 ; SI-NEXT:    v_or_b32_e32 v4, v0, v4
-; SI-NEXT:    v_cmp_gt_i32_e32 vcc, 1, v3
+; SI-NEXT:    v_cmp_ge_i32_e32 vcc, 1, v3
 ; SI-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
 ; SI-NEXT:    v_and_b32_e32 v4, 7, v2
-; SI-NEXT:    v_cmp_lt_i32_e32 vcc, 5, v4
+; SI-NEXT:    v_cmp_le_i32_e32 vcc, 5, v4
 ; SI-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
 ; SI-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v4
 ; SI-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
@@ -3972,7 +3972,7 @@ define half @v_fneg_fp_round_f64_to_f16(double %a) #0 {
 ; SI-NEXT:    v_lshrrev_b32_e32 v2, 2, v2
 ; SI-NEXT:    v_add_i32_e32 v2, vcc, v2, v4
 ; SI-NEXT:    v_mov_b32_e32 v4, 0x7c00
-; SI-NEXT:    v_cmp_gt_i32_e32 vcc, 31, v3
+; SI-NEXT:    v_cmp_ge_i32_e32 vcc, 31, v3
 ; SI-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
 ; SI-NEXT:    v_mov_b32_e32 v5, 0x7e00
 ; SI-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
@@ -4011,10 +4011,10 @@ define half @v_fneg_fp_round_f64_to_f16(double %a) #0 {
 ; VI-NEXT:    v_lshlrev_b32_e32 v4, 12, v3
 ; VI-NEXT:    v_or_b32_e32 v2, v5, v2
 ; VI-NEXT:    v_or_b32_e32 v4, v0, v4
-; VI-NEXT:    v_cmp_gt_i32_e32 vcc, 1, v3
+; VI-NEXT:    v_cmp_ge_i32_e32 vcc, 1, v3
 ; VI-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
 ; VI-NEXT:    v_and_b32_e32 v4, 7, v2
-; VI-NEXT:    v_cmp_lt_i32_e32 vcc, 5, v4
+; VI-NEXT:    v_cmp_le_i32_e32 vcc, 5, v4
 ; VI-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
 ; VI-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v4
 ; VI-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
@@ -4022,7 +4022,7 @@ define half @v_fneg_fp_round_f64_to_f16(double %a) #0 {
 ; VI-NEXT:    v_lshrrev_b32_e32 v2, 2, v2
 ; VI-NEXT:    v_add_u32_e32 v2, vcc, v2, v4
 ; VI-NEXT:    v_mov_b32_e32 v4, 0x7c00
-; VI-NEXT:    v_cmp_gt_i32_e32 vcc, 31, v3
+; VI-NEXT:    v_cmp_ge_i32_e32 vcc, 31, v3
 ; VI-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
 ; VI-NEXT:    v_mov_b32_e32 v5, 0x7e00
 ; VI-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
@@ -4062,12 +4062,12 @@ define half @v_fneg_fp_round_f64_to_f16(double %a) #0 {
 ; GFX11-NEXT:    v_add_nc_u32_e32 v3, 0xfffffc10, v3
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
 ; GFX11-NEXT:    v_lshl_or_b32 v4, v3, 12, v0
-; GFX11-NEXT:    v_cmp_gt_i32_e32 vcc_lo, 1, v3
+; GFX11-NEXT:    v_cmp_ge_i32_e32 vcc_lo, 1, v3
 ; GFX11-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc_lo
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
 ; GFX11-NEXT:    v_and_b32_e32 v4, 7, v2
 ; GFX11-NEXT:    v_lshrrev_b32_e32 v2, 2, v2
-; GFX11-NEXT:    v_cmp_lt_i32_e32 vcc_lo, 5, v4
+; GFX11-NEXT:    v_cmp_le_i32_e32 vcc_lo, 5, v4
 ; GFX11-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc_lo
 ; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v4
 ; GFX11-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc_lo
@@ -4077,7 +4077,7 @@ define half @v_fneg_fp_round_f64_to_f16(double %a) #0 {
 ; GFX11-NEXT:    v_dual_mov_b32 v5, 0x7e00 :: v_dual_add_nc_u32 v2, v2, v4
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_3)
 ; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7c00, v5, vcc_lo
-; GFX11-NEXT:    v_cmp_gt_i32_e32 vcc_lo, 31, v3
+; GFX11-NEXT:    v_cmp_ge_i32_e32 vcc_lo, 31, v3
 ; GFX11-NEXT:    v_cndmask_b32_e32 v2, 0x7c00, v2, vcc_lo
 ; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0x40f, v3
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
@@ -4113,10 +4113,10 @@ define half @v_fneg_fp_round_fneg_f64_to_f16(double %a) #0 {
 ; SI-NEXT:    v_lshlrev_b32_e32 v4, 12, v3
 ; SI-NEXT:    v_or_b32_e32 v2, v5, v2
 ; SI-NEXT:    v_or_b32_e32 v4, v0, v4
-; SI-NEXT:    v_cmp_gt_i32_e32 vcc, 1, v3
+; SI-NEXT:    v_cmp_ge_i32_e32 vcc, 1, v3
 ; SI-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
 ; SI-NEXT:    v_and_b32_e32 v4, 7, v2
-; SI-NEXT:    v_cmp_lt_i32_e32 vcc, 5, v4
+; SI-NEXT:    v_cmp_le_i32_e32 vcc, 5, v4
 ; SI-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
 ; SI-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v4
 ; SI-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
@@ -4124,7 +4124,7 @@ define half @v_fneg_fp_round_fneg_f64_to_f16(double %a) #0 {
 ; SI-NEXT:    v_lshrrev_b32_e32 v2, 2, v2
 ; SI-NEXT:    v_add_i32_e32 v2, vcc, v2, v4
 ; SI-NEXT:    v_mov_b32_e32 v4, 0x7c00
-; SI-NEXT:    v_cmp_gt_i32_e32 vcc, 31, v3
+; SI-NEXT:    v_cmp_ge_i32_e32 vcc, 31, v3
 ; SI-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
 ; SI-NEXT:    v_mov_b32_e32 v5, 0x7e00
 ; SI-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
@@ -4161,10 +4161,10 @@ define half @v_fneg_fp_round_fneg_f64_to_f16(double %a) #0 {
 ; VI-NEXT:    v_lshlrev_b32_e32 v4, 12, v3
 ; VI-NEXT:    v_or_b32_e32 v2, v5, v2
 ; VI-NEXT:    v_or_b32_e32 v4, v0, v4
-; VI-NEXT:    v_cmp_gt_i32_e32 vcc, 1, v3
+; VI-NEXT:    v_cmp_ge_i32_e32 vcc, 1, v3
 ; VI-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
 ; VI-NEXT:    v_and_b32_e32 v4, 7, v2
-; VI-NEXT:    v_cmp_lt_i32_e32 vcc, 5, v4
+; VI-NEXT:    v_cmp_le_i32_e32 vcc, 5, v4
 ; VI-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
 ; VI-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v4
 ; VI-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
@@ -4172,7 +4172,7 @@ define half @v_fneg_fp_round_fneg_f64_to_f16(double %a) #0 {
 ; VI-NEXT:    v_lshrrev_b32_e32 v2, 2, v2
 ; VI-NEXT:    v_add_u32_e32 v2, vcc, v2, v4
 ; VI-NEXT:    v_mov_b32_e32 v4, 0x7c00
-; VI-NEXT:    v_cmp_gt_i32_e32 vcc, 31, v3
+; VI-NEXT:    v_cmp_ge_i32_e32 vcc, 31, v3
 ; VI-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
 ; VI-NEXT:    v_mov_b32_e32 v5, 0x7e00
 ; VI-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
@@ -4210,13 +4210,13 @@ define half @v_fneg_fp_round_fneg_f64_to_f16(double %a) #0 {
 ; GFX11-NEXT:    v_or_b32_e32 v2, v5, v2
 ; GFX11-NEXT:    v_add_nc_u32_e32 v3, 0xfffffc10, v3
 ; GFX11-NEXT:    v_lshl_or_b32 v4, v3, 12, v0
-; GFX11-NEXT:    v_cmp_gt_i32_e32 vcc_lo, 1, v3
+; GFX11-NEXT:    v_cmp_ge_i32_e32 vcc_lo, 1, v3
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX11-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc_lo
 ; GFX11-NEXT:    v_and_b32_e32 v4, 7, v2
 ; GFX11-NEXT:    v_lshrrev_b32_e32 v2, 2, v2
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_4) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_cmp_lt_i32_e32 vcc_lo, 5, v4
+; GFX11-NEXT:    v_cmp_le_i32_e32 vcc_lo, 5, v4
 ; GFX11-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc_lo
 ; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v4
 ; GFX11-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc_lo
@@ -4225,7 +4225,7 @@ define half @v_fneg_fp_round_fneg_f64_to_f16(double %a) #0 {
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX11-NEXT:    v_dual_mov_b32 v5, 0x7e00 :: v_dual_add_nc_u32 v2, v2, v4
 ; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7c00, v5, vcc_lo
-; GFX11-NEXT:    v_cmp_gt_i32_e32 vcc_lo, 31, v3
+; GFX11-NEXT:    v_cmp_ge_i32_e32 vcc_lo, 31, v3
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2)
 ; GFX11-NEXT:    v_cndmask_b32_e32 v2, 0x7c00, v2, vcc_lo
 ; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0x40f, v3
@@ -4265,10 +4265,10 @@ define { half, double } @v_fneg_fp_round_store_use_fneg_f64_to_f16(double %a) #0
 ; SI-NEXT:    v_lshlrev_b32_e32 v5, 12, v4
 ; SI-NEXT:    v_or_b32_e32 v2, v6, v2
 ; SI-NEXT:    v_or_b32_e32 v5, v0, v5
-; SI-NEXT:    v_cmp_gt_i32_e32 vcc, 1, v4
+; SI-NEXT:    v_cmp_ge_i32_e32 vcc, 1, v4
 ; SI-NEXT:    v_cndmask_b32_e32 v2, v5, v2, vcc
 ; SI-NEXT:    v_and_b32_e32 v5, 7, v2
-; SI-NEXT:    v_cmp_lt_i32_e32 vcc, 5, v5
+; SI-NEXT:    v_cmp_le_i32_e32 vcc, 5, v5
 ; SI-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
 ; SI-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v5
 ; SI-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
@@ -4276,7 +4276,7 @@ define { half, double } @v_fneg_fp_round_store_use_fneg_f64_to_f16(double %a) #0
 ; SI-NEXT:    v_lshrrev_b32_e32 v2, 2, v2
 ; SI-NEXT:    v_add_i32_e32 v2, vcc, v2, v5
 ; SI-NEXT:    v_mov_b32_e32 v5, 0x7c00
-; SI-NEXT:    v_cmp_gt_i32_e32 vcc, 31, v4
+; SI-NEXT:    v_cmp_ge_i32_e32 vcc, 31, v4
 ; SI-NEXT:    v_cndmask_b32_e32 v2, v5, v2, vcc
 ; SI-NEXT:    v_mov_b32_e32 v6, 0x7e00
 ; SI-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
@@ -4317,10 +4317,10 @@ define { half, double } @v_fneg_fp_round_store_use_fneg_f64_to_f16(double %a) #0
 ; VI-NEXT:    v_lshlrev_b32_e32 v6, 12, v5
 ; VI-NEXT:    v_or_b32_e32 v4, v7, v4
 ; VI-NEXT:    v_or_b32_e32 v6, v0, v6
-; VI-NEXT:    v_cmp_gt_i32_e32 vcc, 1, v5
+; VI-NEXT:    v_cmp_ge_i32_e32 vcc, 1, v5
 ; VI-NEXT:    v_cndmask_b32_e32 v4, v6, v4, vcc
 ; VI-NEXT:    v_and_b32_e32 v6, 7, v4
-; VI-NEXT:    v_cmp_lt_i32_e32 vcc, 5, v6
+; VI-NEXT:    v_cmp_le_i32_e32 vcc, 5, v6
 ; VI-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
 ; VI-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v6
 ; VI-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
@@ -4328,7 +4328,7 @@ define { half, double } @v_fneg_fp_round_store_use_fneg_f64_to_f16(double %a) #0
 ; VI-NEXT:    v_lshrrev_b32_e32 v4, 2, v4
 ; VI-NEXT:    v_add_u32_e32 v4, vcc, v4, v6
 ; VI-NEXT:    v_mov_b32_e32 v6, 0x7c00
-; VI-NEXT:    v_cmp_gt_i32_e32 vcc, 31, v5
+; VI-NEXT:    v_cmp_ge_i32_e32 vcc, 31, v5
 ; VI-NEXT:    v_cndmask_b32_e32 v4, v6, v4, vcc
 ; VI-NEXT:    v_mov_b32_e32 v7, 0x7e00
 ; VI-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
@@ -4367,13 +4367,13 @@ define { half, double } @v_fneg_fp_round_store_use_fneg_f64_to_f16(double %a) #0
 ; GFX11-NEXT:    v_or_b32_e32 v3, v6, v3
 ; GFX11-NEXT:    v_add_nc_u32_e32 v4, 0xfffffc10, v4
 ; GFX11-NEXT:    v_lshl_or_b32 v5, v4, 12, v2
-; GFX11-NEXT:    v_cmp_gt_i32_e32 vcc_lo, 1, v4
+; GFX11-NEXT:    v_cmp_ge_i32_e32 vcc_lo, 1, v4
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX11-NEXT:    v_cndmask_b32_e32 v3, v5, v3, vcc_lo
 ; GFX11-NEXT:    v_and_b32_e32 v5, 7, v3
 ; GFX11-NEXT:    v_lshrrev_b32_e32 v3, 2, v3
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_4) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_cmp_lt_i32_e32 vcc_lo, 5, v5
+; GFX11-NEXT:    v_cmp_le_i32_e32 vcc_lo, 5, v5
 ; GFX11-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc_lo
 ; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v5
 ; GFX11-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc_lo
@@ -4382,7 +4382,7 @@ define { half, double } @v_fneg_fp_round_store_use_fneg_f64_to_f16(double %a) #0
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX11-NEXT:    v_dual_mov_b32 v6, 0x7e00 :: v_dual_add_nc_u32 v3, v3, v5
 ; GFX11-NEXT:    v_cndmask_b32_e32 v2, 0x7c00, v6, vcc_lo
-; GFX11-NEXT:    v_cmp_gt_i32_e32 vcc_lo, 31, v4
+; GFX11-NEXT:    v_cmp_ge_i32_e32 vcc_lo, 31, v4
 ; GFX11-NEXT:    v_lshrrev_b32_e32 v5, 16, v1
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_2)
 ; GFX11-NEXT:    v_cndmask_b32_e32 v3, 0x7c00, v3, vcc_lo
@@ -4528,10 +4528,10 @@ define { half, double } @v_fneg_fp_round_multi_use_fneg_f64_to_f16(double %a, do
 ; SI-NEXT:    v_lshlrev_b32_e32 v7, 12, v6
 ; SI-NEXT:    v_or_b32_e32 v5, v8, v5
 ; SI-NEXT:    v_or_b32_e32 v7, v4, v7
-; SI-NEXT:    v_cmp_gt_i32_e32 vcc, 1, v6
+; SI-NEXT:    v_cmp_ge_i32_e32 vcc, 1, v6
 ; SI-NEXT:    v_cndmask_b32_e32 v5, v7, v5, vcc
 ; SI-NEXT:    v_and_b32_e32 v7, 7, v5
-; SI-NEXT:    v_cmp_lt_i32_e32 vcc, 5, v7
+; SI-NEXT:    v_cmp_le_i32_e32 vcc, 5, v7
 ; SI-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
 ; SI-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v7
 ; SI-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
@@ -4539,7 +4539,7 @@ define { half, double } @v_fneg_fp_round_multi_use_fneg_f64_to_f16(double %a, do
 ; SI-NEXT:    v_lshrrev_b32_e32 v5, 2, v5
 ; SI-NEXT:    v_add_i32_e32 v5, vcc, v5, v7
 ; SI-NEXT:    v_mov_b32_e32 v7, 0x7c00
-; SI-NEXT:    v_cmp_gt_i32_e32 vcc, 31, v6
+; SI-NEXT:    v_cmp_ge_i32_e32 vcc, 31, v6
 ; SI-NEXT:    v_cndmask_b32_e32 v5, v7, v5, vcc
 ; SI-NEXT:    v_mov_b32_e32 v8, 0x7e00
 ; SI-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
@@ -4579,10 +4579,10 @@ define { half, double } @v_fneg_fp_round_multi_use_fneg_f64_to_f16(double %a, do
 ; VI-NEXT:    v_lshlrev_b32_e32 v7, 12, v6
 ; VI-NEXT:    v_or_b32_e32 v5, v8, v5
 ; VI-NEXT:    v_or_b32_e32 v7, v4, v7
-; VI-NEXT:    v_cmp_gt_i32_e32 vcc, 1, v6
+; VI-NEXT:    v_cmp_ge_i32_e32 vcc, 1, v6
 ; VI-NEXT:    v_cndmask_b32_e32 v5, v7, v5, vcc
 ; VI-NEXT:    v_and_b32_e32 v7, 7, v5
-; VI-NEXT:    v_cmp_lt_i32_e32 vcc, 5, v7
+; VI-NEXT:    v_cmp_le_i32_e32 vcc, 5, v7
 ; VI-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
 ; VI-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v7
 ; VI-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
@@ -4591,7 +4591,7 @@ define { half, double } @v_fneg_fp_round_multi_use_fneg_f64_to_f16(double %a, do
 ; VI-NEXT:    v_lshrrev_b32_e32 v5, 2, v5
 ; VI-NEXT:    v_add_u32_e32 v5, vcc, v5, v7
 ; VI-NEXT:    v_mov_b32_e32 v7, 0x7c00
-; VI-NEXT:    v_cmp_gt_i32_e32 vcc, 31, v6
+; VI-NEXT:    v_cmp_ge_i32_e32 vcc, 31, v6
 ; VI-NEXT:    v_cndmask_b32_e32 v5, v7, v5, vcc
 ; VI-NEXT:    v_mov_b32_e32 v8, 0x7e00
 ; VI-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
@@ -4632,12 +4632,12 @@ define { half, double } @v_fneg_fp_round_multi_use_fneg_f64_to_f16(double %a, do
 ; GFX11-NEXT:    v_add_nc_u32_e32 v6, 0xfffffc10, v6
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
 ; GFX11-NEXT:    v_lshl_or_b32 v7, v6, 12, v4
-; GFX11-NEXT:    v_cmp_gt_i32_e32 vcc_lo, 1, v6
+; GFX11-NEXT:    v_cmp_ge_i32_e32 vcc_lo, 1, v6
 ; GFX11-NEXT:    v_cndmask_b32_e32 v5, v7, v5, vcc_lo
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
 ; GFX11-NEXT:    v_and_b32_e32 v0, 7, v5
 ; GFX11-NEXT:    v_lshrrev_b32_e32 v5, 2, v5
-; GFX11-NEXT:    v_cmp_lt_i32_e32 vcc_lo, 5, v0
+; GFX11-NEXT:    v_cmp_le_i32_e32 vcc_lo, 5, v0
 ; GFX11-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc_lo
 ; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v0
 ; GFX11-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
@@ -4647,7 +4647,7 @@ define { half, double } @v_fneg_fp_round_multi_use_fneg_f64_to_f16(double %a, do
 ; GFX11-NEXT:    v_dual_mov_b32 v7, 0x7e00 :: v_dual_add_nc_u32 v0, v5, v0
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_3)
 ; GFX11-NEXT:    v_cndmask_b32_e32 v4, 0x7c00, v7, vcc_lo
-; GFX11-NEXT:    v_cmp_gt_i32_e32 vcc_lo, 31, v6
+; GFX11-NEXT:    v_cmp_ge_i32_e32 vcc_lo, 31, v6
 ; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7c00, v0, vcc_lo
 ; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0x40f, v6
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
@@ -4688,10 +4688,10 @@ define { half, half } @v_fneg_multi_use_fp_round_fneg_f64_to_f16(double %a) #0 {
 ; SI-NEXT:    v_lshlrev_b32_e32 v4, 12, v3
 ; SI-NEXT:    v_or_b32_e32 v2, v5, v2
 ; SI-NEXT:    v_or_b32_e32 v4, v0, v4
-; SI-NEXT:    v_cmp_gt_i32_e32 vcc, 1, v3
+; SI-NEXT:    v_cmp_ge_i32_e32 vcc, 1, v3
 ; SI-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
 ; SI-NEXT:    v_and_b32_e32 v4, 7, v2
-; SI-NEXT:    v_cmp_lt_i32_e32 vcc, 5, v4
+; SI-NEXT:    v_cmp_le_i32_e32 vcc, 5, v4
 ; SI-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
 ; SI-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v4
 ; SI-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
@@ -4699,7 +4699,7 @@ define { half, half } @v_fneg_multi_use_fp_round_fneg_f64_to_f16(double %a) #0 {
 ; SI-NEXT:    v_lshrrev_b32_e32 v2, 2, v2
 ; SI-NEXT:    v_add_i32_e32 v2, vcc, v2, v4
 ; SI-NEXT:    v_mov_b32_e32 v4, 0x7c00
-; SI-NEXT:    v_cmp_gt_i32_e32 vcc, 31, v3
+; SI-NEXT:    v_cmp_ge_i32_e32 vcc, 31, v3
 ; SI-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
 ; SI-NEXT:    v_mov_b32_e32 v5, 0x7e00
 ; SI-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
@@ -4738,10 +4738,10 @@ define { half, half } @v_fneg_multi_use_fp_round_fneg_f64_to_f16(double %a) #0 {
 ; VI-NEXT:    v_lshlrev_b32_e32 v4, 12, v3
 ; VI-NEXT:    v_or_b32_e32 v2, v5, v2
 ; VI-NEXT:    v_or_b32_e32 v4, v0, v4
-; VI-NEXT:    v_cmp_gt_i32_e32 vcc, 1, v3
+; VI-NEXT:    v_cmp_ge_i32_e32 vcc, 1, v3
 ; VI-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
 ; VI-NEXT:    v_and_b32_e32 v4, 7, v2
-; VI-NEXT:    v_cmp_lt_i32_e32 vcc, 5, v4
+; VI-NEXT:    v_cmp_le_i32_e32 vcc, 5, v4
 ; VI-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
 ; VI-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v4
 ; VI-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
@@ -4749,7 +4749,7 @@ define { half, half } @v_fneg_multi_use_fp_round_fneg_f64_to_f16(double %a) #0 {
 ; VI-NEXT:    v_lshrrev_b32_e32 v2, 2, v2
 ; VI-NEXT:    v_add_u32_e32 v2, vcc, v2, v4
 ; VI-NEXT:    v_mov_b32_e32 v4, 0x7c00
-; VI-NEXT:    v_cmp_gt_i32_e32 vcc, 31, v3
+; VI-NEXT:    v_cmp_ge_i32_e32 vcc, 31, v3
 ; VI-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
 ; VI-NEXT:    v_mov_b32_e32 v5, 0x7e00
 ; VI-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
@@ -4788,13 +4788,13 @@ define { half, half } @v_fneg_multi_use_fp_round_fneg_f64_to_f16(double %a) #0 {
 ; GFX11-NEXT:    v_or_b32_e32 v2, v5, v2
 ; GFX11-NEXT:    v_add_nc_u32_e32 v3, 0xfffffc10, v3
 ; GFX11-NEXT:    v_lshl_or_b32 v4, v3, 12, v0
-; GFX11-NEXT:    v_cmp_gt_i32_e32 vcc_lo, 1, v3
+; GFX11-NEXT:    v_cmp_ge_i32_e32 vcc_lo, 1, v3
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX11-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc_lo
 ; GFX11-NEXT:    v_and_b32_e32 v4, 7, v2
 ; GFX11-NEXT:    v_lshrrev_b32_e32 v2, 2, v2
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_4) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_cmp_lt_i32_e32 vcc_lo, 5, v4
+; GFX11-NEXT:    v_cmp_le_i32_e32 vcc_lo, 5, v4
 ; GFX11-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc_lo
 ; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v4
 ; GFX11-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc_lo
@@ -4803,7 +4803,7 @@ define { half, half } @v_fneg_multi_use_fp_round_fneg_f64_to_f16(double %a) #0 {
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX11-NEXT:    v_dual_mov_b32 v5, 0x7e00 :: v_dual_add_nc_u32 v2, v2, v4
 ; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7c00, v5, vcc_lo
-; GFX11-NEXT:    v_cmp_gt_i32_e32 vcc_lo, 31, v3
+; GFX11-NEXT:    v_cmp_ge_i32_e32 vcc_lo, 31, v3
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2)
 ; GFX11-NEXT:    v_cndmask_b32_e32 v2, 0x7c00, v2, vcc_lo
 ; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0x40f, v3
diff --git a/llvm/test/CodeGen/AMDGPU/fneg-modifier-casting.ll b/llvm/test/CodeGen/AMDGPU/fneg-modifier-casting.ll
index 1b092b283290a..6bdbd19092234 100644
--- a/llvm/test/CodeGen/AMDGPU/fneg-modifier-casting.ll
+++ b/llvm/test/CodeGen/AMDGPU/fneg-modifier-casting.ll
@@ -937,7 +937,7 @@ define double @cospiD_pattern0(i32 %arg, double %arg1, double %arg2) {
 ; GCN-NEXT:    v_cndmask_b32_e32 v3, v1, v3, vcc
 ; GCN-NEXT:    v_cndmask_b32_e32 v1, v2, v4, vcc
 ; GCN-NEXT:    v_bfrev_b32_e32 v2, 1
-; GCN-NEXT:    v_cmp_lt_i32_e32 vcc, 1, v0
+; GCN-NEXT:    v_cmp_le_i32_e32 vcc, 1, v0
 ; GCN-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GCN-NEXT:    v_xor_b32_e32 v1, v1, v0
 ; GCN-NEXT:    v_mov_b32_e32 v0, v3
@@ -951,7 +951,7 @@ define double @cospiD_pattern0(i32 %arg, double %arg1, double %arg2) {
 ; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v5
 ; GFX11-NEXT:    v_cndmask_b32_e32 v3, v1, v3, vcc_lo
 ; GFX11-NEXT:    v_cndmask_b32_e32 v1, v2, v4, vcc_lo
-; GFX11-NEXT:    v_cmp_lt_i32_e32 vcc_lo, 1, v0
+; GFX11-NEXT:    v_cmp_le_i32_e32 vcc_lo, 1, v0
 ; GFX11-NEXT:    v_cndmask_b32_e64 v0, 0, 0x80000000, vcc_lo
 ; GFX11-NEXT:    v_xor_b32_e32 v1, v1, v0
 ; GFX11-NEXT:    v_mov_b32_e32 v0, v3
@@ -978,7 +978,7 @@ define double @cospiD_pattern1(i32 %arg, double %arg1, double %arg2) {
 ; GCN-NEXT:    v_cndmask_b32_e32 v3, v1, v3, vcc
 ; GCN-NEXT:    v_cndmask_b32_e32 v1, v2, v4, vcc
 ; GCN-NEXT:    v_xor_b32_e32 v2, 0x80000000, v1
-; GCN-NEXT:    v_cmp_lt_i32_e32 vcc, 1, v0
+; GCN-NEXT:    v_cmp_le_i32_e32 vcc, 1, v0
 ; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
 ; GCN-NEXT:    v_mov_b32_e32 v0, v3
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
@@ -991,7 +991,7 @@ define double @cospiD_pattern1(i32 %arg, double %arg1, double %arg2) {
 ; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v5
 ; GFX11-NEXT:    v_cndmask_b32_e32 v3, v1, v3, vcc_lo
 ; GFX11-NEXT:    v_cndmask_b32_e32 v1, v2, v4, vcc_lo
-; GFX11-NEXT:    v_cmp_lt_i32_e32 vcc_lo, 1, v0
+; GFX11-NEXT:    v_cmp_le_i32_e32 vcc_lo, 1, v0
 ; GFX11-NEXT:    v_mov_b32_e32 v0, v3
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX11-NEXT:    v_xor_b32_e32 v2, 0x80000000, v1
@@ -1016,7 +1016,7 @@ define float @cospiD_pattern0_half(i16 %arg, float %arg1, float %arg2) {
 ; GFX7-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
 ; GFX7-NEXT:    v_cndmask_b32_e32 v0, v1, v2, vcc
 ; GFX7-NEXT:    v_mov_b32_e32 v2, 0xffff8000
-; GFX7-NEXT:    v_cmp_lt_i32_e32 vcc, 1, v3
+; GFX7-NEXT:    v_cmp_le_i32_e32 vcc, 1, v3
 ; GFX7-NEXT:    v_and_b32_e32 v1, 0xffff, v0
 ; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
 ; GFX7-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
@@ -1087,7 +1087,7 @@ define float @cospiD_pattern1_half(i16 %arg, float %arg1, float %arg2) {
 ; GFX7-NEXT:    v_and_b32_e32 v0, 1, v0
 ; GFX7-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
 ; GFX7-NEXT:    v_cndmask_b32_e32 v0, v1, v2, vcc
-; GFX7-NEXT:    v_cmp_lt_i32_e32 vcc, 1, v3
+; GFX7-NEXT:    v_cmp_le_i32_e32 vcc, 1, v3
 ; GFX7-NEXT:    v_cndmask_b32_e64 v0, v0, -v0, vcc
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
diff --git a/llvm/test/CodeGen/AMDGPU/fold-int-pow2-with-fmul-or-fdiv.ll b/llvm/test/CodeGen/AMDGPU/fold-int-pow2-with-fmul-or-fdiv.ll
index 63ba18a5433aa..33206bb2118ef 100644
--- a/llvm/test/CodeGen/AMDGPU/fold-int-pow2-with-fmul-or-fdiv.ll
+++ b/llvm/test/CodeGen/AMDGPU/fold-int-pow2-with-fmul-or-fdiv.ll
@@ -559,7 +559,7 @@ define float @fmul_fly_pow_mul_min_pow2(i64 %cnt) nounwind {
 ; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; VI-NEXT:    v_lshlrev_b64 v[0:1], v0, 8
 ; VI-NEXT:    s_mov_b64 s[4:5], 0x2000
-; VI-NEXT:    v_cmp_gt_u64_e32 vcc, s[4:5], v[0:1]
+; VI-NEXT:    v_cmp_ge_u64_e32 vcc, s[4:5], v[0:1]
 ; VI-NEXT:    v_mov_b32_e32 v2, 0x2000
 ; VI-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; VI-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
@@ -578,7 +578,7 @@ define float @fmul_fly_pow_mul_min_pow2(i64 %cnt) nounwind {
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    v_lshlrev_b64 v[0:1], v0, 8
-; GFX10-NEXT:    v_cmp_gt_u64_e32 vcc_lo, 0x2000, v[0:1]
+; GFX10-NEXT:    v_cmp_ge_u64_e32 vcc_lo, 0x2000, v[0:1]
 ; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc_lo
 ; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x2000, v0, vcc_lo
 ; GFX10-NEXT:    v_ffbh_u32_e32 v2, v1
@@ -597,7 +597,7 @@ define float @fmul_fly_pow_mul_min_pow2(i64 %cnt) nounwind {
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NEXT:    v_lshlrev_b64 v[0:1], v0, 8
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_cmp_gt_u64_e32 vcc_lo, 0x2000, v[0:1]
+; GFX11-NEXT:    v_cmp_ge_u64_e32 vcc_lo, 0x2000, v[0:1]
 ; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc_lo
 ; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x2000, v0, vcc_lo
 ; GFX11-NEXT:    v_clz_i32_u32_e32 v2, v1
diff --git a/llvm/test/CodeGen/AMDGPU/fptoi.i128.ll b/llvm/test/CodeGen/AMDGPU/fptoi.i128.ll
index 189b897793381..66244e56047e8 100644
--- a/llvm/test/CodeGen/AMDGPU/fptoi.i128.ll
+++ b/llvm/test/CodeGen/AMDGPU/fptoi.i128.ll
@@ -11,7 +11,7 @@ define i128 @fptosi_f64_to_i128(double %x) {
 ; SDAG-NEXT:    v_mov_b32_e32 v7, 0
 ; SDAG-NEXT:    s_mov_b64 s[4:5], 0x3fe
 ; SDAG-NEXT:    v_mov_b32_e32 v4, v0
-; SDAG-NEXT:    v_cmp_lt_u64_e32 vcc, s[4:5], v[6:7]
+; SDAG-NEXT:    v_cmp_le_u64_e32 vcc, s[4:5], v[6:7]
 ; SDAG-NEXT:    v_mov_b32_e32 v0, 0
 ; SDAG-NEXT:    v_mov_b32_e32 v2, 0
 ; SDAG-NEXT:    v_mov_b32_e32 v1, 0
@@ -26,8 +26,8 @@ define i128 @fptosi_f64_to_i128(double %x) {
 ; SDAG-NEXT:    v_addc_co_u32_e32 v3, vcc, -1, v7, vcc
 ; SDAG-NEXT:    s_mov_b32 s7, -1
 ; SDAG-NEXT:    v_cmp_eq_u64_e64 s[4:5], -1, v[2:3]
-; SDAG-NEXT:    v_cmp_lt_u64_e64 s[6:7], s[6:7], v[0:1]
-; SDAG-NEXT:    v_cmp_lt_i64_e32 vcc, -1, v[4:5]
+; SDAG-NEXT:    v_cmp_le_u64_e64 s[6:7], s[6:7], v[0:1]
+; SDAG-NEXT:    v_cmp_le_i64_e32 vcc, -1, v[4:5]
 ; SDAG-NEXT:    s_and_b64 s[4:5], s[4:5], s[6:7]
 ; SDAG-NEXT:    ; implicit-def: $vgpr0_vgpr1
 ; SDAG-NEXT:    ; implicit-def: $vgpr2_vgpr3
@@ -39,7 +39,7 @@ define i128 @fptosi_f64_to_i128(double %x) {
 ; SDAG-NEXT:    v_add_co_u32_e64 v9, s[4:5], -1, v0
 ; SDAG-NEXT:    s_mov_b64 s[4:5], 0x432
 ; SDAG-NEXT:    v_and_b32_e32 v0, 0xfffff, v5
-; SDAG-NEXT:    v_cmp_lt_u64_e64 s[4:5], s[4:5], v[6:7]
+; SDAG-NEXT:    v_cmp_le_u64_e64 s[4:5], s[4:5], v[6:7]
 ; SDAG-NEXT:    v_cndmask_b32_e64 v8, -1, 0, vcc
 ; SDAG-NEXT:    v_cndmask_b32_e64 v10, -1, 1, vcc
 ; SDAG-NEXT:    v_or_b32_e32 v5, 0x100000, v0
@@ -54,7 +54,7 @@ define i128 @fptosi_f64_to_i128(double %x) {
 ; SDAG-NEXT:    v_add_u32_e32 v7, 0xfffffbcd, v6
 ; SDAG-NEXT:    v_lshrrev_b64 v[0:1], v0, v[4:5]
 ; SDAG-NEXT:    v_lshlrev_b64 v[2:3], v2, v[4:5]
-; SDAG-NEXT:    v_cmp_gt_u32_e64 s[4:5], 64, v7
+; SDAG-NEXT:    v_cmp_ge_u32_e64 s[4:5], 64, v7
 ; SDAG-NEXT:    v_cndmask_b32_e64 v1, v3, v1, s[4:5]
 ; SDAG-NEXT:    v_cmp_ne_u32_e64 s[6:7], 0, v7
 ; SDAG-NEXT:    v_cndmask_b32_e64 v6, 0, v1, s[6:7]
@@ -93,7 +93,7 @@ define i128 @fptosi_f64_to_i128(double %x) {
 ; SDAG-NEXT:  ; %bb.5: ; %fp-to-i-if-then12
 ; SDAG-NEXT:    v_sub_u32_e32 v2, 0x433, v6
 ; SDAG-NEXT:    v_lshrrev_b64 v[0:1], v2, v[4:5]
-; SDAG-NEXT:    v_cmp_gt_u32_e64 s[4:5], 64, v2
+; SDAG-NEXT:    v_cmp_ge_u32_e64 s[4:5], 64, v2
 ; SDAG-NEXT:    v_cmp_eq_u32_e64 s[6:7], 0, v2
 ; SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, v0, s[4:5]
 ; SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, v1, s[4:5]
@@ -155,8 +155,8 @@ define i128 @fptosi_f64_to_i128(double %x) {
 ; GISEL-NEXT:    v_cmp_ge_u64_e32 vcc, v[0:1], v[2:3]
 ; GISEL-NEXT:    v_addc_co_u32_e64 v9, s[6:7], 0, -1, s[6:7]
 ; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; GISEL-NEXT:    v_cmp_le_u64_e32 vcc, -1, v[8:9]
-; GISEL-NEXT:    v_cmp_lt_i64_e64 s[4:5], -1, v[4:5]
+; GISEL-NEXT:    v_cmp_lt_u64_e32 vcc, -1, v[8:9]
+; GISEL-NEXT:    v_cmp_le_i64_e64 s[4:5], -1, v[4:5]
 ; GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
 ; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, -1, v[8:9]
 ; GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
@@ -233,7 +233,7 @@ define i128 @fptosi_f64_to_i128(double %x) {
 ; GISEL-NEXT:  ; %bb.3: ; %fp-to-i-if-else
 ; GISEL-NEXT:    v_add_u32_e32 v7, 0xfffffbcd, v6
 ; GISEL-NEXT:    v_lshlrev_b64 v[0:1], v7, v[4:5]
-; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v7
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v7
 ; GISEL-NEXT:    v_cndmask_b32_e32 v10, 0, v0, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e32 v11, 0, v1, vcc
 ; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[6:7], v10, v9, 0
@@ -268,7 +268,7 @@ define i128 @fptosi_f64_to_i128(double %x) {
 ; GISEL-NEXT:  ; %bb.5: ; %fp-to-i-if-then12
 ; GISEL-NEXT:    v_sub_co_u32_e32 v2, vcc, 0x433, v6
 ; GISEL-NEXT:    v_lshrrev_b64 v[0:1], v2, v[4:5]
-; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v2
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v2
 ; GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v2
@@ -375,7 +375,7 @@ define i128 @fptoui_f64_to_i128(double %x) {
 ; SDAG-NEXT:    v_mov_b32_e32 v7, 0
 ; SDAG-NEXT:    s_mov_b64 s[4:5], 0x3fe
 ; SDAG-NEXT:    v_mov_b32_e32 v4, v0
-; SDAG-NEXT:    v_cmp_lt_u64_e32 vcc, s[4:5], v[6:7]
+; SDAG-NEXT:    v_cmp_le_u64_e32 vcc, s[4:5], v[6:7]
 ; SDAG-NEXT:    v_mov_b32_e32 v0, 0
 ; SDAG-NEXT:    v_mov_b32_e32 v2, 0
 ; SDAG-NEXT:    v_mov_b32_e32 v1, 0
@@ -390,8 +390,8 @@ define i128 @fptoui_f64_to_i128(double %x) {
 ; SDAG-NEXT:    v_addc_co_u32_e32 v3, vcc, -1, v7, vcc
 ; SDAG-NEXT:    s_mov_b32 s7, -1
 ; SDAG-NEXT:    v_cmp_eq_u64_e64 s[4:5], -1, v[2:3]
-; SDAG-NEXT:    v_cmp_lt_u64_e64 s[6:7], s[6:7], v[0:1]
-; SDAG-NEXT:    v_cmp_lt_i64_e32 vcc, -1, v[4:5]
+; SDAG-NEXT:    v_cmp_le_u64_e64 s[6:7], s[6:7], v[0:1]
+; SDAG-NEXT:    v_cmp_le_i64_e32 vcc, -1, v[4:5]
 ; SDAG-NEXT:    s_and_b64 s[4:5], s[4:5], s[6:7]
 ; SDAG-NEXT:    ; implicit-def: $vgpr0_vgpr1
 ; SDAG-NEXT:    ; implicit-def: $vgpr2_vgpr3
@@ -403,7 +403,7 @@ define i128 @fptoui_f64_to_i128(double %x) {
 ; SDAG-NEXT:    v_add_co_u32_e64 v9, s[4:5], -1, v0
 ; SDAG-NEXT:    s_mov_b64 s[4:5], 0x432
 ; SDAG-NEXT:    v_and_b32_e32 v0, 0xfffff, v5
-; SDAG-NEXT:    v_cmp_lt_u64_e64 s[4:5], s[4:5], v[6:7]
+; SDAG-NEXT:    v_cmp_le_u64_e64 s[4:5], s[4:5], v[6:7]
 ; SDAG-NEXT:    v_cndmask_b32_e64 v8, -1, 0, vcc
 ; SDAG-NEXT:    v_cndmask_b32_e64 v10, -1, 1, vcc
 ; SDAG-NEXT:    v_or_b32_e32 v5, 0x100000, v0
@@ -418,7 +418,7 @@ define i128 @fptoui_f64_to_i128(double %x) {
 ; SDAG-NEXT:    v_add_u32_e32 v7, 0xfffffbcd, v6
 ; SDAG-NEXT:    v_lshrrev_b64 v[0:1], v0, v[4:5]
 ; SDAG-NEXT:    v_lshlrev_b64 v[2:3], v2, v[4:5]
-; SDAG-NEXT:    v_cmp_gt_u32_e64 s[4:5], 64, v7
+; SDAG-NEXT:    v_cmp_ge_u32_e64 s[4:5], 64, v7
 ; SDAG-NEXT:    v_cndmask_b32_e64 v1, v3, v1, s[4:5]
 ; SDAG-NEXT:    v_cmp_ne_u32_e64 s[6:7], 0, v7
 ; SDAG-NEXT:    v_cndmask_b32_e64 v6, 0, v1, s[6:7]
@@ -457,7 +457,7 @@ define i128 @fptoui_f64_to_i128(double %x) {
 ; SDAG-NEXT:  ; %bb.5: ; %fp-to-i-if-then12
 ; SDAG-NEXT:    v_sub_u32_e32 v2, 0x433, v6
 ; SDAG-NEXT:    v_lshrrev_b64 v[0:1], v2, v[4:5]
-; SDAG-NEXT:    v_cmp_gt_u32_e64 s[4:5], 64, v2
+; SDAG-NEXT:    v_cmp_ge_u32_e64 s[4:5], 64, v2
 ; SDAG-NEXT:    v_cmp_eq_u32_e64 s[6:7], 0, v2
 ; SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, v0, s[4:5]
 ; SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, v1, s[4:5]
@@ -519,8 +519,8 @@ define i128 @fptoui_f64_to_i128(double %x) {
 ; GISEL-NEXT:    v_cmp_ge_u64_e32 vcc, v[0:1], v[2:3]
 ; GISEL-NEXT:    v_addc_co_u32_e64 v9, s[6:7], 0, -1, s[6:7]
 ; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; GISEL-NEXT:    v_cmp_le_u64_e32 vcc, -1, v[8:9]
-; GISEL-NEXT:    v_cmp_lt_i64_e64 s[4:5], -1, v[4:5]
+; GISEL-NEXT:    v_cmp_lt_u64_e32 vcc, -1, v[8:9]
+; GISEL-NEXT:    v_cmp_le_i64_e64 s[4:5], -1, v[4:5]
 ; GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
 ; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, -1, v[8:9]
 ; GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
@@ -597,7 +597,7 @@ define i128 @fptoui_f64_to_i128(double %x) {
 ; GISEL-NEXT:  ; %bb.3: ; %fp-to-i-if-else
 ; GISEL-NEXT:    v_add_u32_e32 v7, 0xfffffbcd, v6
 ; GISEL-NEXT:    v_lshlrev_b64 v[0:1], v7, v[4:5]
-; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v7
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v7
 ; GISEL-NEXT:    v_cndmask_b32_e32 v10, 0, v0, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e32 v11, 0, v1, vcc
 ; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[6:7], v10, v9, 0
@@ -632,7 +632,7 @@ define i128 @fptoui_f64_to_i128(double %x) {
 ; GISEL-NEXT:  ; %bb.5: ; %fp-to-i-if-then12
 ; GISEL-NEXT:    v_sub_co_u32_e32 v2, vcc, 0x433, v6
 ; GISEL-NEXT:    v_lshrrev_b64 v[0:1], v2, v[4:5]
-; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v2
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v2
 ; GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v2
@@ -742,7 +742,7 @@ define i128 @fptosi_f32_to_i128(float %x) {
 ; SDAG-NEXT:    v_mov_b32_e32 v6, 0
 ; SDAG-NEXT:    v_mov_b32_e32 v1, 0
 ; SDAG-NEXT:    v_mov_b32_e32 v3, 0
-; SDAG-NEXT:    v_cmp_lt_u32_e32 vcc, s4, v5
+; SDAG-NEXT:    v_cmp_le_u32_e32 vcc, s4, v5
 ; SDAG-NEXT:    s_and_saveexec_b64 s[8:9], vcc
 ; SDAG-NEXT:    s_cbranch_execz .LBB2_10
 ; SDAG-NEXT:  ; %bb.1: ; %fp-to-i-if-end
@@ -753,8 +753,8 @@ define i128 @fptosi_f32_to_i128(float %x) {
 ; SDAG-NEXT:    v_addc_co_u32_e32 v3, vcc, -1, v6, vcc
 ; SDAG-NEXT:    s_mov_b32 s7, -1
 ; SDAG-NEXT:    v_cmp_eq_u64_e64 s[4:5], -1, v[2:3]
-; SDAG-NEXT:    v_cmp_lt_u64_e64 s[6:7], s[6:7], v[0:1]
-; SDAG-NEXT:    v_cmp_lt_i32_e32 vcc, -1, v4
+; SDAG-NEXT:    v_cmp_le_u64_e64 s[6:7], s[6:7], v[0:1]
+; SDAG-NEXT:    v_cmp_le_i32_e32 vcc, -1, v4
 ; SDAG-NEXT:    s_and_b64 s[4:5], s[4:5], s[6:7]
 ; SDAG-NEXT:    ; implicit-def: $vgpr0_vgpr1
 ; SDAG-NEXT:    ; implicit-def: $vgpr2_vgpr3
@@ -766,7 +766,7 @@ define i128 @fptosi_f32_to_i128(float %x) {
 ; SDAG-NEXT:    v_add_co_u32_e64 v9, s[4:5], -1, v0
 ; SDAG-NEXT:    s_mov_b64 s[4:5], 0x95
 ; SDAG-NEXT:    v_and_b32_e32 v0, 0x7fffff, v4
-; SDAG-NEXT:    v_cmp_lt_u64_e64 s[4:5], s[4:5], v[5:6]
+; SDAG-NEXT:    v_cmp_le_u64_e64 s[4:5], s[4:5], v[5:6]
 ; SDAG-NEXT:    v_mov_b32_e32 v7, 0
 ; SDAG-NEXT:    v_cndmask_b32_e64 v8, -1, 0, vcc
 ; SDAG-NEXT:    v_cndmask_b32_e64 v10, -1, 1, vcc
@@ -782,7 +782,7 @@ define i128 @fptosi_f32_to_i128(float %x) {
 ; SDAG-NEXT:    v_add_u32_e32 v4, 0xffffff6a, v5
 ; SDAG-NEXT:    v_lshrrev_b64 v[0:1], v0, v[6:7]
 ; SDAG-NEXT:    v_lshlrev_b64 v[2:3], v2, v[6:7]
-; SDAG-NEXT:    v_cmp_gt_u32_e64 s[4:5], 64, v4
+; SDAG-NEXT:    v_cmp_ge_u32_e64 s[4:5], 64, v4
 ; SDAG-NEXT:    v_cndmask_b32_e64 v1, v3, v1, s[4:5]
 ; SDAG-NEXT:    v_cmp_ne_u32_e64 s[6:7], 0, v4
 ; SDAG-NEXT:    v_cndmask_b32_e64 v3, 0, v1, s[6:7]
@@ -821,7 +821,7 @@ define i128 @fptosi_f32_to_i128(float %x) {
 ; SDAG-NEXT:  ; %bb.5: ; %fp-to-i-if-then12
 ; SDAG-NEXT:    v_sub_u32_e32 v2, 0x96, v5
 ; SDAG-NEXT:    v_lshrrev_b64 v[0:1], v2, v[6:7]
-; SDAG-NEXT:    v_cmp_gt_u32_e64 s[4:5], 64, v2
+; SDAG-NEXT:    v_cmp_ge_u32_e64 s[4:5], 64, v2
 ; SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, v0, s[4:5]
 ; SDAG-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v2
 ; SDAG-NEXT:    v_cndmask_b32_e64 v3, v0, v6, s[4:5]
@@ -876,8 +876,8 @@ define i128 @fptosi_f32_to_i128(float %x) {
 ; GISEL-NEXT:    v_cmp_ge_u64_e32 vcc, v[0:1], v[2:3]
 ; GISEL-NEXT:    v_addc_co_u32_e64 v9, s[6:7], 0, -1, s[6:7]
 ; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; GISEL-NEXT:    v_cmp_le_u64_e32 vcc, -1, v[8:9]
-; GISEL-NEXT:    v_cmp_lt_i32_e64 s[4:5], -1, v4
+; GISEL-NEXT:    v_cmp_lt_u64_e32 vcc, -1, v[8:9]
+; GISEL-NEXT:    v_cmp_le_i32_e64 s[4:5], -1, v4
 ; GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
 ; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, -1, v[8:9]
 ; GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
@@ -954,7 +954,7 @@ define i128 @fptosi_f32_to_i128(float %x) {
 ; GISEL-NEXT:  ; %bb.3: ; %fp-to-i-if-else
 ; GISEL-NEXT:    v_add_u32_e32 v7, 0xffffff6a, v6
 ; GISEL-NEXT:    v_lshlrev_b64 v[0:1], v7, v[4:5]
-; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v7
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v7
 ; GISEL-NEXT:    v_cndmask_b32_e32 v10, 0, v0, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e32 v11, 0, v1, vcc
 ; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[6:7], v10, v8, 0
@@ -989,7 +989,7 @@ define i128 @fptosi_f32_to_i128(float %x) {
 ; GISEL-NEXT:  ; %bb.5: ; %fp-to-i-if-then12
 ; GISEL-NEXT:    v_sub_co_u32_e32 v2, vcc, 0x96, v6
 ; GISEL-NEXT:    v_lshrrev_b64 v[0:1], v2, v[4:5]
-; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v2
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v2
 ; GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
 ; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v2
 ; GISEL-NEXT:    v_cndmask_b32_e32 v4, v0, v4, vcc
@@ -1093,7 +1093,7 @@ define i128 @fptoui_f32_to_i128(float %x) {
 ; SDAG-NEXT:    v_mov_b32_e32 v6, 0
 ; SDAG-NEXT:    v_mov_b32_e32 v1, 0
 ; SDAG-NEXT:    v_mov_b32_e32 v3, 0
-; SDAG-NEXT:    v_cmp_lt_u32_e32 vcc, s4, v5
+; SDAG-NEXT:    v_cmp_le_u32_e32 vcc, s4, v5
 ; SDAG-NEXT:    s_and_saveexec_b64 s[8:9], vcc
 ; SDAG-NEXT:    s_cbranch_execz .LBB3_10
 ; SDAG-NEXT:  ; %bb.1: ; %fp-to-i-if-end
@@ -1104,8 +1104,8 @@ define i128 @fptoui_f32_to_i128(float %x) {
 ; SDAG-NEXT:    v_addc_co_u32_e32 v3, vcc, -1, v6, vcc
 ; SDAG-NEXT:    s_mov_b32 s7, -1
 ; SDAG-NEXT:    v_cmp_eq_u64_e64 s[4:5], -1, v[2:3]
-; SDAG-NEXT:    v_cmp_lt_u64_e64 s[6:7], s[6:7], v[0:1]
-; SDAG-NEXT:    v_cmp_lt_i32_e32 vcc, -1, v4
+; SDAG-NEXT:    v_cmp_le_u64_e64 s[6:7], s[6:7], v[0:1]
+; SDAG-NEXT:    v_cmp_le_i32_e32 vcc, -1, v4
 ; SDAG-NEXT:    s_and_b64 s[4:5], s[4:5], s[6:7]
 ; SDAG-NEXT:    ; implicit-def: $vgpr0_vgpr1
 ; SDAG-NEXT:    ; implicit-def: $vgpr2_vgpr3
@@ -1117,7 +1117,7 @@ define i128 @fptoui_f32_to_i128(float %x) {
 ; SDAG-NEXT:    v_add_co_u32_e64 v9, s[4:5], -1, v0
 ; SDAG-NEXT:    s_mov_b64 s[4:5], 0x95
 ; SDAG-NEXT:    v_and_b32_e32 v0, 0x7fffff, v4
-; SDAG-NEXT:    v_cmp_lt_u64_e64 s[4:5], s[4:5], v[5:6]
+; SDAG-NEXT:    v_cmp_le_u64_e64 s[4:5], s[4:5], v[5:6]
 ; SDAG-NEXT:    v_mov_b32_e32 v7, 0
 ; SDAG-NEXT:    v_cndmask_b32_e64 v8, -1, 0, vcc
 ; SDAG-NEXT:    v_cndmask_b32_e64 v10, -1, 1, vcc
@@ -1133,7 +1133,7 @@ define i128 @fptoui_f32_to_i128(float %x) {
 ; SDAG-NEXT:    v_add_u32_e32 v4, 0xffffff6a, v5
 ; SDAG-NEXT:    v_lshrrev_b64 v[0:1], v0, v[6:7]
 ; SDAG-NEXT:    v_lshlrev_b64 v[2:3], v2, v[6:7]
-; SDAG-NEXT:    v_cmp_gt_u32_e64 s[4:5], 64, v4
+; SDAG-NEXT:    v_cmp_ge_u32_e64 s[4:5], 64, v4
 ; SDAG-NEXT:    v_cndmask_b32_e64 v1, v3, v1, s[4:5]
 ; SDAG-NEXT:    v_cmp_ne_u32_e64 s[6:7], 0, v4
 ; SDAG-NEXT:    v_cndmask_b32_e64 v3, 0, v1, s[6:7]
@@ -1172,7 +1172,7 @@ define i128 @fptoui_f32_to_i128(float %x) {
 ; SDAG-NEXT:  ; %bb.5: ; %fp-to-i-if-then12
 ; SDAG-NEXT:    v_sub_u32_e32 v2, 0x96, v5
 ; SDAG-NEXT:    v_lshrrev_b64 v[0:1], v2, v[6:7]
-; SDAG-NEXT:    v_cmp_gt_u32_e64 s[4:5], 64, v2
+; SDAG-NEXT:    v_cmp_ge_u32_e64 s[4:5], 64, v2
 ; SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, v0, s[4:5]
 ; SDAG-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v2
 ; SDAG-NEXT:    v_cndmask_b32_e64 v3, v0, v6, s[4:5]
@@ -1227,8 +1227,8 @@ define i128 @fptoui_f32_to_i128(float %x) {
 ; GISEL-NEXT:    v_cmp_ge_u64_e32 vcc, v[0:1], v[2:3]
 ; GISEL-NEXT:    v_addc_co_u32_e64 v9, s[6:7], 0, -1, s[6:7]
 ; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; GISEL-NEXT:    v_cmp_le_u64_e32 vcc, -1, v[8:9]
-; GISEL-NEXT:    v_cmp_lt_i32_e64 s[4:5], -1, v4
+; GISEL-NEXT:    v_cmp_lt_u64_e32 vcc, -1, v[8:9]
+; GISEL-NEXT:    v_cmp_le_i32_e64 s[4:5], -1, v4
 ; GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
 ; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, -1, v[8:9]
 ; GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
@@ -1305,7 +1305,7 @@ define i128 @fptoui_f32_to_i128(float %x) {
 ; GISEL-NEXT:  ; %bb.3: ; %fp-to-i-if-else
 ; GISEL-NEXT:    v_add_u32_e32 v7, 0xffffff6a, v6
 ; GISEL-NEXT:    v_lshlrev_b64 v[0:1], v7, v[4:5]
-; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v7
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v7
 ; GISEL-NEXT:    v_cndmask_b32_e32 v10, 0, v0, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e32 v11, 0, v1, vcc
 ; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[6:7], v10, v8, 0
@@ -1340,7 +1340,7 @@ define i128 @fptoui_f32_to_i128(float %x) {
 ; GISEL-NEXT:  ; %bb.5: ; %fp-to-i-if-then12
 ; GISEL-NEXT:    v_sub_co_u32_e32 v2, vcc, 0x96, v6
 ; GISEL-NEXT:    v_lshrrev_b64 v[0:1], v2, v[4:5]
-; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v2
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v2
 ; GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
 ; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v2
 ; GISEL-NEXT:    v_cndmask_b32_e32 v4, v0, v4, vcc
@@ -1482,7 +1482,7 @@ define i128 @fptosi_bf16_to_i128(bfloat %x) {
 ; SDAG-NEXT:    v_mov_b32_e32 v6, 0
 ; SDAG-NEXT:    v_mov_b32_e32 v1, 0
 ; SDAG-NEXT:    v_mov_b32_e32 v3, 0
-; SDAG-NEXT:    v_cmp_lt_u32_e32 vcc, s4, v5
+; SDAG-NEXT:    v_cmp_le_u32_e32 vcc, s4, v5
 ; SDAG-NEXT:    s_and_saveexec_b64 s[8:9], vcc
 ; SDAG-NEXT:    s_cbranch_execz .LBB6_10
 ; SDAG-NEXT:  ; %bb.1: ; %fp-to-i-if-end
@@ -1493,7 +1493,7 @@ define i128 @fptosi_bf16_to_i128(bfloat %x) {
 ; SDAG-NEXT:    v_addc_co_u32_e32 v3, vcc, -1, v6, vcc
 ; SDAG-NEXT:    s_mov_b32 s7, -1
 ; SDAG-NEXT:    v_cmp_eq_u64_e64 s[4:5], -1, v[2:3]
-; SDAG-NEXT:    v_cmp_lt_u64_e64 s[6:7], s[6:7], v[0:1]
+; SDAG-NEXT:    v_cmp_le_u64_e64 s[6:7], s[6:7], v[0:1]
 ; SDAG-NEXT:    v_cmp_lt_i16_e32 vcc, -1, v4
 ; SDAG-NEXT:    s_and_b64 s[4:5], s[4:5], s[6:7]
 ; SDAG-NEXT:    ; implicit-def: $vgpr0_vgpr1
@@ -1505,7 +1505,7 @@ define i128 @fptosi_bf16_to_i128(bfloat %x) {
 ; SDAG-NEXT:    s_movk_i32 s4, 0x7f
 ; SDAG-NEXT:    v_and_b32_sdwa v0, v4, s4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
 ; SDAG-NEXT:    s_mov_b64 s[4:5], 0x85
-; SDAG-NEXT:    v_cmp_lt_u64_e64 s[4:5], s[4:5], v[5:6]
+; SDAG-NEXT:    v_cmp_le_u64_e64 s[4:5], s[4:5], v[5:6]
 ; SDAG-NEXT:    v_mov_b32_e32 v7, 0
 ; SDAG-NEXT:    v_cndmask_b32_e64 v9, -1, 0, vcc
 ; SDAG-NEXT:    v_cndmask_b32_e64 v8, -1, 1, vcc
@@ -1523,7 +1523,7 @@ define i128 @fptosi_bf16_to_i128(bfloat %x) {
 ; SDAG-NEXT:    v_add_u32_e32 v4, 0xffffff7a, v5
 ; SDAG-NEXT:    v_lshrrev_b64 v[0:1], v0, v[6:7]
 ; SDAG-NEXT:    v_lshlrev_b64 v[2:3], v2, v[6:7]
-; SDAG-NEXT:    v_cmp_gt_u32_e64 s[4:5], 64, v4
+; SDAG-NEXT:    v_cmp_ge_u32_e64 s[4:5], 64, v4
 ; SDAG-NEXT:    v_cndmask_b32_e64 v1, v3, v1, s[4:5]
 ; SDAG-NEXT:    v_cmp_ne_u32_e64 s[6:7], 0, v4
 ; SDAG-NEXT:    v_cndmask_b32_e64 v3, 0, v1, s[6:7]
@@ -1560,7 +1560,7 @@ define i128 @fptosi_bf16_to_i128(bfloat %x) {
 ; SDAG-NEXT:  ; %bb.5: ; %fp-to-i-if-then12
 ; SDAG-NEXT:    v_sub_u32_e32 v2, 0x86, v5
 ; SDAG-NEXT:    v_lshrrev_b64 v[0:1], v2, v[6:7]
-; SDAG-NEXT:    v_cmp_gt_u32_e64 s[4:5], 64, v2
+; SDAG-NEXT:    v_cmp_ge_u32_e64 s[4:5], 64, v2
 ; SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, v0, s[4:5]
 ; SDAG-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v2
 ; SDAG-NEXT:    v_cndmask_b32_e64 v0, v0, v6, s[4:5]
@@ -1613,7 +1613,7 @@ define i128 @fptosi_bf16_to_i128(bfloat %x) {
 ; GISEL-NEXT:    v_cmp_ge_u64_e32 vcc, v[0:1], v[2:3]
 ; GISEL-NEXT:    v_addc_co_u32_e64 v8, s[6:7], 0, -1, s[6:7]
 ; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; GISEL-NEXT:    v_cmp_le_u64_e32 vcc, -1, v[7:8]
+; GISEL-NEXT:    v_cmp_lt_u64_e32 vcc, -1, v[7:8]
 ; GISEL-NEXT:    v_cmp_lt_i16_e64 s[4:5], -1, v4
 ; GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
 ; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, -1, v[7:8]
@@ -1692,7 +1692,7 @@ define i128 @fptosi_bf16_to_i128(bfloat %x) {
 ; GISEL-NEXT:  ; %bb.3: ; %fp-to-i-if-else
 ; GISEL-NEXT:    v_add_u32_e32 v10, 0xffffff7a, v5
 ; GISEL-NEXT:    v_lshlrev_b64 v[0:1], v10, v[6:7]
-; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v10
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v10
 ; GISEL-NEXT:    v_cndmask_b32_e32 v11, 0, v0, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e32 v12, 0, v1, vcc
 ; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[6:7], v11, v9, 0
@@ -1726,7 +1726,7 @@ define i128 @fptosi_bf16_to_i128(bfloat %x) {
 ; GISEL-NEXT:  ; %bb.5: ; %fp-to-i-if-then12
 ; GISEL-NEXT:    v_sub_co_u32_e32 v2, vcc, 0x86, v5
 ; GISEL-NEXT:    v_lshrrev_b64 v[0:1], v2, v[6:7]
-; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v2
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v2
 ; GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
 ; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v2
 ; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
@@ -1829,7 +1829,7 @@ define i128 @fptoui_bf16_to_i128(bfloat %x) {
 ; SDAG-NEXT:    v_mov_b32_e32 v6, 0
 ; SDAG-NEXT:    v_mov_b32_e32 v1, 0
 ; SDAG-NEXT:    v_mov_b32_e32 v3, 0
-; SDAG-NEXT:    v_cmp_lt_u32_e32 vcc, s4, v5
+; SDAG-NEXT:    v_cmp_le_u32_e32 vcc, s4, v5
 ; SDAG-NEXT:    s_and_saveexec_b64 s[8:9], vcc
 ; SDAG-NEXT:    s_cbranch_execz .LBB7_10
 ; SDAG-NEXT:  ; %bb.1: ; %fp-to-i-if-end
@@ -1840,7 +1840,7 @@ define i128 @fptoui_bf16_to_i128(bfloat %x) {
 ; SDAG-NEXT:    v_addc_co_u32_e32 v3, vcc, -1, v6, vcc
 ; SDAG-NEXT:    s_mov_b32 s7, -1
 ; SDAG-NEXT:    v_cmp_eq_u64_e64 s[4:5], -1, v[2:3]
-; SDAG-NEXT:    v_cmp_lt_u64_e64 s[6:7], s[6:7], v[0:1]
+; SDAG-NEXT:    v_cmp_le_u64_e64 s[6:7], s[6:7], v[0:1]
 ; SDAG-NEXT:    v_cmp_lt_i16_e32 vcc, -1, v4
 ; SDAG-NEXT:    s_and_b64 s[4:5], s[4:5], s[6:7]
 ; SDAG-NEXT:    ; implicit-def: $vgpr0_vgpr1
@@ -1852,7 +1852,7 @@ define i128 @fptoui_bf16_to_i128(bfloat %x) {
 ; SDAG-NEXT:    s_movk_i32 s4, 0x7f
 ; SDAG-NEXT:    v_and_b32_sdwa v0, v4, s4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
 ; SDAG-NEXT:    s_mov_b64 s[4:5], 0x85
-; SDAG-NEXT:    v_cmp_lt_u64_e64 s[4:5], s[4:5], v[5:6]
+; SDAG-NEXT:    v_cmp_le_u64_e64 s[4:5], s[4:5], v[5:6]
 ; SDAG-NEXT:    v_mov_b32_e32 v7, 0
 ; SDAG-NEXT:    v_cndmask_b32_e64 v9, -1, 0, vcc
 ; SDAG-NEXT:    v_cndmask_b32_e64 v8, -1, 1, vcc
@@ -1870,7 +1870,7 @@ define i128 @fptoui_bf16_to_i128(bfloat %x) {
 ; SDAG-NEXT:    v_add_u32_e32 v4, 0xffffff7a, v5
 ; SDAG-NEXT:    v_lshrrev_b64 v[0:1], v0, v[6:7]
 ; SDAG-NEXT:    v_lshlrev_b64 v[2:3], v2, v[6:7]
-; SDAG-NEXT:    v_cmp_gt_u32_e64 s[4:5], 64, v4
+; SDAG-NEXT:    v_cmp_ge_u32_e64 s[4:5], 64, v4
 ; SDAG-NEXT:    v_cndmask_b32_e64 v1, v3, v1, s[4:5]
 ; SDAG-NEXT:    v_cmp_ne_u32_e64 s[6:7], 0, v4
 ; SDAG-NEXT:    v_cndmask_b32_e64 v3, 0, v1, s[6:7]
@@ -1907,7 +1907,7 @@ define i128 @fptoui_bf16_to_i128(bfloat %x) {
 ; SDAG-NEXT:  ; %bb.5: ; %fp-to-i-if-then12
 ; SDAG-NEXT:    v_sub_u32_e32 v2, 0x86, v5
 ; SDAG-NEXT:    v_lshrrev_b64 v[0:1], v2, v[6:7]
-; SDAG-NEXT:    v_cmp_gt_u32_e64 s[4:5], 64, v2
+; SDAG-NEXT:    v_cmp_ge_u32_e64 s[4:5], 64, v2
 ; SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, v0, s[4:5]
 ; SDAG-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v2
 ; SDAG-NEXT:    v_cndmask_b32_e64 v0, v0, v6, s[4:5]
@@ -1960,7 +1960,7 @@ define i128 @fptoui_bf16_to_i128(bfloat %x) {
 ; GISEL-NEXT:    v_cmp_ge_u64_e32 vcc, v[0:1], v[2:3]
 ; GISEL-NEXT:    v_addc_co_u32_e64 v8, s[6:7], 0, -1, s[6:7]
 ; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; GISEL-NEXT:    v_cmp_le_u64_e32 vcc, -1, v[7:8]
+; GISEL-NEXT:    v_cmp_lt_u64_e32 vcc, -1, v[7:8]
 ; GISEL-NEXT:    v_cmp_lt_i16_e64 s[4:5], -1, v4
 ; GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
 ; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, -1, v[7:8]
@@ -2039,7 +2039,7 @@ define i128 @fptoui_bf16_to_i128(bfloat %x) {
 ; GISEL-NEXT:  ; %bb.3: ; %fp-to-i-if-else
 ; GISEL-NEXT:    v_add_u32_e32 v10, 0xffffff7a, v5
 ; GISEL-NEXT:    v_lshlrev_b64 v[0:1], v10, v[6:7]
-; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v10
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v10
 ; GISEL-NEXT:    v_cndmask_b32_e32 v11, 0, v0, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e32 v12, 0, v1, vcc
 ; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[6:7], v11, v9, 0
@@ -2073,7 +2073,7 @@ define i128 @fptoui_bf16_to_i128(bfloat %x) {
 ; GISEL-NEXT:  ; %bb.5: ; %fp-to-i-if-then12
 ; GISEL-NEXT:    v_sub_co_u32_e32 v2, vcc, 0x86, v5
 ; GISEL-NEXT:    v_lshrrev_b64 v[0:1], v2, v[6:7]
-; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v2
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v2
 ; GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
 ; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v2
 ; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
diff --git a/llvm/test/CodeGen/AMDGPU/fptrunc.v2f16.no.fast.math.ll b/llvm/test/CodeGen/AMDGPU/fptrunc.v2f16.no.fast.math.ll
index d8f21d285ddff..7b4c11d363ff9 100644
--- a/llvm/test/CodeGen/AMDGPU/fptrunc.v2f16.no.fast.math.ll
+++ b/llvm/test/CodeGen/AMDGPU/fptrunc.v2f16.no.fast.math.ll
@@ -302,12 +302,12 @@ define <2 x half> @v_test_cvt_v2f64_v2f16(<2 x double> %src) {
 ; GFX950-SDAG-NEXT:    v_lshl_or_b32 v6, v5, 12, v0
 ; GFX950-SDAG-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
 ; GFX950-SDAG-NEXT:    v_or_b32_e32 v4, v7, v4
-; GFX950-SDAG-NEXT:    v_cmp_gt_i32_e32 vcc, 1, v5
+; GFX950-SDAG-NEXT:    v_cmp_ge_i32_e32 vcc, 1, v5
 ; GFX950-SDAG-NEXT:    s_movk_i32 s2, 0x40f
 ; GFX950-SDAG-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
 ; GFX950-SDAG-NEXT:    v_cndmask_b32_e32 v4, v6, v4, vcc
 ; GFX950-SDAG-NEXT:    v_and_b32_e32 v6, 7, v4
-; GFX950-SDAG-NEXT:    v_cmp_lt_i32_e32 vcc, 5, v6
+; GFX950-SDAG-NEXT:    v_cmp_le_i32_e32 vcc, 5, v6
 ; GFX950-SDAG-NEXT:    v_lshrrev_b32_e32 v4, 2, v4
 ; GFX950-SDAG-NEXT:    s_mov_b32 s3, 0x8000
 ; GFX950-SDAG-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
@@ -317,7 +317,7 @@ define <2 x half> @v_test_cvt_v2f64_v2f16(<2 x double> %src) {
 ; GFX950-SDAG-NEXT:    v_or_b32_e32 v6, v6, v7
 ; GFX950-SDAG-NEXT:    v_add_u32_e32 v4, v4, v6
 ; GFX950-SDAG-NEXT:    v_mov_b32_e32 v6, 0x7c00
-; GFX950-SDAG-NEXT:    v_cmp_gt_i32_e32 vcc, 31, v5
+; GFX950-SDAG-NEXT:    v_cmp_ge_i32_e32 vcc, 31, v5
 ; GFX950-SDAG-NEXT:    v_mov_b32_e32 v7, 0x7e00
 ; GFX950-SDAG-NEXT:    s_nop 0
 ; GFX950-SDAG-NEXT:    v_cndmask_b32_e32 v4, v6, v4, vcc
@@ -344,12 +344,12 @@ define <2 x half> @v_test_cvt_v2f64_v2f16(<2 x double> %src) {
 ; GFX950-SDAG-NEXT:    v_lshl_or_b32 v5, v4, 12, v1
 ; GFX950-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
 ; GFX950-SDAG-NEXT:    v_or_b32_e32 v2, v8, v2
-; GFX950-SDAG-NEXT:    v_cmp_gt_i32_e32 vcc, 1, v4
+; GFX950-SDAG-NEXT:    v_cmp_ge_i32_e32 vcc, 1, v4
 ; GFX950-SDAG-NEXT:    s_mov_b32 s0, 0x5040100
 ; GFX950-SDAG-NEXT:    s_nop 0
 ; GFX950-SDAG-NEXT:    v_cndmask_b32_e32 v2, v5, v2, vcc
 ; GFX950-SDAG-NEXT:    v_and_b32_e32 v5, 7, v2
-; GFX950-SDAG-NEXT:    v_cmp_lt_i32_e32 vcc, 5, v5
+; GFX950-SDAG-NEXT:    v_cmp_le_i32_e32 vcc, 5, v5
 ; GFX950-SDAG-NEXT:    v_lshrrev_b32_e32 v2, 2, v2
 ; GFX950-SDAG-NEXT:    s_nop 0
 ; GFX950-SDAG-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
@@ -358,7 +358,7 @@ define <2 x half> @v_test_cvt_v2f64_v2f16(<2 x double> %src) {
 ; GFX950-SDAG-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
 ; GFX950-SDAG-NEXT:    v_or_b32_e32 v5, v5, v8
 ; GFX950-SDAG-NEXT:    v_add_u32_e32 v2, v2, v5
-; GFX950-SDAG-NEXT:    v_cmp_gt_i32_e32 vcc, 31, v4
+; GFX950-SDAG-NEXT:    v_cmp_ge_i32_e32 vcc, 31, v4
 ; GFX950-SDAG-NEXT:    s_nop 1
 ; GFX950-SDAG-NEXT:    v_cndmask_b32_e32 v2, v6, v2, vcc
 ; GFX950-SDAG-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v1
@@ -397,18 +397,18 @@ define <2 x half> @v_test_cvt_v2f64_v2f16(<2 x double> %src) {
 ; GFX950-GISEL-NEXT:    v_lshl_or_b32 v5, v5, 9, v8
 ; GFX950-GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
 ; GFX950-GISEL-NEXT:    v_or_b32_e32 v0, v11, v0
-; GFX950-GISEL-NEXT:    v_cmp_gt_i32_e32 vcc, 1, v4
+; GFX950-GISEL-NEXT:    v_cmp_ge_i32_e32 vcc, 1, v4
 ; GFX950-GISEL-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
 ; GFX950-GISEL-NEXT:    v_and_or_b32 v2, v3, v7, v2
 ; GFX950-GISEL-NEXT:    v_cndmask_b32_e32 v0, v9, v0, vcc
 ; GFX950-GISEL-NEXT:    v_and_b32_e32 v9, 7, v0
 ; GFX950-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v9
-; GFX950-GISEL-NEXT:    v_cmp_lt_i32_e64 s[0:1], 5, v9
+; GFX950-GISEL-NEXT:    v_cmp_le_i32_e64 s[0:1], 5, v9
 ; GFX950-GISEL-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
 ; GFX950-GISEL-NEXT:    v_lshrrev_b32_e32 v0, 2, v0
 ; GFX950-GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, s[0:1]
 ; GFX950-GISEL-NEXT:    v_add_u32_e32 v0, v0, v9
-; GFX950-GISEL-NEXT:    v_cmp_lt_i32_e32 vcc, 30, v4
+; GFX950-GISEL-NEXT:    v_cmp_le_i32_e32 vcc, 30, v4
 ; GFX950-GISEL-NEXT:    v_mov_b32_e32 v9, 0x40f
 ; GFX950-GISEL-NEXT:    s_nop 0
 ; GFX950-GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc
@@ -436,17 +436,17 @@ define <2 x half> @v_test_cvt_v2f64_v2f16(<2 x double> %src) {
 ; GFX950-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX950-GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
 ; GFX950-GISEL-NEXT:    v_or_b32_e32 v2, v10, v2
-; GFX950-GISEL-NEXT:    v_cmp_gt_i32_e32 vcc, 1, v1
+; GFX950-GISEL-NEXT:    v_cmp_ge_i32_e32 vcc, 1, v1
 ; GFX950-GISEL-NEXT:    s_nop 1
 ; GFX950-GISEL-NEXT:    v_cndmask_b32_e32 v2, v6, v2, vcc
 ; GFX950-GISEL-NEXT:    v_and_b32_e32 v6, 7, v2
 ; GFX950-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v6
-; GFX950-GISEL-NEXT:    v_cmp_lt_i32_e64 s[0:1], 5, v6
+; GFX950-GISEL-NEXT:    v_cmp_le_i32_e64 s[0:1], 5, v6
 ; GFX950-GISEL-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
 ; GFX950-GISEL-NEXT:    v_lshrrev_b32_e32 v2, 2, v2
 ; GFX950-GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, 1, s[0:1]
 ; GFX950-GISEL-NEXT:    v_add_u32_e32 v2, v2, v6
-; GFX950-GISEL-NEXT:    v_cmp_lt_i32_e32 vcc, 30, v1
+; GFX950-GISEL-NEXT:    v_cmp_le_i32_e32 vcc, 30, v1
 ; GFX950-GISEL-NEXT:    s_nop 1
 ; GFX950-GISEL-NEXT:    v_cndmask_b32_e32 v2, v2, v8, vcc
 ; GFX950-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v9
diff --git a/llvm/test/CodeGen/AMDGPU/global_atomics_i64_system.ll b/llvm/test/CodeGen/AMDGPU/global_atomics_i64_system.ll
index 9845064604bc1..5edc8a8fe4d69 100644
--- a/llvm/test/CodeGen/AMDGPU/global_atomics_i64_system.ll
+++ b/llvm/test/CodeGen/AMDGPU/global_atomics_i64_system.ll
@@ -4452,7 +4452,7 @@ define amdgpu_gfx void @global_atomic_max_i64_noret_scalar(ptr addrspace(1) inre
 ; SI-NEXT:    s_waitcnt vmcnt(0)
 ; SI-NEXT:    v_mov_b32_e32 v9, v1
 ; SI-NEXT:    v_mov_b32_e32 v8, v0
-; SI-NEXT:    v_cmp_lt_i64_e32 vcc, s[34:35], v[8:9]
+; SI-NEXT:    v_cmp_le_i64_e32 vcc, s[34:35], v[8:9]
 ; SI-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
 ; SI-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
 ; SI-NEXT:    s_waitcnt expcnt(0)
@@ -4491,7 +4491,7 @@ define amdgpu_gfx void @global_atomic_max_i64_noret_scalar(ptr addrspace(1) inre
 ; VI-NEXT:  .LBB84_1: ; %atomicrmw.start
 ; VI-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; VI-NEXT:    s_waitcnt vmcnt(0)
-; VI-NEXT:    v_cmp_lt_i64_e32 vcc, s[6:7], v[2:3]
+; VI-NEXT:    v_cmp_le_i64_e32 vcc, s[6:7], v[2:3]
 ; VI-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
 ; VI-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
 ; VI-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
@@ -4518,7 +4518,7 @@ define amdgpu_gfx void @global_atomic_max_i64_noret_scalar(ptr addrspace(1) inre
 ; GFX9-NEXT:  .LBB84_1: ; %atomicrmw.start
 ; GFX9-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    v_cmp_lt_i64_e32 vcc, s[6:7], v[2:3]
+; GFX9-NEXT:    v_cmp_le_i64_e32 vcc, s[6:7], v[2:3]
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v5, v3, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v6, v2, vcc
 ; GFX9-NEXT:    global_atomic_cmpswap_x2 v[0:1], v4, v[0:3], s[4:5] glc
@@ -4560,7 +4560,7 @@ define amdgpu_gfx void @global_atomic_max_i64_noret_offset_scalar(ptr addrspace(
 ; SI-NEXT:    s_waitcnt vmcnt(0)
 ; SI-NEXT:    v_mov_b32_e32 v9, v1
 ; SI-NEXT:    v_mov_b32_e32 v8, v0
-; SI-NEXT:    v_cmp_lt_i64_e32 vcc, s[34:35], v[8:9]
+; SI-NEXT:    v_cmp_le_i64_e32 vcc, s[34:35], v[8:9]
 ; SI-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
 ; SI-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
 ; SI-NEXT:    s_waitcnt expcnt(0)
@@ -4599,7 +4599,7 @@ define amdgpu_gfx void @global_atomic_max_i64_noret_offset_scalar(ptr addrspace(
 ; VI-NEXT:  .LBB85_1: ; %atomicrmw.start
 ; VI-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; VI-NEXT:    s_waitcnt vmcnt(0)
-; VI-NEXT:    v_cmp_lt_i64_e32 vcc, s[6:7], v[2:3]
+; VI-NEXT:    v_cmp_le_i64_e32 vcc, s[6:7], v[2:3]
 ; VI-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
 ; VI-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
 ; VI-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
@@ -4626,7 +4626,7 @@ define amdgpu_gfx void @global_atomic_max_i64_noret_offset_scalar(ptr addrspace(
 ; GFX9-NEXT:  .LBB85_1: ; %atomicrmw.start
 ; GFX9-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    v_cmp_lt_i64_e32 vcc, s[6:7], v[2:3]
+; GFX9-NEXT:    v_cmp_le_i64_e32 vcc, s[6:7], v[2:3]
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v5, v3, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v6, v2, vcc
 ; GFX9-NEXT:    global_atomic_cmpswap_x2 v[0:1], v4, v[0:3], s[4:5] offset:32 glc
@@ -4669,7 +4669,7 @@ define amdgpu_gfx i64 @global_atomic_max_i64_ret_scalar(ptr addrspace(1) inreg %
 ; SI-NEXT:    s_waitcnt vmcnt(0)
 ; SI-NEXT:    v_mov_b32_e32 v9, v1
 ; SI-NEXT:    v_mov_b32_e32 v8, v0
-; SI-NEXT:    v_cmp_lt_i64_e32 vcc, s[34:35], v[8:9]
+; SI-NEXT:    v_cmp_le_i64_e32 vcc, s[34:35], v[8:9]
 ; SI-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
 ; SI-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
 ; SI-NEXT:    s_waitcnt expcnt(0)
@@ -4710,7 +4710,7 @@ define amdgpu_gfx i64 @global_atomic_max_i64_ret_scalar(ptr addrspace(1) inreg %
 ; VI-NEXT:    s_waitcnt vmcnt(0)
 ; VI-NEXT:    v_mov_b32_e32 v9, v1
 ; VI-NEXT:    v_mov_b32_e32 v8, v0
-; VI-NEXT:    v_cmp_lt_i64_e32 vcc, s[6:7], v[8:9]
+; VI-NEXT:    v_cmp_le_i64_e32 vcc, s[6:7], v[8:9]
 ; VI-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
 ; VI-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
 ; VI-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[6:9] glc
@@ -4737,7 +4737,7 @@ define amdgpu_gfx i64 @global_atomic_max_i64_ret_scalar(ptr addrspace(1) inreg %
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-NEXT:    v_mov_b32_e32 v8, v1
 ; GFX9-NEXT:    v_mov_b32_e32 v7, v0
-; GFX9-NEXT:    v_cmp_lt_i64_e32 vcc, s[6:7], v[7:8]
+; GFX9-NEXT:    v_cmp_le_i64_e32 vcc, s[6:7], v[7:8]
 ; GFX9-NEXT:    v_cndmask_b32_e32 v6, v3, v8, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v5, v4, v7, vcc
 ; GFX9-NEXT:    global_atomic_cmpswap_x2 v[0:1], v2, v[5:8], s[4:5] glc
@@ -4777,7 +4777,7 @@ define amdgpu_gfx i64 @global_atomic_max_i64_ret_offset_scalar(ptr addrspace(1)
 ; SI-NEXT:    s_waitcnt vmcnt(0)
 ; SI-NEXT:    v_mov_b32_e32 v9, v1
 ; SI-NEXT:    v_mov_b32_e32 v8, v0
-; SI-NEXT:    v_cmp_lt_i64_e32 vcc, s[34:35], v[8:9]
+; SI-NEXT:    v_cmp_le_i64_e32 vcc, s[34:35], v[8:9]
 ; SI-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
 ; SI-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
 ; SI-NEXT:    s_waitcnt expcnt(0)
@@ -4818,7 +4818,7 @@ define amdgpu_gfx i64 @global_atomic_max_i64_ret_offset_scalar(ptr addrspace(1)
 ; VI-NEXT:    s_waitcnt vmcnt(0)
 ; VI-NEXT:    v_mov_b32_e32 v9, v1
 ; VI-NEXT:    v_mov_b32_e32 v8, v0
-; VI-NEXT:    v_cmp_lt_i64_e32 vcc, s[6:7], v[8:9]
+; VI-NEXT:    v_cmp_le_i64_e32 vcc, s[6:7], v[8:9]
 ; VI-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
 ; VI-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
 ; VI-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[6:9] glc
@@ -4845,7 +4845,7 @@ define amdgpu_gfx i64 @global_atomic_max_i64_ret_offset_scalar(ptr addrspace(1)
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-NEXT:    v_mov_b32_e32 v8, v1
 ; GFX9-NEXT:    v_mov_b32_e32 v7, v0
-; GFX9-NEXT:    v_cmp_lt_i64_e32 vcc, s[6:7], v[7:8]
+; GFX9-NEXT:    v_cmp_le_i64_e32 vcc, s[6:7], v[7:8]
 ; GFX9-NEXT:    v_cndmask_b32_e32 v6, v3, v8, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v5, v4, v7, vcc
 ; GFX9-NEXT:    global_atomic_cmpswap_x2 v[0:1], v2, v[5:8], s[4:5] offset:32 glc
@@ -4885,7 +4885,7 @@ define amdgpu_kernel void @atomic_max_i64_addr64_offset(ptr addrspace(1) %out, i
 ; SI-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; SI-NEXT:    v_mov_b32_e32 v9, v1
 ; SI-NEXT:    v_mov_b32_e32 v8, v0
-; SI-NEXT:    v_cmp_lt_i64_e32 vcc, s[2:3], v[8:9]
+; SI-NEXT:    v_cmp_le_i64_e32 vcc, s[2:3], v[8:9]
 ; SI-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
 ; SI-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
 ; SI-NEXT:    s_waitcnt expcnt(0)
@@ -4924,7 +4924,7 @@ define amdgpu_kernel void @atomic_max_i64_addr64_offset(ptr addrspace(1) %out, i
 ; VI-NEXT:    v_mov_b32_e32 v4, s0
 ; VI-NEXT:  .LBB88_1: ; %atomicrmw.start
 ; VI-NEXT:    ; =>This Inner Loop Header: Depth=1
-; VI-NEXT:    v_cmp_lt_i64_e32 vcc, s[2:3], v[2:3]
+; VI-NEXT:    v_cmp_le_i64_e32 vcc, s[2:3], v[2:3]
 ; VI-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
 ; VI-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
 ; VI-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
@@ -4957,7 +4957,7 @@ define amdgpu_kernel void @atomic_max_i64_addr64_offset(ptr addrspace(1) %out, i
 ; GFX9-NEXT:    v_mov_b32_e32 v3, s7
 ; GFX9-NEXT:  .LBB88_1: ; %atomicrmw.start
 ; GFX9-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX9-NEXT:    v_cmp_lt_i64_e32 vcc, s[2:3], v[2:3]
+; GFX9-NEXT:    v_cmp_le_i64_e32 vcc, s[2:3], v[2:3]
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v4, v3, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v5, v2, vcc
 ; GFX9-NEXT:    global_atomic_cmpswap_x2 v[0:1], v6, v[0:3], s[0:1] offset:32 glc
@@ -4999,7 +4999,7 @@ define amdgpu_kernel void @atomic_max_i64_ret_addr64_offset(ptr addrspace(1) %ou
 ; SI-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; SI-NEXT:    v_mov_b32_e32 v9, v1
 ; SI-NEXT:    v_mov_b32_e32 v8, v0
-; SI-NEXT:    v_cmp_lt_i64_e32 vcc, s[4:5], v[8:9]
+; SI-NEXT:    v_cmp_le_i64_e32 vcc, s[4:5], v[8:9]
 ; SI-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
 ; SI-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
 ; SI-NEXT:    s_waitcnt expcnt(0)
@@ -5045,7 +5045,7 @@ define amdgpu_kernel void @atomic_max_i64_ret_addr64_offset(ptr addrspace(1) %ou
 ; VI-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; VI-NEXT:    v_mov_b32_e32 v9, v3
 ; VI-NEXT:    v_mov_b32_e32 v8, v2
-; VI-NEXT:    v_cmp_lt_i64_e32 vcc, s[4:5], v[8:9]
+; VI-NEXT:    v_cmp_le_i64_e32 vcc, s[4:5], v[8:9]
 ; VI-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
 ; VI-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
 ; VI-NEXT:    flat_atomic_cmpswap_x2 v[2:3], v[0:1], v[6:9] glc
@@ -5081,7 +5081,7 @@ define amdgpu_kernel void @atomic_max_i64_ret_addr64_offset(ptr addrspace(1) %ou
 ; GFX9-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GFX9-NEXT:    v_mov_b32_e32 v8, v1
 ; GFX9-NEXT:    v_mov_b32_e32 v7, v0
-; GFX9-NEXT:    v_cmp_lt_i64_e32 vcc, s[12:13], v[7:8]
+; GFX9-NEXT:    v_cmp_le_i64_e32 vcc, s[12:13], v[7:8]
 ; GFX9-NEXT:    v_cndmask_b32_e32 v6, v2, v8, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v5, v3, v7, vcc
 ; GFX9-NEXT:    global_atomic_cmpswap_x2 v[0:1], v4, v[5:8], s[0:1] offset:32 glc
@@ -5126,7 +5126,7 @@ define amdgpu_kernel void @atomic_max_i64_addr64(ptr addrspace(1) %out, i64 %in,
 ; SI-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; SI-NEXT:    v_mov_b32_e32 v9, v1
 ; SI-NEXT:    v_mov_b32_e32 v8, v0
-; SI-NEXT:    v_cmp_lt_i64_e32 vcc, s[2:3], v[8:9]
+; SI-NEXT:    v_cmp_le_i64_e32 vcc, s[2:3], v[8:9]
 ; SI-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
 ; SI-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
 ; SI-NEXT:    s_waitcnt expcnt(0)
@@ -5163,7 +5163,7 @@ define amdgpu_kernel void @atomic_max_i64_addr64(ptr addrspace(1) %out, i64 %in,
 ; VI-NEXT:    v_mov_b32_e32 v5, s5
 ; VI-NEXT:  .LBB90_1: ; %atomicrmw.start
 ; VI-NEXT:    ; =>This Inner Loop Header: Depth=1
-; VI-NEXT:    v_cmp_lt_i64_e32 vcc, s[2:3], v[2:3]
+; VI-NEXT:    v_cmp_le_i64_e32 vcc, s[2:3], v[2:3]
 ; VI-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
 ; VI-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
 ; VI-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
@@ -5196,7 +5196,7 @@ define amdgpu_kernel void @atomic_max_i64_addr64(ptr addrspace(1) %out, i64 %in,
 ; GFX9-NEXT:    v_mov_b32_e32 v3, s7
 ; GFX9-NEXT:  .LBB90_1: ; %atomicrmw.start
 ; GFX9-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX9-NEXT:    v_cmp_lt_i64_e32 vcc, s[2:3], v[2:3]
+; GFX9-NEXT:    v_cmp_le_i64_e32 vcc, s[2:3], v[2:3]
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v4, v3, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v5, v2, vcc
 ; GFX9-NEXT:    global_atomic_cmpswap_x2 v[0:1], v6, v[0:3], s[0:1] glc
@@ -5237,7 +5237,7 @@ define amdgpu_kernel void @atomic_max_i64_ret_addr64(ptr addrspace(1) %out, ptr
 ; SI-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; SI-NEXT:    v_mov_b32_e32 v9, v1
 ; SI-NEXT:    v_mov_b32_e32 v8, v0
-; SI-NEXT:    v_cmp_lt_i64_e32 vcc, s[4:5], v[8:9]
+; SI-NEXT:    v_cmp_le_i64_e32 vcc, s[4:5], v[8:9]
 ; SI-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
 ; SI-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
 ; SI-NEXT:    s_waitcnt expcnt(0)
@@ -5281,7 +5281,7 @@ define amdgpu_kernel void @atomic_max_i64_ret_addr64(ptr addrspace(1) %out, ptr
 ; VI-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; VI-NEXT:    v_mov_b32_e32 v9, v3
 ; VI-NEXT:    v_mov_b32_e32 v8, v2
-; VI-NEXT:    v_cmp_lt_i64_e32 vcc, s[4:5], v[8:9]
+; VI-NEXT:    v_cmp_le_i64_e32 vcc, s[4:5], v[8:9]
 ; VI-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
 ; VI-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
 ; VI-NEXT:    flat_atomic_cmpswap_x2 v[2:3], v[0:1], v[6:9] glc
@@ -5317,7 +5317,7 @@ define amdgpu_kernel void @atomic_max_i64_ret_addr64(ptr addrspace(1) %out, ptr
 ; GFX9-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GFX9-NEXT:    v_mov_b32_e32 v8, v1
 ; GFX9-NEXT:    v_mov_b32_e32 v7, v0
-; GFX9-NEXT:    v_cmp_lt_i64_e32 vcc, s[12:13], v[7:8]
+; GFX9-NEXT:    v_cmp_le_i64_e32 vcc, s[12:13], v[7:8]
 ; GFX9-NEXT:    v_cndmask_b32_e32 v6, v2, v8, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v5, v3, v7, vcc
 ; GFX9-NEXT:    global_atomic_cmpswap_x2 v[0:1], v4, v[5:8], s[0:1] glc
@@ -5914,7 +5914,7 @@ define amdgpu_gfx void @global_atomic_umax_i64_noret_scalar(ptr addrspace(1) inr
 ; SI-NEXT:    s_waitcnt vmcnt(0)
 ; SI-NEXT:    v_mov_b32_e32 v9, v1
 ; SI-NEXT:    v_mov_b32_e32 v8, v0
-; SI-NEXT:    v_cmp_lt_u64_e32 vcc, s[34:35], v[8:9]
+; SI-NEXT:    v_cmp_le_u64_e32 vcc, s[34:35], v[8:9]
 ; SI-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
 ; SI-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
 ; SI-NEXT:    s_waitcnt expcnt(0)
@@ -5953,7 +5953,7 @@ define amdgpu_gfx void @global_atomic_umax_i64_noret_scalar(ptr addrspace(1) inr
 ; VI-NEXT:  .LBB98_1: ; %atomicrmw.start
 ; VI-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; VI-NEXT:    s_waitcnt vmcnt(0)
-; VI-NEXT:    v_cmp_lt_u64_e32 vcc, s[6:7], v[2:3]
+; VI-NEXT:    v_cmp_le_u64_e32 vcc, s[6:7], v[2:3]
 ; VI-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
 ; VI-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
 ; VI-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
@@ -5980,7 +5980,7 @@ define amdgpu_gfx void @global_atomic_umax_i64_noret_scalar(ptr addrspace(1) inr
 ; GFX9-NEXT:  .LBB98_1: ; %atomicrmw.start
 ; GFX9-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    v_cmp_lt_u64_e32 vcc, s[6:7], v[2:3]
+; GFX9-NEXT:    v_cmp_le_u64_e32 vcc, s[6:7], v[2:3]
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v5, v3, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v6, v2, vcc
 ; GFX9-NEXT:    global_atomic_cmpswap_x2 v[0:1], v4, v[0:3], s[4:5] glc
@@ -6022,7 +6022,7 @@ define amdgpu_gfx void @global_atomic_umax_i64_noret_offset_scalar(ptr addrspace
 ; SI-NEXT:    s_waitcnt vmcnt(0)
 ; SI-NEXT:    v_mov_b32_e32 v9, v1
 ; SI-NEXT:    v_mov_b32_e32 v8, v0
-; SI-NEXT:    v_cmp_lt_u64_e32 vcc, s[34:35], v[8:9]
+; SI-NEXT:    v_cmp_le_u64_e32 vcc, s[34:35], v[8:9]
 ; SI-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
 ; SI-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
 ; SI-NEXT:    s_waitcnt expcnt(0)
@@ -6061,7 +6061,7 @@ define amdgpu_gfx void @global_atomic_umax_i64_noret_offset_scalar(ptr addrspace
 ; VI-NEXT:  .LBB99_1: ; %atomicrmw.start
 ; VI-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; VI-NEXT:    s_waitcnt vmcnt(0)
-; VI-NEXT:    v_cmp_lt_u64_e32 vcc, s[6:7], v[2:3]
+; VI-NEXT:    v_cmp_le_u64_e32 vcc, s[6:7], v[2:3]
 ; VI-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
 ; VI-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
 ; VI-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
@@ -6088,7 +6088,7 @@ define amdgpu_gfx void @global_atomic_umax_i64_noret_offset_scalar(ptr addrspace
 ; GFX9-NEXT:  .LBB99_1: ; %atomicrmw.start
 ; GFX9-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    v_cmp_lt_u64_e32 vcc, s[6:7], v[2:3]
+; GFX9-NEXT:    v_cmp_le_u64_e32 vcc, s[6:7], v[2:3]
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v5, v3, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v6, v2, vcc
 ; GFX9-NEXT:    global_atomic_cmpswap_x2 v[0:1], v4, v[0:3], s[4:5] offset:32 glc
@@ -6131,7 +6131,7 @@ define amdgpu_gfx i64 @global_atomic_umax_i64_ret_scalar(ptr addrspace(1) inreg
 ; SI-NEXT:    s_waitcnt vmcnt(0)
 ; SI-NEXT:    v_mov_b32_e32 v9, v1
 ; SI-NEXT:    v_mov_b32_e32 v8, v0
-; SI-NEXT:    v_cmp_lt_u64_e32 vcc, s[34:35], v[8:9]
+; SI-NEXT:    v_cmp_le_u64_e32 vcc, s[34:35], v[8:9]
 ; SI-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
 ; SI-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
 ; SI-NEXT:    s_waitcnt expcnt(0)
@@ -6172,7 +6172,7 @@ define amdgpu_gfx i64 @global_atomic_umax_i64_ret_scalar(ptr addrspace(1) inreg
 ; VI-NEXT:    s_waitcnt vmcnt(0)
 ; VI-NEXT:    v_mov_b32_e32 v9, v1
 ; VI-NEXT:    v_mov_b32_e32 v8, v0
-; VI-NEXT:    v_cmp_lt_u64_e32 vcc, s[6:7], v[8:9]
+; VI-NEXT:    v_cmp_le_u64_e32 vcc, s[6:7], v[8:9]
 ; VI-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
 ; VI-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
 ; VI-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[6:9] glc
@@ -6199,7 +6199,7 @@ define amdgpu_gfx i64 @global_atomic_umax_i64_ret_scalar(ptr addrspace(1) inreg
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-NEXT:    v_mov_b32_e32 v8, v1
 ; GFX9-NEXT:    v_mov_b32_e32 v7, v0
-; GFX9-NEXT:    v_cmp_lt_u64_e32 vcc, s[6:7], v[7:8]
+; GFX9-NEXT:    v_cmp_le_u64_e32 vcc, s[6:7], v[7:8]
 ; GFX9-NEXT:    v_cndmask_b32_e32 v6, v3, v8, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v5, v4, v7, vcc
 ; GFX9-NEXT:    global_atomic_cmpswap_x2 v[0:1], v2, v[5:8], s[4:5] glc
@@ -6239,7 +6239,7 @@ define amdgpu_gfx i64 @global_atomic_umax_i64_ret_offset_scalar(ptr addrspace(1)
 ; SI-NEXT:    s_waitcnt vmcnt(0)
 ; SI-NEXT:    v_mov_b32_e32 v9, v1
 ; SI-NEXT:    v_mov_b32_e32 v8, v0
-; SI-NEXT:    v_cmp_lt_u64_e32 vcc, s[34:35], v[8:9]
+; SI-NEXT:    v_cmp_le_u64_e32 vcc, s[34:35], v[8:9]
 ; SI-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
 ; SI-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
 ; SI-NEXT:    s_waitcnt expcnt(0)
@@ -6280,7 +6280,7 @@ define amdgpu_gfx i64 @global_atomic_umax_i64_ret_offset_scalar(ptr addrspace(1)
 ; VI-NEXT:    s_waitcnt vmcnt(0)
 ; VI-NEXT:    v_mov_b32_e32 v9, v1
 ; VI-NEXT:    v_mov_b32_e32 v8, v0
-; VI-NEXT:    v_cmp_lt_u64_e32 vcc, s[6:7], v[8:9]
+; VI-NEXT:    v_cmp_le_u64_e32 vcc, s[6:7], v[8:9]
 ; VI-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
 ; VI-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
 ; VI-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[6:9] glc
@@ -6307,7 +6307,7 @@ define amdgpu_gfx i64 @global_atomic_umax_i64_ret_offset_scalar(ptr addrspace(1)
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-NEXT:    v_mov_b32_e32 v8, v1
 ; GFX9-NEXT:    v_mov_b32_e32 v7, v0
-; GFX9-NEXT:    v_cmp_lt_u64_e32 vcc, s[6:7], v[7:8]
+; GFX9-NEXT:    v_cmp_le_u64_e32 vcc, s[6:7], v[7:8]
 ; GFX9-NEXT:    v_cndmask_b32_e32 v6, v3, v8, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v5, v4, v7, vcc
 ; GFX9-NEXT:    global_atomic_cmpswap_x2 v[0:1], v2, v[5:8], s[4:5] offset:32 glc
@@ -6347,7 +6347,7 @@ define amdgpu_kernel void @atomic_umax_i64_addr64_offset(ptr addrspace(1) %out,
 ; SI-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; SI-NEXT:    v_mov_b32_e32 v9, v1
 ; SI-NEXT:    v_mov_b32_e32 v8, v0
-; SI-NEXT:    v_cmp_lt_u64_e32 vcc, s[2:3], v[8:9]
+; SI-NEXT:    v_cmp_le_u64_e32 vcc, s[2:3], v[8:9]
 ; SI-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
 ; SI-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
 ; SI-NEXT:    s_waitcnt expcnt(0)
@@ -6386,7 +6386,7 @@ define amdgpu_kernel void @atomic_umax_i64_addr64_offset(ptr addrspace(1) %out,
 ; VI-NEXT:    v_mov_b32_e32 v4, s0
 ; VI-NEXT:  .LBB102_1: ; %atomicrmw.start
 ; VI-NEXT:    ; =>This Inner Loop Header: Depth=1
-; VI-NEXT:    v_cmp_lt_u64_e32 vcc, s[2:3], v[2:3]
+; VI-NEXT:    v_cmp_le_u64_e32 vcc, s[2:3], v[2:3]
 ; VI-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
 ; VI-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
 ; VI-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
@@ -6419,7 +6419,7 @@ define amdgpu_kernel void @atomic_umax_i64_addr64_offset(ptr addrspace(1) %out,
 ; GFX9-NEXT:    v_mov_b32_e32 v3, s7
 ; GFX9-NEXT:  .LBB102_1: ; %atomicrmw.start
 ; GFX9-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX9-NEXT:    v_cmp_lt_u64_e32 vcc, s[2:3], v[2:3]
+; GFX9-NEXT:    v_cmp_le_u64_e32 vcc, s[2:3], v[2:3]
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v4, v3, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v5, v2, vcc
 ; GFX9-NEXT:    global_atomic_cmpswap_x2 v[0:1], v6, v[0:3], s[0:1] offset:32 glc
@@ -6461,7 +6461,7 @@ define amdgpu_kernel void @atomic_umax_i64_ret_addr64_offset(ptr addrspace(1) %o
 ; SI-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; SI-NEXT:    v_mov_b32_e32 v9, v1
 ; SI-NEXT:    v_mov_b32_e32 v8, v0
-; SI-NEXT:    v_cmp_lt_u64_e32 vcc, s[4:5], v[8:9]
+; SI-NEXT:    v_cmp_le_u64_e32 vcc, s[4:5], v[8:9]
 ; SI-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
 ; SI-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
 ; SI-NEXT:    s_waitcnt expcnt(0)
@@ -6507,7 +6507,7 @@ define amdgpu_kernel void @atomic_umax_i64_ret_addr64_offset(ptr addrspace(1) %o
 ; VI-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; VI-NEXT:    v_mov_b32_e32 v9, v3
 ; VI-NEXT:    v_mov_b32_e32 v8, v2
-; VI-NEXT:    v_cmp_lt_u64_e32 vcc, s[4:5], v[8:9]
+; VI-NEXT:    v_cmp_le_u64_e32 vcc, s[4:5], v[8:9]
 ; VI-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
 ; VI-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
 ; VI-NEXT:    flat_atomic_cmpswap_x2 v[2:3], v[0:1], v[6:9] glc
@@ -6543,7 +6543,7 @@ define amdgpu_kernel void @atomic_umax_i64_ret_addr64_offset(ptr addrspace(1) %o
 ; GFX9-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GFX9-NEXT:    v_mov_b32_e32 v8, v1
 ; GFX9-NEXT:    v_mov_b32_e32 v7, v0
-; GFX9-NEXT:    v_cmp_lt_u64_e32 vcc, s[12:13], v[7:8]
+; GFX9-NEXT:    v_cmp_le_u64_e32 vcc, s[12:13], v[7:8]
 ; GFX9-NEXT:    v_cndmask_b32_e32 v6, v2, v8, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v5, v3, v7, vcc
 ; GFX9-NEXT:    global_atomic_cmpswap_x2 v[0:1], v4, v[5:8], s[0:1] offset:32 glc
@@ -6587,7 +6587,7 @@ define amdgpu_kernel void @atomic_umax_i64_ret_addr64(ptr addrspace(1) %out, ptr
 ; SI-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; SI-NEXT:    v_mov_b32_e32 v9, v1
 ; SI-NEXT:    v_mov_b32_e32 v8, v0
-; SI-NEXT:    v_cmp_lt_u64_e32 vcc, s[4:5], v[8:9]
+; SI-NEXT:    v_cmp_le_u64_e32 vcc, s[4:5], v[8:9]
 ; SI-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
 ; SI-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
 ; SI-NEXT:    s_waitcnt expcnt(0)
@@ -6631,7 +6631,7 @@ define amdgpu_kernel void @atomic_umax_i64_ret_addr64(ptr addrspace(1) %out, ptr
 ; VI-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; VI-NEXT:    v_mov_b32_e32 v9, v3
 ; VI-NEXT:    v_mov_b32_e32 v8, v2
-; VI-NEXT:    v_cmp_lt_u64_e32 vcc, s[4:5], v[8:9]
+; VI-NEXT:    v_cmp_le_u64_e32 vcc, s[4:5], v[8:9]
 ; VI-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
 ; VI-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
 ; VI-NEXT:    flat_atomic_cmpswap_x2 v[2:3], v[0:1], v[6:9] glc
@@ -6667,7 +6667,7 @@ define amdgpu_kernel void @atomic_umax_i64_ret_addr64(ptr addrspace(1) %out, ptr
 ; GFX9-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GFX9-NEXT:    v_mov_b32_e32 v8, v1
 ; GFX9-NEXT:    v_mov_b32_e32 v7, v0
-; GFX9-NEXT:    v_cmp_lt_u64_e32 vcc, s[12:13], v[7:8]
+; GFX9-NEXT:    v_cmp_le_u64_e32 vcc, s[12:13], v[7:8]
 ; GFX9-NEXT:    v_cndmask_b32_e32 v6, v2, v8, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v5, v3, v7, vcc
 ; GFX9-NEXT:    global_atomic_cmpswap_x2 v[0:1], v4, v[5:8], s[0:1] glc
@@ -7264,7 +7264,7 @@ define amdgpu_gfx void @global_atomic_umin_i64_noret_scalar(ptr addrspace(1) inr
 ; SI-NEXT:    s_waitcnt vmcnt(0)
 ; SI-NEXT:    v_mov_b32_e32 v9, v1
 ; SI-NEXT:    v_mov_b32_e32 v8, v0
-; SI-NEXT:    v_cmp_ge_u64_e32 vcc, s[34:35], v[8:9]
+; SI-NEXT:    v_cmp_gt_u64_e32 vcc, s[34:35], v[8:9]
 ; SI-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
 ; SI-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
 ; SI-NEXT:    s_waitcnt expcnt(0)
@@ -7303,7 +7303,7 @@ define amdgpu_gfx void @global_atomic_umin_i64_noret_scalar(ptr addrspace(1) inr
 ; VI-NEXT:  .LBB111_1: ; %atomicrmw.start
 ; VI-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; VI-NEXT:    s_waitcnt vmcnt(0)
-; VI-NEXT:    v_cmp_ge_u64_e32 vcc, s[6:7], v[2:3]
+; VI-NEXT:    v_cmp_gt_u64_e32 vcc, s[6:7], v[2:3]
 ; VI-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
 ; VI-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
 ; VI-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
@@ -7330,7 +7330,7 @@ define amdgpu_gfx void @global_atomic_umin_i64_noret_scalar(ptr addrspace(1) inr
 ; GFX9-NEXT:  .LBB111_1: ; %atomicrmw.start
 ; GFX9-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    v_cmp_ge_u64_e32 vcc, s[6:7], v[2:3]
+; GFX9-NEXT:    v_cmp_gt_u64_e32 vcc, s[6:7], v[2:3]
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v5, v3, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v6, v2, vcc
 ; GFX9-NEXT:    global_atomic_cmpswap_x2 v[0:1], v4, v[0:3], s[4:5] glc
@@ -7372,7 +7372,7 @@ define amdgpu_gfx void @global_atomic_umin_i64_noret_offset_scalar(ptr addrspace
 ; SI-NEXT:    s_waitcnt vmcnt(0)
 ; SI-NEXT:    v_mov_b32_e32 v9, v1
 ; SI-NEXT:    v_mov_b32_e32 v8, v0
-; SI-NEXT:    v_cmp_ge_u64_e32 vcc, s[34:35], v[8:9]
+; SI-NEXT:    v_cmp_gt_u64_e32 vcc, s[34:35], v[8:9]
 ; SI-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
 ; SI-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
 ; SI-NEXT:    s_waitcnt expcnt(0)
@@ -7411,7 +7411,7 @@ define amdgpu_gfx void @global_atomic_umin_i64_noret_offset_scalar(ptr addrspace
 ; VI-NEXT:  .LBB112_1: ; %atomicrmw.start
 ; VI-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; VI-NEXT:    s_waitcnt vmcnt(0)
-; VI-NEXT:    v_cmp_ge_u64_e32 vcc, s[6:7], v[2:3]
+; VI-NEXT:    v_cmp_gt_u64_e32 vcc, s[6:7], v[2:3]
 ; VI-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
 ; VI-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
 ; VI-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
@@ -7438,7 +7438,7 @@ define amdgpu_gfx void @global_atomic_umin_i64_noret_offset_scalar(ptr addrspace
 ; GFX9-NEXT:  .LBB112_1: ; %atomicrmw.start
 ; GFX9-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    v_cmp_ge_u64_e32 vcc, s[6:7], v[2:3]
+; GFX9-NEXT:    v_cmp_gt_u64_e32 vcc, s[6:7], v[2:3]
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v5, v3, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v6, v2, vcc
 ; GFX9-NEXT:    global_atomic_cmpswap_x2 v[0:1], v4, v[0:3], s[4:5] offset:32 glc
@@ -7481,7 +7481,7 @@ define amdgpu_gfx i64 @global_atomic_umin_i64_ret_scalar(ptr addrspace(1) inreg
 ; SI-NEXT:    s_waitcnt vmcnt(0)
 ; SI-NEXT:    v_mov_b32_e32 v9, v1
 ; SI-NEXT:    v_mov_b32_e32 v8, v0
-; SI-NEXT:    v_cmp_ge_u64_e32 vcc, s[34:35], v[8:9]
+; SI-NEXT:    v_cmp_gt_u64_e32 vcc, s[34:35], v[8:9]
 ; SI-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
 ; SI-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
 ; SI-NEXT:    s_waitcnt expcnt(0)
@@ -7522,7 +7522,7 @@ define amdgpu_gfx i64 @global_atomic_umin_i64_ret_scalar(ptr addrspace(1) inreg
 ; VI-NEXT:    s_waitcnt vmcnt(0)
 ; VI-NEXT:    v_mov_b32_e32 v9, v1
 ; VI-NEXT:    v_mov_b32_e32 v8, v0
-; VI-NEXT:    v_cmp_ge_u64_e32 vcc, s[6:7], v[8:9]
+; VI-NEXT:    v_cmp_gt_u64_e32 vcc, s[6:7], v[8:9]
 ; VI-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
 ; VI-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
 ; VI-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[6:9] glc
@@ -7549,7 +7549,7 @@ define amdgpu_gfx i64 @global_atomic_umin_i64_ret_scalar(ptr addrspace(1) inreg
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-NEXT:    v_mov_b32_e32 v8, v1
 ; GFX9-NEXT:    v_mov_b32_e32 v7, v0
-; GFX9-NEXT:    v_cmp_ge_u64_e32 vcc, s[6:7], v[7:8]
+; GFX9-NEXT:    v_cmp_gt_u64_e32 vcc, s[6:7], v[7:8]
 ; GFX9-NEXT:    v_cndmask_b32_e32 v6, v3, v8, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v5, v4, v7, vcc
 ; GFX9-NEXT:    global_atomic_cmpswap_x2 v[0:1], v2, v[5:8], s[4:5] glc
@@ -7589,7 +7589,7 @@ define amdgpu_gfx i64 @global_atomic_umin_i64_ret_offset_scalar(ptr addrspace(1)
 ; SI-NEXT:    s_waitcnt vmcnt(0)
 ; SI-NEXT:    v_mov_b32_e32 v9, v1
 ; SI-NEXT:    v_mov_b32_e32 v8, v0
-; SI-NEXT:    v_cmp_ge_u64_e32 vcc, s[34:35], v[8:9]
+; SI-NEXT:    v_cmp_gt_u64_e32 vcc, s[34:35], v[8:9]
 ; SI-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
 ; SI-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
 ; SI-NEXT:    s_waitcnt expcnt(0)
@@ -7630,7 +7630,7 @@ define amdgpu_gfx i64 @global_atomic_umin_i64_ret_offset_scalar(ptr addrspace(1)
 ; VI-NEXT:    s_waitcnt vmcnt(0)
 ; VI-NEXT:    v_mov_b32_e32 v9, v1
 ; VI-NEXT:    v_mov_b32_e32 v8, v0
-; VI-NEXT:    v_cmp_ge_u64_e32 vcc, s[6:7], v[8:9]
+; VI-NEXT:    v_cmp_gt_u64_e32 vcc, s[6:7], v[8:9]
 ; VI-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
 ; VI-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
 ; VI-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[6:9] glc
@@ -7657,7 +7657,7 @@ define amdgpu_gfx i64 @global_atomic_umin_i64_ret_offset_scalar(ptr addrspace(1)
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-NEXT:    v_mov_b32_e32 v8, v1
 ; GFX9-NEXT:    v_mov_b32_e32 v7, v0
-; GFX9-NEXT:    v_cmp_ge_u64_e32 vcc, s[6:7], v[7:8]
+; GFX9-NEXT:    v_cmp_gt_u64_e32 vcc, s[6:7], v[7:8]
 ; GFX9-NEXT:    v_cndmask_b32_e32 v6, v3, v8, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v5, v4, v7, vcc
 ; GFX9-NEXT:    global_atomic_cmpswap_x2 v[0:1], v2, v[5:8], s[4:5] offset:32 glc
@@ -8250,7 +8250,7 @@ define amdgpu_gfx void @global_atomic_min_i64_noret_scalar(ptr addrspace(1) inre
 ; SI-NEXT:    s_waitcnt vmcnt(0)
 ; SI-NEXT:    v_mov_b32_e32 v9, v1
 ; SI-NEXT:    v_mov_b32_e32 v8, v0
-; SI-NEXT:    v_cmp_ge_i64_e32 vcc, s[34:35], v[8:9]
+; SI-NEXT:    v_cmp_gt_i64_e32 vcc, s[34:35], v[8:9]
 ; SI-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
 ; SI-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
 ; SI-NEXT:    s_waitcnt expcnt(0)
@@ -8289,7 +8289,7 @@ define amdgpu_gfx void @global_atomic_min_i64_noret_scalar(ptr addrspace(1) inre
 ; VI-NEXT:  .LBB121_1: ; %atomicrmw.start
 ; VI-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; VI-NEXT:    s_waitcnt vmcnt(0)
-; VI-NEXT:    v_cmp_ge_i64_e32 vcc, s[6:7], v[2:3]
+; VI-NEXT:    v_cmp_gt_i64_e32 vcc, s[6:7], v[2:3]
 ; VI-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
 ; VI-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
 ; VI-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
@@ -8316,7 +8316,7 @@ define amdgpu_gfx void @global_atomic_min_i64_noret_scalar(ptr addrspace(1) inre
 ; GFX9-NEXT:  .LBB121_1: ; %atomicrmw.start
 ; GFX9-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    v_cmp_ge_i64_e32 vcc, s[6:7], v[2:3]
+; GFX9-NEXT:    v_cmp_gt_i64_e32 vcc, s[6:7], v[2:3]
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v5, v3, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v6, v2, vcc
 ; GFX9-NEXT:    global_atomic_cmpswap_x2 v[0:1], v4, v[0:3], s[4:5] glc
@@ -8358,7 +8358,7 @@ define amdgpu_gfx void @global_atomic_min_i64_noret_offset_scalar(ptr addrspace(
 ; SI-NEXT:    s_waitcnt vmcnt(0)
 ; SI-NEXT:    v_mov_b32_e32 v9, v1
 ; SI-NEXT:    v_mov_b32_e32 v8, v0
-; SI-NEXT:    v_cmp_ge_i64_e32 vcc, s[34:35], v[8:9]
+; SI-NEXT:    v_cmp_gt_i64_e32 vcc, s[34:35], v[8:9]
 ; SI-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
 ; SI-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
 ; SI-NEXT:    s_waitcnt expcnt(0)
@@ -8397,7 +8397,7 @@ define amdgpu_gfx void @global_atomic_min_i64_noret_offset_scalar(ptr addrspace(
 ; VI-NEXT:  .LBB122_1: ; %atomicrmw.start
 ; VI-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; VI-NEXT:    s_waitcnt vmcnt(0)
-; VI-NEXT:    v_cmp_ge_i64_e32 vcc, s[6:7], v[2:3]
+; VI-NEXT:    v_cmp_gt_i64_e32 vcc, s[6:7], v[2:3]
 ; VI-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
 ; VI-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
 ; VI-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
@@ -8424,7 +8424,7 @@ define amdgpu_gfx void @global_atomic_min_i64_noret_offset_scalar(ptr addrspace(
 ; GFX9-NEXT:  .LBB122_1: ; %atomicrmw.start
 ; GFX9-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    v_cmp_ge_i64_e32 vcc, s[6:7], v[2:3]
+; GFX9-NEXT:    v_cmp_gt_i64_e32 vcc, s[6:7], v[2:3]
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v5, v3, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v6, v2, vcc
 ; GFX9-NEXT:    global_atomic_cmpswap_x2 v[0:1], v4, v[0:3], s[4:5] offset:32 glc
@@ -8467,7 +8467,7 @@ define amdgpu_gfx i64 @global_atomic_min_i64_ret_scalar(ptr addrspace(1) inreg %
 ; SI-NEXT:    s_waitcnt vmcnt(0)
 ; SI-NEXT:    v_mov_b32_e32 v9, v1
 ; SI-NEXT:    v_mov_b32_e32 v8, v0
-; SI-NEXT:    v_cmp_ge_i64_e32 vcc, s[34:35], v[8:9]
+; SI-NEXT:    v_cmp_gt_i64_e32 vcc, s[34:35], v[8:9]
 ; SI-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
 ; SI-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
 ; SI-NEXT:    s_waitcnt expcnt(0)
@@ -8508,7 +8508,7 @@ define amdgpu_gfx i64 @global_atomic_min_i64_ret_scalar(ptr addrspace(1) inreg %
 ; VI-NEXT:    s_waitcnt vmcnt(0)
 ; VI-NEXT:    v_mov_b32_e32 v9, v1
 ; VI-NEXT:    v_mov_b32_e32 v8, v0
-; VI-NEXT:    v_cmp_ge_i64_e32 vcc, s[6:7], v[8:9]
+; VI-NEXT:    v_cmp_gt_i64_e32 vcc, s[6:7], v[8:9]
 ; VI-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
 ; VI-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
 ; VI-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[6:9] glc
@@ -8535,7 +8535,7 @@ define amdgpu_gfx i64 @global_atomic_min_i64_ret_scalar(ptr addrspace(1) inreg %
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-NEXT:    v_mov_b32_e32 v8, v1
 ; GFX9-NEXT:    v_mov_b32_e32 v7, v0
-; GFX9-NEXT:    v_cmp_ge_i64_e32 vcc, s[6:7], v[7:8]
+; GFX9-NEXT:    v_cmp_gt_i64_e32 vcc, s[6:7], v[7:8]
 ; GFX9-NEXT:    v_cndmask_b32_e32 v6, v3, v8, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v5, v4, v7, vcc
 ; GFX9-NEXT:    global_atomic_cmpswap_x2 v[0:1], v2, v[5:8], s[4:5] glc
@@ -8575,7 +8575,7 @@ define amdgpu_gfx i64 @global_atomic_min_i64_ret_offset_scalar(ptr addrspace(1)
 ; SI-NEXT:    s_waitcnt vmcnt(0)
 ; SI-NEXT:    v_mov_b32_e32 v9, v1
 ; SI-NEXT:    v_mov_b32_e32 v8, v0
-; SI-NEXT:    v_cmp_ge_i64_e32 vcc, s[34:35], v[8:9]
+; SI-NEXT:    v_cmp_gt_i64_e32 vcc, s[34:35], v[8:9]
 ; SI-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
 ; SI-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
 ; SI-NEXT:    s_waitcnt expcnt(0)
@@ -8616,7 +8616,7 @@ define amdgpu_gfx i64 @global_atomic_min_i64_ret_offset_scalar(ptr addrspace(1)
 ; VI-NEXT:    s_waitcnt vmcnt(0)
 ; VI-NEXT:    v_mov_b32_e32 v9, v1
 ; VI-NEXT:    v_mov_b32_e32 v8, v0
-; VI-NEXT:    v_cmp_ge_i64_e32 vcc, s[6:7], v[8:9]
+; VI-NEXT:    v_cmp_gt_i64_e32 vcc, s[6:7], v[8:9]
 ; VI-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
 ; VI-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
 ; VI-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[6:9] glc
@@ -8643,7 +8643,7 @@ define amdgpu_gfx i64 @global_atomic_min_i64_ret_offset_scalar(ptr addrspace(1)
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-NEXT:    v_mov_b32_e32 v8, v1
 ; GFX9-NEXT:    v_mov_b32_e32 v7, v0
-; GFX9-NEXT:    v_cmp_ge_i64_e32 vcc, s[6:7], v[7:8]
+; GFX9-NEXT:    v_cmp_gt_i64_e32 vcc, s[6:7], v[7:8]
 ; GFX9-NEXT:    v_cndmask_b32_e32 v6, v3, v8, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v5, v4, v7, vcc
 ; GFX9-NEXT:    global_atomic_cmpswap_x2 v[0:1], v2, v[5:8], s[4:5] offset:32 glc
@@ -8683,7 +8683,7 @@ define amdgpu_kernel void @atomic_min_i64_addr64_offset(ptr addrspace(1) %out, i
 ; SI-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; SI-NEXT:    v_mov_b32_e32 v9, v1
 ; SI-NEXT:    v_mov_b32_e32 v8, v0
-; SI-NEXT:    v_cmp_ge_i64_e32 vcc, s[2:3], v[8:9]
+; SI-NEXT:    v_cmp_gt_i64_e32 vcc, s[2:3], v[8:9]
 ; SI-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
 ; SI-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
 ; SI-NEXT:    s_waitcnt expcnt(0)
@@ -8722,7 +8722,7 @@ define amdgpu_kernel void @atomic_min_i64_addr64_offset(ptr addrspace(1) %out, i
 ; VI-NEXT:    v_mov_b32_e32 v4, s0
 ; VI-NEXT:  .LBB125_1: ; %atomicrmw.start
 ; VI-NEXT:    ; =>This Inner Loop Header: Depth=1
-; VI-NEXT:    v_cmp_ge_i64_e32 vcc, s[2:3], v[2:3]
+; VI-NEXT:    v_cmp_gt_i64_e32 vcc, s[2:3], v[2:3]
 ; VI-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
 ; VI-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
 ; VI-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
@@ -8755,7 +8755,7 @@ define amdgpu_kernel void @atomic_min_i64_addr64_offset(ptr addrspace(1) %out, i
 ; GFX9-NEXT:    v_mov_b32_e32 v3, s7
 ; GFX9-NEXT:  .LBB125_1: ; %atomicrmw.start
 ; GFX9-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX9-NEXT:    v_cmp_ge_i64_e32 vcc, s[2:3], v[2:3]
+; GFX9-NEXT:    v_cmp_gt_i64_e32 vcc, s[2:3], v[2:3]
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v4, v3, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v5, v2, vcc
 ; GFX9-NEXT:    global_atomic_cmpswap_x2 v[0:1], v6, v[0:3], s[0:1] offset:32 glc
@@ -8797,7 +8797,7 @@ define amdgpu_kernel void @atomic_min_i64_ret_addr64_offset(ptr addrspace(1) %ou
 ; SI-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; SI-NEXT:    v_mov_b32_e32 v9, v1
 ; SI-NEXT:    v_mov_b32_e32 v8, v0
-; SI-NEXT:    v_cmp_ge_i64_e32 vcc, s[4:5], v[8:9]
+; SI-NEXT:    v_cmp_gt_i64_e32 vcc, s[4:5], v[8:9]
 ; SI-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
 ; SI-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
 ; SI-NEXT:    s_waitcnt expcnt(0)
@@ -8843,7 +8843,7 @@ define amdgpu_kernel void @atomic_min_i64_ret_addr64_offset(ptr addrspace(1) %ou
 ; VI-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; VI-NEXT:    v_mov_b32_e32 v9, v3
 ; VI-NEXT:    v_mov_b32_e32 v8, v2
-; VI-NEXT:    v_cmp_ge_i64_e32 vcc, s[4:5], v[8:9]
+; VI-NEXT:    v_cmp_gt_i64_e32 vcc, s[4:5], v[8:9]
 ; VI-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
 ; VI-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
 ; VI-NEXT:    flat_atomic_cmpswap_x2 v[2:3], v[0:1], v[6:9] glc
@@ -8879,7 +8879,7 @@ define amdgpu_kernel void @atomic_min_i64_ret_addr64_offset(ptr addrspace(1) %ou
 ; GFX9-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GFX9-NEXT:    v_mov_b32_e32 v8, v1
 ; GFX9-NEXT:    v_mov_b32_e32 v7, v0
-; GFX9-NEXT:    v_cmp_ge_i64_e32 vcc, s[12:13], v[7:8]
+; GFX9-NEXT:    v_cmp_gt_i64_e32 vcc, s[12:13], v[7:8]
 ; GFX9-NEXT:    v_cndmask_b32_e32 v6, v2, v8, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v5, v3, v7, vcc
 ; GFX9-NEXT:    global_atomic_cmpswap_x2 v[0:1], v4, v[5:8], s[0:1] offset:32 glc
@@ -8922,7 +8922,7 @@ define amdgpu_kernel void @atomic_min_i64(ptr addrspace(1) %out, i64 %in) {
 ; SI-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; SI-NEXT:    v_mov_b32_e32 v9, v1
 ; SI-NEXT:    v_mov_b32_e32 v8, v0
-; SI-NEXT:    v_cmp_ge_i64_e32 vcc, s[2:3], v[8:9]
+; SI-NEXT:    v_cmp_gt_i64_e32 vcc, s[2:3], v[8:9]
 ; SI-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
 ; SI-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
 ; SI-NEXT:    s_waitcnt expcnt(0)
@@ -8955,7 +8955,7 @@ define amdgpu_kernel void @atomic_min_i64(ptr addrspace(1) %out, i64 %in) {
 ; VI-NEXT:    v_mov_b32_e32 v3, s7
 ; VI-NEXT:  .LBB127_1: ; %atomicrmw.start
 ; VI-NEXT:    ; =>This Inner Loop Header: Depth=1
-; VI-NEXT:    v_cmp_ge_i64_e32 vcc, s[2:3], v[2:3]
+; VI-NEXT:    v_cmp_gt_i64_e32 vcc, s[2:3], v[2:3]
 ; VI-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
 ; VI-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
 ; VI-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
@@ -8984,7 +8984,7 @@ define amdgpu_kernel void @atomic_min_i64(ptr addrspace(1) %out, i64 %in) {
 ; GFX9-NEXT:    v_mov_b32_e32 v3, s7
 ; GFX9-NEXT:  .LBB127_1: ; %atomicrmw.start
 ; GFX9-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX9-NEXT:    v_cmp_ge_i64_e32 vcc, s[2:3], v[2:3]
+; GFX9-NEXT:    v_cmp_gt_i64_e32 vcc, s[2:3], v[2:3]
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v4, v3, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v5, v2, vcc
 ; GFX9-NEXT:    global_atomic_cmpswap_x2 v[0:1], v6, v[0:3], s[0:1] glc
@@ -9024,7 +9024,7 @@ define amdgpu_kernel void @atomic_min_i64_ret_addr64(ptr addrspace(1) %out, ptr
 ; SI-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; SI-NEXT:    v_mov_b32_e32 v9, v1
 ; SI-NEXT:    v_mov_b32_e32 v8, v0
-; SI-NEXT:    v_cmp_ge_i64_e32 vcc, s[4:5], v[8:9]
+; SI-NEXT:    v_cmp_gt_i64_e32 vcc, s[4:5], v[8:9]
 ; SI-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
 ; SI-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
 ; SI-NEXT:    s_waitcnt expcnt(0)
@@ -9068,7 +9068,7 @@ define amdgpu_kernel void @atomic_min_i64_ret_addr64(ptr addrspace(1) %out, ptr
 ; VI-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; VI-NEXT:    v_mov_b32_e32 v9, v3
 ; VI-NEXT:    v_mov_b32_e32 v8, v2
-; VI-NEXT:    v_cmp_ge_i64_e32 vcc, s[4:5], v[8:9]
+; VI-NEXT:    v_cmp_gt_i64_e32 vcc, s[4:5], v[8:9]
 ; VI-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
 ; VI-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
 ; VI-NEXT:    flat_atomic_cmpswap_x2 v[2:3], v[0:1], v[6:9] glc
@@ -9104,7 +9104,7 @@ define amdgpu_kernel void @atomic_min_i64_ret_addr64(ptr addrspace(1) %out, ptr
 ; GFX9-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GFX9-NEXT:    v_mov_b32_e32 v8, v1
 ; GFX9-NEXT:    v_mov_b32_e32 v7, v0
-; GFX9-NEXT:    v_cmp_ge_i64_e32 vcc, s[12:13], v[7:8]
+; GFX9-NEXT:    v_cmp_gt_i64_e32 vcc, s[12:13], v[7:8]
 ; GFX9-NEXT:    v_cndmask_b32_e32 v6, v2, v8, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v5, v3, v7, vcc
 ; GFX9-NEXT:    global_atomic_cmpswap_x2 v[0:1], v4, v[5:8], s[0:1] glc
diff --git a/llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll b/llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll
index 3d9fff23107b0..e0e8a0dac55f0 100644
--- a/llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll
+++ b/llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll
@@ -9279,7 +9279,7 @@ define amdgpu_kernel void @broken_phi_bb(i32 %arg, i32 %arg1) {
 ; SI-MOVREL-NEXT:    ; =>This Loop Header: Depth=1
 ; SI-MOVREL-NEXT:    ; Child Loop BB26_4 Depth 2
 ; SI-MOVREL-NEXT:    s_waitcnt lgkmcnt(0)
-; SI-MOVREL-NEXT:    v_cmp_le_i32_e32 vcc, s0, v0
+; SI-MOVREL-NEXT:    v_cmp_lt_i32_e32 vcc, s0, v0
 ; SI-MOVREL-NEXT:    s_cbranch_vccnz .LBB26_1
 ; SI-MOVREL-NEXT:  ; %bb.3: ; %bb4
 ; SI-MOVREL-NEXT:    ; in Loop: Header=BB26_2 Depth=1
@@ -9315,7 +9315,7 @@ define amdgpu_kernel void @broken_phi_bb(i32 %arg, i32 %arg1) {
 ; VI-MOVREL-NEXT:    ; =>This Loop Header: Depth=1
 ; VI-MOVREL-NEXT:    ; Child Loop BB26_4 Depth 2
 ; VI-MOVREL-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-MOVREL-NEXT:    v_cmp_le_i32_e32 vcc, s0, v0
+; VI-MOVREL-NEXT:    v_cmp_lt_i32_e32 vcc, s0, v0
 ; VI-MOVREL-NEXT:    s_cbranch_vccnz .LBB26_1
 ; VI-MOVREL-NEXT:  ; %bb.3: ; %bb4
 ; VI-MOVREL-NEXT:    ; in Loop: Header=BB26_2 Depth=1
@@ -9351,7 +9351,7 @@ define amdgpu_kernel void @broken_phi_bb(i32 %arg, i32 %arg1) {
 ; VI-IDXMODE-NEXT:    ; =>This Loop Header: Depth=1
 ; VI-IDXMODE-NEXT:    ; Child Loop BB26_4 Depth 2
 ; VI-IDXMODE-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-IDXMODE-NEXT:    v_cmp_le_i32_e32 vcc, s0, v0
+; VI-IDXMODE-NEXT:    v_cmp_lt_i32_e32 vcc, s0, v0
 ; VI-IDXMODE-NEXT:    s_cbranch_vccnz .LBB26_1
 ; VI-IDXMODE-NEXT:  ; %bb.3: ; %bb4
 ; VI-IDXMODE-NEXT:    ; in Loop: Header=BB26_2 Depth=1
@@ -9388,7 +9388,7 @@ define amdgpu_kernel void @broken_phi_bb(i32 %arg, i32 %arg1) {
 ; GFX9-IDXMODE-NEXT:    ; =>This Loop Header: Depth=1
 ; GFX9-IDXMODE-NEXT:    ; Child Loop BB26_4 Depth 2
 ; GFX9-IDXMODE-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-IDXMODE-NEXT:    v_cmp_le_i32_e32 vcc, s0, v0
+; GFX9-IDXMODE-NEXT:    v_cmp_lt_i32_e32 vcc, s0, v0
 ; GFX9-IDXMODE-NEXT:    s_cbranch_vccnz .LBB26_1
 ; GFX9-IDXMODE-NEXT:  ; %bb.3: ; %bb4
 ; GFX9-IDXMODE-NEXT:    ; in Loop: Header=BB26_2 Depth=1
diff --git a/llvm/test/CodeGen/AMDGPU/itofp.i128.bf.ll b/llvm/test/CodeGen/AMDGPU/itofp.i128.bf.ll
index c001df48499c7..7df082148df11 100644
--- a/llvm/test/CodeGen/AMDGPU/itofp.i128.bf.ll
+++ b/llvm/test/CodeGen/AMDGPU/itofp.i128.bf.ll
@@ -21,7 +21,7 @@ define bfloat @sitofp_i128_to_bf16(i128 %x) {
 ; GCN-NEXT:    v_subb_co_u32_e32 v5, vcc, 0, v1, vcc
 ; GCN-NEXT:    v_subb_co_u32_e32 v6, vcc, 0, v2, vcc
 ; GCN-NEXT:    v_subb_co_u32_e32 v7, vcc, 0, v3, vcc
-; GCN-NEXT:    v_cmp_gt_i64_e32 vcc, 0, v[2:3]
+; GCN-NEXT:    v_cmp_ge_i64_e32 vcc, 0, v[2:3]
 ; GCN-NEXT:    ; implicit-def: $vgpr8
 ; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
 ; GCN-NEXT:    v_cndmask_b32_e32 v4, v2, v6, vcc
@@ -39,13 +39,13 @@ define bfloat @sitofp_i128_to_bf16(i128 %x) {
 ; GCN-NEXT:    v_add_u32_e32 v6, 64, v6
 ; GCN-NEXT:    v_cndmask_b32_e32 v7, v6, v2, vcc
 ; GCN-NEXT:    v_sub_u32_e32 v2, 0x80, v7
-; GCN-NEXT:    v_cmp_gt_i32_e32 vcc, 25, v2
+; GCN-NEXT:    v_cmp_ge_i32_e32 vcc, 25, v2
 ; GCN-NEXT:    s_and_saveexec_b64 s[4:5], vcc
 ; GCN-NEXT:    s_xor_b64 s[4:5], exec, s[4:5]
 ; GCN-NEXT:  ; %bb.2: ; %itofp-if-else
 ; GCN-NEXT:    v_add_u32_e32 v2, 0xffffff98, v7
 ; GCN-NEXT:    v_lshlrev_b64 v[0:1], v2, v[0:1]
-; GCN-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v2
+; GCN-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v2
 ; GCN-NEXT:    v_cndmask_b32_e32 v8, 0, v0, vcc
 ; GCN-NEXT:    ; implicit-def: $vgpr2
 ; GCN-NEXT:    ; implicit-def: $vgpr0_vgpr1
@@ -56,7 +56,7 @@ define bfloat @sitofp_i128_to_bf16(i128 %x) {
 ; GCN-NEXT:    s_xor_b64 exec, exec, s[8:9]
 ; GCN-NEXT:    s_cbranch_execz .LBB0_13
 ; GCN-NEXT:  ; %bb.4: ; %NodeBlock
-; GCN-NEXT:    v_cmp_lt_i32_e32 vcc, 25, v2
+; GCN-NEXT:    v_cmp_le_i32_e32 vcc, 25, v2
 ; GCN-NEXT:    s_and_saveexec_b64 s[4:5], vcc
 ; GCN-NEXT:    s_xor_b64 s[10:11], exec, s[4:5]
 ; GCN-NEXT:    s_cbranch_execz .LBB0_8
@@ -73,7 +73,7 @@ define bfloat @sitofp_i128_to_bf16(i128 %x) {
 ; GCN-NEXT:    v_or_b32_e32 v11, v9, v11
 ; GCN-NEXT:    v_or_b32_e32 v10, v8, v10
 ; GCN-NEXT:    v_lshrrev_b64 v[8:9], v13, v[4:5]
-; GCN-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v12
+; GCN-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v12
 ; GCN-NEXT:    v_add_u32_e32 v14, 26, v7
 ; GCN-NEXT:    v_cndmask_b32_e32 v9, v9, v11, vcc
 ; GCN-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v12
@@ -86,7 +86,7 @@ define bfloat @sitofp_i128_to_bf16(i128 %x) {
 ; GCN-NEXT:    v_cndmask_b32_e64 v9, v9, v1, s[4:5]
 ; GCN-NEXT:    v_or_b32_e32 v11, v13, v11
 ; GCN-NEXT:    v_or_b32_e32 v10, v12, v10
-; GCN-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v14
+; GCN-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v14
 ; GCN-NEXT:    v_lshlrev_b64 v[0:1], v14, v[0:1]
 ; GCN-NEXT:    v_cndmask_b32_e32 v8, v8, v11, vcc
 ; GCN-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v14
@@ -167,14 +167,14 @@ define bfloat @uitofp_i128_to_bf16(i128 %x) {
 ; GCN-NEXT:    v_add_u32_e32 v5, 64, v5
 ; GCN-NEXT:    v_cndmask_b32_e32 v6, v5, v4, vcc
 ; GCN-NEXT:    v_sub_u32_e32 v4, 0x80, v6
-; GCN-NEXT:    v_cmp_gt_i32_e32 vcc, 25, v4
+; GCN-NEXT:    v_cmp_ge_i32_e32 vcc, 25, v4
 ; GCN-NEXT:    ; implicit-def: $vgpr7
 ; GCN-NEXT:    s_and_saveexec_b64 s[4:5], vcc
 ; GCN-NEXT:    s_xor_b64 s[4:5], exec, s[4:5]
 ; GCN-NEXT:  ; %bb.2: ; %itofp-if-else
 ; GCN-NEXT:    v_add_u32_e32 v2, 0xffffff98, v6
 ; GCN-NEXT:    v_lshlrev_b64 v[0:1], v2, v[0:1]
-; GCN-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v2
+; GCN-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v2
 ; GCN-NEXT:    v_cndmask_b32_e32 v7, 0, v0, vcc
 ; GCN-NEXT:    ; implicit-def: $vgpr4
 ; GCN-NEXT:    ; implicit-def: $vgpr0_vgpr1
@@ -185,7 +185,7 @@ define bfloat @uitofp_i128_to_bf16(i128 %x) {
 ; GCN-NEXT:    s_xor_b64 exec, exec, s[8:9]
 ; GCN-NEXT:    s_cbranch_execz .LBB1_13
 ; GCN-NEXT:  ; %bb.4: ; %NodeBlock
-; GCN-NEXT:    v_cmp_lt_i32_e32 vcc, 25, v4
+; GCN-NEXT:    v_cmp_le_i32_e32 vcc, 25, v4
 ; GCN-NEXT:    s_and_saveexec_b64 s[4:5], vcc
 ; GCN-NEXT:    s_xor_b64 s[10:11], exec, s[4:5]
 ; GCN-NEXT:    s_cbranch_execz .LBB1_8
@@ -202,7 +202,7 @@ define bfloat @uitofp_i128_to_bf16(i128 %x) {
 ; GCN-NEXT:    v_or_b32_e32 v10, v8, v10
 ; GCN-NEXT:    v_or_b32_e32 v9, v7, v9
 ; GCN-NEXT:    v_lshrrev_b64 v[7:8], v12, v[2:3]
-; GCN-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v11
+; GCN-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v11
 ; GCN-NEXT:    v_add_u32_e32 v13, 26, v6
 ; GCN-NEXT:    v_cndmask_b32_e32 v8, v8, v10, vcc
 ; GCN-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v11
@@ -215,7 +215,7 @@ define bfloat @uitofp_i128_to_bf16(i128 %x) {
 ; GCN-NEXT:    v_cndmask_b32_e64 v8, v8, v1, s[4:5]
 ; GCN-NEXT:    v_or_b32_e32 v10, v12, v10
 ; GCN-NEXT:    v_or_b32_e32 v9, v11, v9
-; GCN-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v13
+; GCN-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v13
 ; GCN-NEXT:    v_lshlrev_b64 v[0:1], v13, v[0:1]
 ; GCN-NEXT:    v_cndmask_b32_e32 v7, v7, v10, vcc
 ; GCN-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v13
diff --git a/llvm/test/CodeGen/AMDGPU/itofp.i128.ll b/llvm/test/CodeGen/AMDGPU/itofp.i128.ll
index c316ec71863d0..fc911ff0eabcc 100644
--- a/llvm/test/CodeGen/AMDGPU/itofp.i128.ll
+++ b/llvm/test/CodeGen/AMDGPU/itofp.i128.ll
@@ -17,7 +17,7 @@ define float @sitofp_i128_to_f32(i128 %x) {
 ; SDAG-NEXT:    v_subb_co_u32_e32 v5, vcc, 0, v1, vcc
 ; SDAG-NEXT:    v_subb_co_u32_e32 v6, vcc, 0, v2, vcc
 ; SDAG-NEXT:    v_subb_co_u32_e32 v7, vcc, 0, v3, vcc
-; SDAG-NEXT:    v_cmp_gt_i64_e32 vcc, 0, v[2:3]
+; SDAG-NEXT:    v_cmp_ge_i64_e32 vcc, 0, v[2:3]
 ; SDAG-NEXT:    ; implicit-def: $vgpr8
 ; SDAG-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
 ; SDAG-NEXT:    v_cndmask_b32_e32 v4, v2, v6, vcc
@@ -35,13 +35,13 @@ define float @sitofp_i128_to_f32(i128 %x) {
 ; SDAG-NEXT:    v_add_u32_e32 v6, 64, v6
 ; SDAG-NEXT:    v_cndmask_b32_e32 v7, v6, v2, vcc
 ; SDAG-NEXT:    v_sub_u32_e32 v2, 0x80, v7
-; SDAG-NEXT:    v_cmp_gt_i32_e32 vcc, 25, v2
+; SDAG-NEXT:    v_cmp_ge_i32_e32 vcc, 25, v2
 ; SDAG-NEXT:    s_and_saveexec_b64 s[4:5], vcc
 ; SDAG-NEXT:    s_xor_b64 s[4:5], exec, s[4:5]
 ; SDAG-NEXT:  ; %bb.2: ; %itofp-if-else
 ; SDAG-NEXT:    v_add_u32_e32 v2, 0xffffff98, v7
 ; SDAG-NEXT:    v_lshlrev_b64 v[0:1], v2, v[0:1]
-; SDAG-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v2
+; SDAG-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v2
 ; SDAG-NEXT:    v_cndmask_b32_e32 v8, 0, v0, vcc
 ; SDAG-NEXT:    ; implicit-def: $vgpr2
 ; SDAG-NEXT:    ; implicit-def: $vgpr0_vgpr1
@@ -52,7 +52,7 @@ define float @sitofp_i128_to_f32(i128 %x) {
 ; SDAG-NEXT:    s_xor_b64 exec, exec, s[8:9]
 ; SDAG-NEXT:    s_cbranch_execz .LBB0_13
 ; SDAG-NEXT:  ; %bb.4: ; %NodeBlock
-; SDAG-NEXT:    v_cmp_lt_i32_e32 vcc, 25, v2
+; SDAG-NEXT:    v_cmp_le_i32_e32 vcc, 25, v2
 ; SDAG-NEXT:    s_and_saveexec_b64 s[4:5], vcc
 ; SDAG-NEXT:    s_xor_b64 s[10:11], exec, s[4:5]
 ; SDAG-NEXT:    s_cbranch_execz .LBB0_8
@@ -69,7 +69,7 @@ define float @sitofp_i128_to_f32(i128 %x) {
 ; SDAG-NEXT:    v_or_b32_e32 v11, v9, v11
 ; SDAG-NEXT:    v_or_b32_e32 v10, v8, v10
 ; SDAG-NEXT:    v_lshrrev_b64 v[8:9], v13, v[4:5]
-; SDAG-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v12
+; SDAG-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v12
 ; SDAG-NEXT:    v_add_u32_e32 v14, 26, v7
 ; SDAG-NEXT:    v_cndmask_b32_e32 v9, v9, v11, vcc
 ; SDAG-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v12
@@ -82,7 +82,7 @@ define float @sitofp_i128_to_f32(i128 %x) {
 ; SDAG-NEXT:    v_cndmask_b32_e64 v9, v9, v1, s[4:5]
 ; SDAG-NEXT:    v_or_b32_e32 v11, v13, v11
 ; SDAG-NEXT:    v_or_b32_e32 v10, v12, v10
-; SDAG-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v14
+; SDAG-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v14
 ; SDAG-NEXT:    v_lshlrev_b64 v[0:1], v14, v[0:1]
 ; SDAG-NEXT:    v_cndmask_b32_e32 v8, v8, v11, vcc
 ; SDAG-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v14
@@ -162,14 +162,14 @@ define float @sitofp_i128_to_f32(i128 %x) {
 ; GISEL-NEXT:    v_min_u32_e32 v5, v5, v7
 ; GISEL-NEXT:    v_cndmask_b32_e32 v5, v5, v4, vcc
 ; GISEL-NEXT:    v_sub_u32_e32 v7, 0x80, v5
-; GISEL-NEXT:    v_cmp_ge_i32_e32 vcc, 24, v7
+; GISEL-NEXT:    v_cmp_gt_i32_e32 vcc, 24, v7
 ; GISEL-NEXT:    ; implicit-def: $vgpr4
 ; GISEL-NEXT:    s_and_saveexec_b64 s[4:5], vcc
 ; GISEL-NEXT:    s_xor_b64 s[4:5], exec, s[4:5]
 ; GISEL-NEXT:  ; %bb.2: ; %itofp-if-else
 ; GISEL-NEXT:    v_add_u32_e32 v2, 0xffffff98, v5
 ; GISEL-NEXT:    v_lshlrev_b64 v[0:1], v2, v[0:1]
-; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v2
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v2
 ; GISEL-NEXT:    v_cndmask_b32_e32 v4, 0, v0, vcc
 ; GISEL-NEXT:    ; implicit-def: $vgpr7
 ; GISEL-NEXT:    ; implicit-def: $vgpr0
@@ -180,7 +180,7 @@ define float @sitofp_i128_to_f32(i128 %x) {
 ; GISEL-NEXT:    s_xor_b64 exec, exec, s[8:9]
 ; GISEL-NEXT:    s_cbranch_execz .LBB0_13
 ; GISEL-NEXT:  ; %bb.4: ; %NodeBlock
-; GISEL-NEXT:    v_cmp_le_i32_e32 vcc, 26, v7
+; GISEL-NEXT:    v_cmp_lt_i32_e32 vcc, 26, v7
 ; GISEL-NEXT:    s_and_saveexec_b64 s[4:5], vcc
 ; GISEL-NEXT:    s_xor_b64 s[10:11], exec, s[4:5]
 ; GISEL-NEXT:    s_cbranch_execz .LBB0_8
@@ -197,7 +197,7 @@ define float @sitofp_i128_to_f32(i128 %x) {
 ; GISEL-NEXT:    v_or_b32_e32 v11, v9, v11
 ; GISEL-NEXT:    v_or_b32_e32 v12, v10, v12
 ; GISEL-NEXT:    v_lshrrev_b64 v[9:10], v13, v[2:3]
-; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v4
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v4
 ; GISEL-NEXT:    v_add_u32_e32 v14, 26, v5
 ; GISEL-NEXT:    v_cndmask_b32_e32 v9, v9, v11, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e32 v10, v10, v12, vcc
@@ -211,7 +211,7 @@ define float @sitofp_i128_to_f32(i128 %x) {
 ; GISEL-NEXT:    v_or_b32_e32 v15, v9, v11
 ; GISEL-NEXT:    v_or_b32_e32 v16, v10, v12
 ; GISEL-NEXT:    v_lshrrev_b64 v[11:12], v5, -1
-; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v14
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v14
 ; GISEL-NEXT:    v_cndmask_b32_e32 v5, v11, v15, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e32 v11, v12, v16, vcc
 ; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v14
@@ -289,14 +289,14 @@ define float @uitofp_i128_to_f32(i128 %x) {
 ; SDAG-NEXT:    v_add_u32_e32 v5, 64, v5
 ; SDAG-NEXT:    v_cndmask_b32_e32 v6, v5, v4, vcc
 ; SDAG-NEXT:    v_sub_u32_e32 v4, 0x80, v6
-; SDAG-NEXT:    v_cmp_gt_i32_e32 vcc, 25, v4
+; SDAG-NEXT:    v_cmp_ge_i32_e32 vcc, 25, v4
 ; SDAG-NEXT:    ; implicit-def: $vgpr7
 ; SDAG-NEXT:    s_and_saveexec_b64 s[4:5], vcc
 ; SDAG-NEXT:    s_xor_b64 s[4:5], exec, s[4:5]
 ; SDAG-NEXT:  ; %bb.2: ; %itofp-if-else
 ; SDAG-NEXT:    v_add_u32_e32 v2, 0xffffff98, v6
 ; SDAG-NEXT:    v_lshlrev_b64 v[0:1], v2, v[0:1]
-; SDAG-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v2
+; SDAG-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v2
 ; SDAG-NEXT:    v_cndmask_b32_e32 v7, 0, v0, vcc
 ; SDAG-NEXT:    ; implicit-def: $vgpr4
 ; SDAG-NEXT:    ; implicit-def: $vgpr0_vgpr1
@@ -307,7 +307,7 @@ define float @uitofp_i128_to_f32(i128 %x) {
 ; SDAG-NEXT:    s_xor_b64 exec, exec, s[8:9]
 ; SDAG-NEXT:    s_cbranch_execz .LBB1_13
 ; SDAG-NEXT:  ; %bb.4: ; %NodeBlock
-; SDAG-NEXT:    v_cmp_lt_i32_e32 vcc, 25, v4
+; SDAG-NEXT:    v_cmp_le_i32_e32 vcc, 25, v4
 ; SDAG-NEXT:    s_and_saveexec_b64 s[4:5], vcc
 ; SDAG-NEXT:    s_xor_b64 s[10:11], exec, s[4:5]
 ; SDAG-NEXT:    s_cbranch_execz .LBB1_8
@@ -324,7 +324,7 @@ define float @uitofp_i128_to_f32(i128 %x) {
 ; SDAG-NEXT:    v_or_b32_e32 v10, v8, v10
 ; SDAG-NEXT:    v_or_b32_e32 v9, v7, v9
 ; SDAG-NEXT:    v_lshrrev_b64 v[7:8], v12, v[2:3]
-; SDAG-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v11
+; SDAG-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v11
 ; SDAG-NEXT:    v_add_u32_e32 v13, 26, v6
 ; SDAG-NEXT:    v_cndmask_b32_e32 v8, v8, v10, vcc
 ; SDAG-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v11
@@ -337,7 +337,7 @@ define float @uitofp_i128_to_f32(i128 %x) {
 ; SDAG-NEXT:    v_cndmask_b32_e64 v8, v8, v1, s[4:5]
 ; SDAG-NEXT:    v_or_b32_e32 v10, v12, v10
 ; SDAG-NEXT:    v_or_b32_e32 v9, v11, v9
-; SDAG-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v13
+; SDAG-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v13
 ; SDAG-NEXT:    v_lshlrev_b64 v[0:1], v13, v[0:1]
 ; SDAG-NEXT:    v_cndmask_b32_e32 v7, v7, v10, vcc
 ; SDAG-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v13
@@ -407,14 +407,14 @@ define float @uitofp_i128_to_f32(i128 %x) {
 ; GISEL-NEXT:    v_min_u32_e32 v5, v5, v6
 ; GISEL-NEXT:    v_cndmask_b32_e32 v5, v5, v4, vcc
 ; GISEL-NEXT:    v_sub_u32_e32 v6, 0x80, v5
-; GISEL-NEXT:    v_cmp_ge_i32_e32 vcc, 24, v6
+; GISEL-NEXT:    v_cmp_gt_i32_e32 vcc, 24, v6
 ; GISEL-NEXT:    ; implicit-def: $vgpr4
 ; GISEL-NEXT:    s_and_saveexec_b64 s[4:5], vcc
 ; GISEL-NEXT:    s_xor_b64 s[4:5], exec, s[4:5]
 ; GISEL-NEXT:  ; %bb.2: ; %itofp-if-else
 ; GISEL-NEXT:    v_add_u32_e32 v2, 0xffffff98, v5
 ; GISEL-NEXT:    v_lshlrev_b64 v[0:1], v2, v[0:1]
-; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v2
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v2
 ; GISEL-NEXT:    v_cndmask_b32_e32 v4, 0, v0, vcc
 ; GISEL-NEXT:    ; implicit-def: $vgpr6
 ; GISEL-NEXT:    ; implicit-def: $vgpr0
@@ -425,7 +425,7 @@ define float @uitofp_i128_to_f32(i128 %x) {
 ; GISEL-NEXT:    s_xor_b64 exec, exec, s[8:9]
 ; GISEL-NEXT:    s_cbranch_execz .LBB1_13
 ; GISEL-NEXT:  ; %bb.4: ; %NodeBlock
-; GISEL-NEXT:    v_cmp_le_i32_e32 vcc, 26, v6
+; GISEL-NEXT:    v_cmp_lt_i32_e32 vcc, 26, v6
 ; GISEL-NEXT:    s_and_saveexec_b64 s[4:5], vcc
 ; GISEL-NEXT:    s_xor_b64 s[10:11], exec, s[4:5]
 ; GISEL-NEXT:    s_cbranch_execz .LBB1_8
@@ -442,7 +442,7 @@ define float @uitofp_i128_to_f32(i128 %x) {
 ; GISEL-NEXT:    v_or_b32_e32 v10, v8, v10
 ; GISEL-NEXT:    v_or_b32_e32 v11, v9, v11
 ; GISEL-NEXT:    v_lshrrev_b64 v[8:9], v12, v[2:3]
-; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v4
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v4
 ; GISEL-NEXT:    v_add_u32_e32 v13, 26, v5
 ; GISEL-NEXT:    v_cndmask_b32_e32 v8, v8, v10, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e32 v9, v9, v11, vcc
@@ -456,7 +456,7 @@ define float @uitofp_i128_to_f32(i128 %x) {
 ; GISEL-NEXT:    v_or_b32_e32 v14, v8, v10
 ; GISEL-NEXT:    v_or_b32_e32 v15, v9, v11
 ; GISEL-NEXT:    v_lshrrev_b64 v[10:11], v5, -1
-; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v13
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v13
 ; GISEL-NEXT:    v_cndmask_b32_e32 v5, v10, v14, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e32 v10, v11, v15, vcc
 ; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v13
@@ -528,7 +528,7 @@ define double @sitofp_i128_to_f64(i128 %x) {
 ; SDAG-NEXT:    v_subb_co_u32_e32 v1, vcc, 0, v5, vcc
 ; SDAG-NEXT:    v_subb_co_u32_e32 v6, vcc, 0, v2, vcc
 ; SDAG-NEXT:    v_subb_co_u32_e32 v7, vcc, 0, v3, vcc
-; SDAG-NEXT:    v_cmp_gt_i64_e32 vcc, 0, v[2:3]
+; SDAG-NEXT:    v_cmp_ge_i64_e32 vcc, 0, v[2:3]
 ; SDAG-NEXT:    ; implicit-def: $vgpr10
 ; SDAG-NEXT:    v_cndmask_b32_e32 v6, v2, v6, vcc
 ; SDAG-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc
@@ -546,14 +546,14 @@ define double @sitofp_i128_to_f64(i128 %x) {
 ; SDAG-NEXT:    v_add_u32_e32 v1, 64, v1
 ; SDAG-NEXT:    v_cndmask_b32_e32 v9, v1, v0, vcc
 ; SDAG-NEXT:    v_sub_u32_e32 v2, 0x80, v9
-; SDAG-NEXT:    v_cmp_gt_i32_e32 vcc, 54, v2
+; SDAG-NEXT:    v_cmp_ge_i32_e32 vcc, 54, v2
 ; SDAG-NEXT:    ; implicit-def: $vgpr0_vgpr1
 ; SDAG-NEXT:    s_and_saveexec_b64 s[4:5], vcc
 ; SDAG-NEXT:    s_xor_b64 s[4:5], exec, s[4:5]
 ; SDAG-NEXT:  ; %bb.2: ; %itofp-if-else
 ; SDAG-NEXT:    v_add_u32_e32 v2, 0xffffffb5, v9
 ; SDAG-NEXT:    v_lshlrev_b64 v[0:1], v2, v[4:5]
-; SDAG-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v2
+; SDAG-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v2
 ; SDAG-NEXT:    v_cndmask_b32_e32 v10, 0, v1, vcc
 ; SDAG-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
 ; SDAG-NEXT:    ; implicit-def: $vgpr2
@@ -565,7 +565,7 @@ define double @sitofp_i128_to_f64(i128 %x) {
 ; SDAG-NEXT:    s_xor_b64 exec, exec, s[8:9]
 ; SDAG-NEXT:    s_cbranch_execz .LBB2_13
 ; SDAG-NEXT:  ; %bb.4: ; %NodeBlock
-; SDAG-NEXT:    v_cmp_lt_i32_e32 vcc, 54, v2
+; SDAG-NEXT:    v_cmp_le_i32_e32 vcc, 54, v2
 ; SDAG-NEXT:    s_and_saveexec_b64 s[4:5], vcc
 ; SDAG-NEXT:    s_xor_b64 s[10:11], exec, s[4:5]
 ; SDAG-NEXT:    s_cbranch_execz .LBB2_8
@@ -582,7 +582,7 @@ define double @sitofp_i128_to_f64(i128 %x) {
 ; SDAG-NEXT:    v_or_b32_e32 v11, v1, v11
 ; SDAG-NEXT:    v_or_b32_e32 v10, v0, v10
 ; SDAG-NEXT:    v_lshrrev_b64 v[0:1], v13, v[6:7]
-; SDAG-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v12
+; SDAG-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v12
 ; SDAG-NEXT:    v_add_u32_e32 v16, 55, v9
 ; SDAG-NEXT:    v_cndmask_b32_e32 v1, v1, v11, vcc
 ; SDAG-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v12
@@ -596,7 +596,7 @@ define double @sitofp_i128_to_f64(i128 %x) {
 ; SDAG-NEXT:    v_lshlrev_b64 v[12:13], v9, v[4:5]
 ; SDAG-NEXT:    v_cndmask_b32_e32 v11, 0, v11, vcc
 ; SDAG-NEXT:    v_cndmask_b32_e32 v10, 0, v10, vcc
-; SDAG-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v16
+; SDAG-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v16
 ; SDAG-NEXT:    v_cndmask_b32_e64 v1, v1, v5, s[4:5]
 ; SDAG-NEXT:    v_cndmask_b32_e64 v0, v0, v4, s[4:5]
 ; SDAG-NEXT:    v_cndmask_b32_e32 v9, v13, v15, vcc
@@ -691,7 +691,7 @@ define double @sitofp_i128_to_f64(i128 %x) {
 ; GISEL-NEXT:    v_min_u32_e32 v1, v1, v7
 ; GISEL-NEXT:    v_cndmask_b32_e32 v9, v1, v0, vcc
 ; GISEL-NEXT:    v_sub_u32_e32 v7, 0x80, v9
-; GISEL-NEXT:    v_cmp_ge_i32_e32 vcc, 53, v7
+; GISEL-NEXT:    v_cmp_gt_i32_e32 vcc, 53, v7
 ; GISEL-NEXT:    ; implicit-def: $vgpr10
 ; GISEL-NEXT:    ; implicit-def: $vgpr0_vgpr1
 ; GISEL-NEXT:    s_and_saveexec_b64 s[4:5], vcc
@@ -699,7 +699,7 @@ define double @sitofp_i128_to_f64(i128 %x) {
 ; GISEL-NEXT:  ; %bb.2: ; %itofp-if-else
 ; GISEL-NEXT:    v_add_u32_e32 v4, 0xffffffb5, v9
 ; GISEL-NEXT:    v_lshlrev_b64 v[0:1], v4, v[2:3]
-; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v4
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v4
 ; GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e32 v10, 0, v1, vcc
 ; GISEL-NEXT:    ; implicit-def: $vgpr7
@@ -710,7 +710,7 @@ define double @sitofp_i128_to_f64(i128 %x) {
 ; GISEL-NEXT:    s_xor_b64 exec, exec, s[8:9]
 ; GISEL-NEXT:    s_cbranch_execz .LBB2_13
 ; GISEL-NEXT:  ; %bb.4: ; %NodeBlock
-; GISEL-NEXT:    v_cmp_le_i32_e32 vcc, 55, v7
+; GISEL-NEXT:    v_cmp_lt_i32_e32 vcc, 55, v7
 ; GISEL-NEXT:    s_and_saveexec_b64 s[4:5], vcc
 ; GISEL-NEXT:    s_xor_b64 s[10:11], exec, s[4:5]
 ; GISEL-NEXT:    s_cbranch_execz .LBB2_8
@@ -728,7 +728,7 @@ define double @sitofp_i128_to_f64(i128 %x) {
 ; GISEL-NEXT:    v_or_b32_e32 v10, v0, v10
 ; GISEL-NEXT:    v_or_b32_e32 v11, v1, v11
 ; GISEL-NEXT:    v_lshrrev_b64 v[0:1], v15, v[4:5]
-; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v14
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v14
 ; GISEL-NEXT:    v_add_u32_e32 v15, 55, v9
 ; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v11, vcc
@@ -743,7 +743,7 @@ define double @sitofp_i128_to_f64(i128 %x) {
 ; GISEL-NEXT:    v_or_b32_e32 v16, v0, v12
 ; GISEL-NEXT:    v_or_b32_e32 v17, v1, v13
 ; GISEL-NEXT:    v_lshrrev_b64 v[12:13], v9, -1
-; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v15
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v15
 ; GISEL-NEXT:    v_cndmask_b32_e32 v9, v12, v16, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e32 v12, v13, v17, vcc
 ; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v15
@@ -834,7 +834,7 @@ define double @uitofp_i128_to_f64(i128 %x) {
 ; SDAG-NEXT:    v_add_u32_e32 v5, 64, v5
 ; SDAG-NEXT:    v_cndmask_b32_e32 v8, v5, v4, vcc
 ; SDAG-NEXT:    v_sub_u32_e32 v6, 0x80, v8
-; SDAG-NEXT:    v_cmp_gt_i32_e32 vcc, 54, v6
+; SDAG-NEXT:    v_cmp_ge_i32_e32 vcc, 54, v6
 ; SDAG-NEXT:    ; implicit-def: $vgpr9
 ; SDAG-NEXT:    ; implicit-def: $vgpr4_vgpr5
 ; SDAG-NEXT:    s_and_saveexec_b64 s[4:5], vcc
@@ -842,7 +842,7 @@ define double @uitofp_i128_to_f64(i128 %x) {
 ; SDAG-NEXT:  ; %bb.2: ; %itofp-if-else
 ; SDAG-NEXT:    v_add_u32_e32 v2, 0xffffffb5, v8
 ; SDAG-NEXT:    v_lshlrev_b64 v[0:1], v2, v[0:1]
-; SDAG-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v2
+; SDAG-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v2
 ; SDAG-NEXT:    v_cndmask_b32_e32 v9, 0, v1, vcc
 ; SDAG-NEXT:    v_cndmask_b32_e32 v4, 0, v0, vcc
 ; SDAG-NEXT:    ; implicit-def: $vgpr6
@@ -854,7 +854,7 @@ define double @uitofp_i128_to_f64(i128 %x) {
 ; SDAG-NEXT:    s_xor_b64 exec, exec, s[8:9]
 ; SDAG-NEXT:    s_cbranch_execz .LBB3_13
 ; SDAG-NEXT:  ; %bb.4: ; %NodeBlock
-; SDAG-NEXT:    v_cmp_lt_i32_e32 vcc, 54, v6
+; SDAG-NEXT:    v_cmp_le_i32_e32 vcc, 54, v6
 ; SDAG-NEXT:    s_and_saveexec_b64 s[4:5], vcc
 ; SDAG-NEXT:    s_xor_b64 s[10:11], exec, s[4:5]
 ; SDAG-NEXT:    s_cbranch_execz .LBB3_8
@@ -871,7 +871,7 @@ define double @uitofp_i128_to_f64(i128 %x) {
 ; SDAG-NEXT:    v_or_b32_e32 v10, v5, v10
 ; SDAG-NEXT:    v_or_b32_e32 v9, v4, v9
 ; SDAG-NEXT:    v_lshrrev_b64 v[4:5], v12, v[2:3]
-; SDAG-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v11
+; SDAG-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v11
 ; SDAG-NEXT:    v_add_u32_e32 v15, 55, v8
 ; SDAG-NEXT:    v_cndmask_b32_e32 v5, v5, v10, vcc
 ; SDAG-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v11
@@ -885,7 +885,7 @@ define double @uitofp_i128_to_f64(i128 %x) {
 ; SDAG-NEXT:    v_lshlrev_b64 v[11:12], v8, v[0:1]
 ; SDAG-NEXT:    v_cndmask_b32_e32 v10, 0, v10, vcc
 ; SDAG-NEXT:    v_cndmask_b32_e32 v9, 0, v9, vcc
-; SDAG-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v15
+; SDAG-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v15
 ; SDAG-NEXT:    v_cndmask_b32_e64 v5, v5, v1, s[4:5]
 ; SDAG-NEXT:    v_cndmask_b32_e64 v4, v4, v0, s[4:5]
 ; SDAG-NEXT:    v_cndmask_b32_e32 v8, v12, v14, vcc
@@ -967,7 +967,7 @@ define double @uitofp_i128_to_f64(i128 %x) {
 ; GISEL-NEXT:    v_min_u32_e32 v5, v5, v6
 ; GISEL-NEXT:    v_cndmask_b32_e32 v8, v5, v4, vcc
 ; GISEL-NEXT:    v_sub_u32_e32 v6, 0x80, v8
-; GISEL-NEXT:    v_cmp_ge_i32_e32 vcc, 53, v6
+; GISEL-NEXT:    v_cmp_gt_i32_e32 vcc, 53, v6
 ; GISEL-NEXT:    ; implicit-def: $vgpr9
 ; GISEL-NEXT:    ; implicit-def: $vgpr4_vgpr5
 ; GISEL-NEXT:    s_and_saveexec_b64 s[4:5], vcc
@@ -975,7 +975,7 @@ define double @uitofp_i128_to_f64(i128 %x) {
 ; GISEL-NEXT:  ; %bb.2: ; %itofp-if-else
 ; GISEL-NEXT:    v_add_u32_e32 v2, 0xffffffb5, v8
 ; GISEL-NEXT:    v_lshlrev_b64 v[0:1], v2, v[0:1]
-; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v2
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v2
 ; GISEL-NEXT:    v_cndmask_b32_e32 v4, 0, v0, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e32 v9, 0, v1, vcc
 ; GISEL-NEXT:    ; implicit-def: $vgpr6
@@ -986,7 +986,7 @@ define double @uitofp_i128_to_f64(i128 %x) {
 ; GISEL-NEXT:    s_xor_b64 exec, exec, s[8:9]
 ; GISEL-NEXT:    s_cbranch_execz .LBB3_13
 ; GISEL-NEXT:  ; %bb.4: ; %NodeBlock
-; GISEL-NEXT:    v_cmp_le_i32_e32 vcc, 55, v6
+; GISEL-NEXT:    v_cmp_lt_i32_e32 vcc, 55, v6
 ; GISEL-NEXT:    s_and_saveexec_b64 s[4:5], vcc
 ; GISEL-NEXT:    s_xor_b64 s[10:11], exec, s[4:5]
 ; GISEL-NEXT:    s_cbranch_execz .LBB3_8
@@ -1004,7 +1004,7 @@ define double @uitofp_i128_to_f64(i128 %x) {
 ; GISEL-NEXT:    v_or_b32_e32 v9, v4, v9
 ; GISEL-NEXT:    v_or_b32_e32 v10, v5, v10
 ; GISEL-NEXT:    v_lshrrev_b64 v[4:5], v14, v[2:3]
-; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v13
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v13
 ; GISEL-NEXT:    v_add_u32_e32 v15, 55, v8
 ; GISEL-NEXT:    v_cndmask_b32_e32 v4, v4, v9, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e32 v5, v5, v10, vcc
@@ -1020,7 +1020,7 @@ define double @uitofp_i128_to_f64(i128 %x) {
 ; GISEL-NEXT:    v_or_b32_e32 v16, v4, v12
 ; GISEL-NEXT:    v_or_b32_e32 v17, v5, v13
 ; GISEL-NEXT:    v_lshrrev_b64 v[12:13], v8, -1
-; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v15
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v15
 ; GISEL-NEXT:    v_cndmask_b32_e32 v8, v12, v16, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e32 v12, v13, v17, vcc
 ; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v15
@@ -1107,7 +1107,7 @@ define half @sitofp_i128_to_f16(i128 %x) {
 ; SDAG-NEXT:    v_subb_co_u32_e32 v5, vcc, 0, v1, vcc
 ; SDAG-NEXT:    v_subb_co_u32_e32 v6, vcc, 0, v2, vcc
 ; SDAG-NEXT:    v_subb_co_u32_e32 v7, vcc, 0, v3, vcc
-; SDAG-NEXT:    v_cmp_gt_i64_e32 vcc, 0, v[2:3]
+; SDAG-NEXT:    v_cmp_ge_i64_e32 vcc, 0, v[2:3]
 ; SDAG-NEXT:    ; implicit-def: $vgpr8
 ; SDAG-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
 ; SDAG-NEXT:    v_cndmask_b32_e32 v4, v2, v6, vcc
@@ -1125,13 +1125,13 @@ define half @sitofp_i128_to_f16(i128 %x) {
 ; SDAG-NEXT:    v_add_u32_e32 v6, 64, v6
 ; SDAG-NEXT:    v_cndmask_b32_e32 v7, v6, v2, vcc
 ; SDAG-NEXT:    v_sub_u32_e32 v2, 0x80, v7
-; SDAG-NEXT:    v_cmp_gt_i32_e32 vcc, 25, v2
+; SDAG-NEXT:    v_cmp_ge_i32_e32 vcc, 25, v2
 ; SDAG-NEXT:    s_and_saveexec_b64 s[4:5], vcc
 ; SDAG-NEXT:    s_xor_b64 s[4:5], exec, s[4:5]
 ; SDAG-NEXT:  ; %bb.2: ; %itofp-if-else
 ; SDAG-NEXT:    v_add_u32_e32 v2, 0xffffff98, v7
 ; SDAG-NEXT:    v_lshlrev_b64 v[0:1], v2, v[0:1]
-; SDAG-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v2
+; SDAG-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v2
 ; SDAG-NEXT:    v_cndmask_b32_e32 v8, 0, v0, vcc
 ; SDAG-NEXT:    ; implicit-def: $vgpr2
 ; SDAG-NEXT:    ; implicit-def: $vgpr0_vgpr1
@@ -1142,7 +1142,7 @@ define half @sitofp_i128_to_f16(i128 %x) {
 ; SDAG-NEXT:    s_xor_b64 exec, exec, s[8:9]
 ; SDAG-NEXT:    s_cbranch_execz .LBB4_13
 ; SDAG-NEXT:  ; %bb.4: ; %NodeBlock
-; SDAG-NEXT:    v_cmp_lt_i32_e32 vcc, 25, v2
+; SDAG-NEXT:    v_cmp_le_i32_e32 vcc, 25, v2
 ; SDAG-NEXT:    s_and_saveexec_b64 s[4:5], vcc
 ; SDAG-NEXT:    s_xor_b64 s[10:11], exec, s[4:5]
 ; SDAG-NEXT:    s_cbranch_execz .LBB4_8
@@ -1159,7 +1159,7 @@ define half @sitofp_i128_to_f16(i128 %x) {
 ; SDAG-NEXT:    v_or_b32_e32 v11, v9, v11
 ; SDAG-NEXT:    v_or_b32_e32 v10, v8, v10
 ; SDAG-NEXT:    v_lshrrev_b64 v[8:9], v13, v[4:5]
-; SDAG-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v12
+; SDAG-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v12
 ; SDAG-NEXT:    v_add_u32_e32 v14, 26, v7
 ; SDAG-NEXT:    v_cndmask_b32_e32 v9, v9, v11, vcc
 ; SDAG-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v12
@@ -1172,7 +1172,7 @@ define half @sitofp_i128_to_f16(i128 %x) {
 ; SDAG-NEXT:    v_cndmask_b32_e64 v9, v9, v1, s[4:5]
 ; SDAG-NEXT:    v_or_b32_e32 v11, v13, v11
 ; SDAG-NEXT:    v_or_b32_e32 v10, v12, v10
-; SDAG-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v14
+; SDAG-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v14
 ; SDAG-NEXT:    v_lshlrev_b64 v[0:1], v14, v[0:1]
 ; SDAG-NEXT:    v_cndmask_b32_e32 v8, v8, v11, vcc
 ; SDAG-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v14
@@ -1253,14 +1253,14 @@ define half @sitofp_i128_to_f16(i128 %x) {
 ; GISEL-NEXT:    v_min_u32_e32 v5, v5, v7
 ; GISEL-NEXT:    v_cndmask_b32_e32 v5, v5, v4, vcc
 ; GISEL-NEXT:    v_sub_u32_e32 v7, 0x80, v5
-; GISEL-NEXT:    v_cmp_ge_i32_e32 vcc, 24, v7
+; GISEL-NEXT:    v_cmp_gt_i32_e32 vcc, 24, v7
 ; GISEL-NEXT:    ; implicit-def: $vgpr4
 ; GISEL-NEXT:    s_and_saveexec_b64 s[4:5], vcc
 ; GISEL-NEXT:    s_xor_b64 s[4:5], exec, s[4:5]
 ; GISEL-NEXT:  ; %bb.2: ; %itofp-if-else
 ; GISEL-NEXT:    v_add_u32_e32 v2, 0xffffff98, v5
 ; GISEL-NEXT:    v_lshlrev_b64 v[0:1], v2, v[0:1]
-; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v2
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v2
 ; GISEL-NEXT:    v_cndmask_b32_e32 v4, 0, v0, vcc
 ; GISEL-NEXT:    ; implicit-def: $vgpr7
 ; GISEL-NEXT:    ; implicit-def: $vgpr0
@@ -1271,7 +1271,7 @@ define half @sitofp_i128_to_f16(i128 %x) {
 ; GISEL-NEXT:    s_xor_b64 exec, exec, s[8:9]
 ; GISEL-NEXT:    s_cbranch_execz .LBB4_13
 ; GISEL-NEXT:  ; %bb.4: ; %NodeBlock
-; GISEL-NEXT:    v_cmp_le_i32_e32 vcc, 26, v7
+; GISEL-NEXT:    v_cmp_lt_i32_e32 vcc, 26, v7
 ; GISEL-NEXT:    s_and_saveexec_b64 s[4:5], vcc
 ; GISEL-NEXT:    s_xor_b64 s[10:11], exec, s[4:5]
 ; GISEL-NEXT:    s_cbranch_execz .LBB4_8
@@ -1288,7 +1288,7 @@ define half @sitofp_i128_to_f16(i128 %x) {
 ; GISEL-NEXT:    v_or_b32_e32 v11, v9, v11
 ; GISEL-NEXT:    v_or_b32_e32 v12, v10, v12
 ; GISEL-NEXT:    v_lshrrev_b64 v[9:10], v13, v[2:3]
-; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v4
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v4
 ; GISEL-NEXT:    v_add_u32_e32 v14, 26, v5
 ; GISEL-NEXT:    v_cndmask_b32_e32 v9, v9, v11, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e32 v10, v10, v12, vcc
@@ -1302,7 +1302,7 @@ define half @sitofp_i128_to_f16(i128 %x) {
 ; GISEL-NEXT:    v_or_b32_e32 v15, v9, v11
 ; GISEL-NEXT:    v_or_b32_e32 v16, v10, v12
 ; GISEL-NEXT:    v_lshrrev_b64 v[11:12], v5, -1
-; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v14
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v14
 ; GISEL-NEXT:    v_cndmask_b32_e32 v5, v11, v15, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e32 v11, v12, v16, vcc
 ; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v14
@@ -1381,14 +1381,14 @@ define half @uitofp_i128_to_f16(i128 %x) {
 ; SDAG-NEXT:    v_add_u32_e32 v5, 64, v5
 ; SDAG-NEXT:    v_cndmask_b32_e32 v6, v5, v4, vcc
 ; SDAG-NEXT:    v_sub_u32_e32 v4, 0x80, v6
-; SDAG-NEXT:    v_cmp_gt_i32_e32 vcc, 25, v4
+; SDAG-NEXT:    v_cmp_ge_i32_e32 vcc, 25, v4
 ; SDAG-NEXT:    ; implicit-def: $vgpr7
 ; SDAG-NEXT:    s_and_saveexec_b64 s[4:5], vcc
 ; SDAG-NEXT:    s_xor_b64 s[4:5], exec, s[4:5]
 ; SDAG-NEXT:  ; %bb.2: ; %itofp-if-else
 ; SDAG-NEXT:    v_add_u32_e32 v2, 0xffffff98, v6
 ; SDAG-NEXT:    v_lshlrev_b64 v[0:1], v2, v[0:1]
-; SDAG-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v2
+; SDAG-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v2
 ; SDAG-NEXT:    v_cndmask_b32_e32 v7, 0, v0, vcc
 ; SDAG-NEXT:    ; implicit-def: $vgpr4
 ; SDAG-NEXT:    ; implicit-def: $vgpr0_vgpr1
@@ -1399,7 +1399,7 @@ define half @uitofp_i128_to_f16(i128 %x) {
 ; SDAG-NEXT:    s_xor_b64 exec, exec, s[8:9]
 ; SDAG-NEXT:    s_cbranch_execz .LBB5_13
 ; SDAG-NEXT:  ; %bb.4: ; %NodeBlock
-; SDAG-NEXT:    v_cmp_lt_i32_e32 vcc, 25, v4
+; SDAG-NEXT:    v_cmp_le_i32_e32 vcc, 25, v4
 ; SDAG-NEXT:    s_and_saveexec_b64 s[4:5], vcc
 ; SDAG-NEXT:    s_xor_b64 s[10:11], exec, s[4:5]
 ; SDAG-NEXT:    s_cbranch_execz .LBB5_8
@@ -1416,7 +1416,7 @@ define half @uitofp_i128_to_f16(i128 %x) {
 ; SDAG-NEXT:    v_or_b32_e32 v10, v8, v10
 ; SDAG-NEXT:    v_or_b32_e32 v9, v7, v9
 ; SDAG-NEXT:    v_lshrrev_b64 v[7:8], v12, v[2:3]
-; SDAG-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v11
+; SDAG-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v11
 ; SDAG-NEXT:    v_add_u32_e32 v13, 26, v6
 ; SDAG-NEXT:    v_cndmask_b32_e32 v8, v8, v10, vcc
 ; SDAG-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v11
@@ -1429,7 +1429,7 @@ define half @uitofp_i128_to_f16(i128 %x) {
 ; SDAG-NEXT:    v_cndmask_b32_e64 v8, v8, v1, s[4:5]
 ; SDAG-NEXT:    v_or_b32_e32 v10, v12, v10
 ; SDAG-NEXT:    v_or_b32_e32 v9, v11, v9
-; SDAG-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v13
+; SDAG-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v13
 ; SDAG-NEXT:    v_lshlrev_b64 v[0:1], v13, v[0:1]
 ; SDAG-NEXT:    v_cndmask_b32_e32 v7, v7, v10, vcc
 ; SDAG-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v13
@@ -1500,14 +1500,14 @@ define half @uitofp_i128_to_f16(i128 %x) {
 ; GISEL-NEXT:    v_min_u32_e32 v5, v5, v6
 ; GISEL-NEXT:    v_cndmask_b32_e32 v5, v5, v4, vcc
 ; GISEL-NEXT:    v_sub_u32_e32 v6, 0x80, v5
-; GISEL-NEXT:    v_cmp_ge_i32_e32 vcc, 24, v6
+; GISEL-NEXT:    v_cmp_gt_i32_e32 vcc, 24, v6
 ; GISEL-NEXT:    ; implicit-def: $vgpr4
 ; GISEL-NEXT:    s_and_saveexec_b64 s[4:5], vcc
 ; GISEL-NEXT:    s_xor_b64 s[4:5], exec, s[4:5]
 ; GISEL-NEXT:  ; %bb.2: ; %itofp-if-else
 ; GISEL-NEXT:    v_add_u32_e32 v2, 0xffffff98, v5
 ; GISEL-NEXT:    v_lshlrev_b64 v[0:1], v2, v[0:1]
-; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v2
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v2
 ; GISEL-NEXT:    v_cndmask_b32_e32 v4, 0, v0, vcc
 ; GISEL-NEXT:    ; implicit-def: $vgpr6
 ; GISEL-NEXT:    ; implicit-def: $vgpr0
@@ -1518,7 +1518,7 @@ define half @uitofp_i128_to_f16(i128 %x) {
 ; GISEL-NEXT:    s_xor_b64 exec, exec, s[8:9]
 ; GISEL-NEXT:    s_cbranch_execz .LBB5_13
 ; GISEL-NEXT:  ; %bb.4: ; %NodeBlock
-; GISEL-NEXT:    v_cmp_le_i32_e32 vcc, 26, v6
+; GISEL-NEXT:    v_cmp_lt_i32_e32 vcc, 26, v6
 ; GISEL-NEXT:    s_and_saveexec_b64 s[4:5], vcc
 ; GISEL-NEXT:    s_xor_b64 s[10:11], exec, s[4:5]
 ; GISEL-NEXT:    s_cbranch_execz .LBB5_8
@@ -1535,7 +1535,7 @@ define half @uitofp_i128_to_f16(i128 %x) {
 ; GISEL-NEXT:    v_or_b32_e32 v10, v8, v10
 ; GISEL-NEXT:    v_or_b32_e32 v11, v9, v11
 ; GISEL-NEXT:    v_lshrrev_b64 v[8:9], v12, v[2:3]
-; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v4
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v4
 ; GISEL-NEXT:    v_add_u32_e32 v13, 26, v5
 ; GISEL-NEXT:    v_cndmask_b32_e32 v8, v8, v10, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e32 v9, v9, v11, vcc
@@ -1549,7 +1549,7 @@ define half @uitofp_i128_to_f16(i128 %x) {
 ; GISEL-NEXT:    v_or_b32_e32 v14, v8, v10
 ; GISEL-NEXT:    v_or_b32_e32 v15, v9, v11
 ; GISEL-NEXT:    v_lshrrev_b64 v[10:11], v5, -1
-; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v13
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v13
 ; GISEL-NEXT:    v_cndmask_b32_e32 v5, v10, v14, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e32 v10, v11, v15, vcc
 ; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v13
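
(Aside for readers auditing the updated checks above: every rewritten compare trades strictness with its counterpart, lt <-> le and gt <-> ge, while keeping the operand order, which is what the changed reverse-opcode mapping produces. When verifying a given check line against the original IR compare, it helps to keep the two distinct rewrite identities apart: operand swapping (commutation) pairs lt with gt and le with ge, while logical negation (inversion) pairs lt with ge and le with gt. The following is a purely illustrative sketch, not code from this patch; the table names "swap" and "inv" are invented for the example:

# Illustrative only, not part of the patch. Exhaustively checks the two
# rewrite identities a comparison-opcode table can encode, over a small domain.
import itertools

ops = {
    "lt": lambda a, b: a < b,
    "le": lambda a, b: a <= b,
    "gt": lambda a, b: a > b,
    "ge": lambda a, b: a >= b,
}

# Operand swap (commutation): a OP b  ==  b SWAP(OP) a
swap = {"lt": "gt", "le": "ge", "gt": "lt", "ge": "le"}
# Logical negation (inversion): not (a OP b)  ==  a INV(OP) b
inv = {"lt": "ge", "le": "gt", "gt": "le", "ge": "lt"}

for a, b in itertools.product(range(-3, 4), repeat=2):
    for name, fn in ops.items():
        assert fn(a, b) == ops[swap[name]](b, a)
        assert fn(a, b) == (not ops[inv[name]](a, b))
print("both identity tables hold")

Running the sketch prints "both identity tables hold": lt/gt and le/ge are the correct pairs under operand swap, while lt/ge and le/gt pair only under negation.)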
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i32.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i32.ll
index e00e1f13b2b77..748b46dab7bdb 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i32.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i32.ll
@@ -55,7 +55,7 @@ define amdgpu_cs i32 @compare_ints(i32 %x, i32 %y) {
 define amdgpu_cs i32 @compare_int_with_constant(i32 %x) {
 ; CHECK-LABEL: compare_int_with_constant:
 ; CHECK:       ; %bb.0:
-; CHECK-NEXT:    v_cmp_lt_i32_e64 s0, 0x62, v0
+; CHECK-NEXT:    v_cmp_le_i32_e64 s0, 0x62, v0
 ; CHECK-NEXT:    ; return to shader part epilog
   %cmp = icmp sge i32 %x, 99
   %ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %cmp)
@@ -179,7 +179,7 @@ false:
 define amdgpu_cs i32 @branch_divergent_ballot_ne_zero_compare(i32 %v) {
 ; CHECK-LABEL: branch_divergent_ballot_ne_zero_compare:
 ; CHECK:       ; %bb.0:
-; CHECK-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 12, v0
+; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc_lo, 12, v0
 ; CHECK-NEXT:    s_cbranch_vccz .LBB11_2
 ; CHECK-NEXT:  ; %bb.1: ; %true
 ; CHECK-NEXT:    s_mov_b32 s0, 42
@@ -223,7 +223,7 @@ false:
 define amdgpu_cs i32 @branch_divergent_ballot_eq_zero_compare(i32 %v) {
 ; CHECK-LABEL: branch_divergent_ballot_eq_zero_compare:
 ; CHECK:       ; %bb.0:
-; CHECK-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 12, v0
+; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc_lo, 12, v0
 ; CHECK-NEXT:    s_cbranch_vccz .LBB13_2
 ; CHECK-NEXT:  ; %bb.1: ; %false
 ; CHECK-NEXT:    s_mov_b32 s0, 33
@@ -267,8 +267,8 @@ false:
 define amdgpu_cs i32 @branch_divergent_ballot_ne_zero_and(i32 %v1, i32 %v2) {
 ; CHECK-LABEL: branch_divergent_ballot_ne_zero_and:
 ; CHECK:       ; %bb.0:
-; CHECK-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 12, v0
-; CHECK-NEXT:    v_cmp_lt_u32_e64 s0, 34, v1
+; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc_lo, 12, v0
+; CHECK-NEXT:    v_cmp_le_u32_e64 s0, 34, v1
 ; CHECK-NEXT:    s_and_b32 vcc_lo, vcc_lo, s0
 ; CHECK-NEXT:    s_cbranch_vccz .LBB15_2
 ; CHECK-NEXT:  ; %bb.1: ; %true
@@ -322,8 +322,8 @@ false:
 define amdgpu_cs i32 @branch_divergent_ballot_eq_zero_and(i32 %v1, i32 %v2) {
 ; CHECK-LABEL: branch_divergent_ballot_eq_zero_and:
 ; CHECK:       ; %bb.0:
-; CHECK-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 12, v0
-; CHECK-NEXT:    v_cmp_lt_u32_e64 s0, 34, v1
+; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc_lo, 12, v0
+; CHECK-NEXT:    v_cmp_le_u32_e64 s0, 34, v1
 ; CHECK-NEXT:    s_and_b32 vcc_lo, vcc_lo, s0
 ; CHECK-NEXT:    s_cbranch_vccz .LBB17_2
 ; CHECK-NEXT:  ; %bb.1: ; %false
@@ -402,8 +402,8 @@ declare i32 @llvm.amdgcn.icmp.i32(i1, i1, i32)
 define amdgpu_cs i32 @branch_divergent_simulated_negated_ballot_ne_zero_and(i32 %v1, i32 %v2) {
 ; CHECK-LABEL: branch_divergent_simulated_negated_ballot_ne_zero_and:
 ; CHECK:       ; %bb.0:
-; CHECK-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 12, v0
-; CHECK-NEXT:    v_cmp_lt_u32_e64 s0, 34, v1
+; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc_lo, 12, v0
+; CHECK-NEXT:    v_cmp_le_u32_e64 s0, 34, v1
 ; CHECK-NEXT:    s_and_b32 vcc_lo, vcc_lo, s0
 ; CHECK-NEXT:    s_cbranch_vccnz .LBB20_2
 ; CHECK-NEXT:  ; %bb.1: ; %true
@@ -471,8 +471,8 @@ false:
 define amdgpu_cs i32 @branch_divergent_simulated_negated_ballot_eq_zero_and(i32 %v1, i32 %v2) {
 ; CHECK-LABEL: branch_divergent_simulated_negated_ballot_eq_zero_and:
 ; CHECK:       ; %bb.0:
-; CHECK-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 12, v0
-; CHECK-NEXT:    v_cmp_lt_u32_e64 s0, 34, v1
+; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc_lo, 12, v0
+; CHECK-NEXT:    v_cmp_le_u32_e64 s0, 34, v1
 ; CHECK-NEXT:    s_and_b32 vcc_lo, vcc_lo, s0
 ; CHECK-NEXT:    s_cbranch_vccnz .LBB22_2
 ; CHECK-NEXT:  ; %bb.1: ; %false
@@ -533,7 +533,7 @@ define amdgpu_ps void @non_cst_non_compare_input(ptr addrspace(1) %out, i32 %tid
 ; GFX10-NEXT:    s_and_saveexec_b32 s1, vcc_lo
 ; GFX10-NEXT:    s_xor_b32 s1, exec_lo, s1
 ; GFX10-NEXT:  ; %bb.1: ; %B
-; GFX10-NEXT:    v_cmp_gt_u32_e64 s0, 2, v2
+; GFX10-NEXT:    v_cmp_ge_u32_e64 s0, 2, v2
 ; GFX10-NEXT:    ; implicit-def: $vgpr2
 ; GFX10-NEXT:  ; %bb.2: ; %Flow
 ; GFX10-NEXT:    s_andn2_saveexec_b32 s1, s1
@@ -557,7 +557,7 @@ define amdgpu_ps void @non_cst_non_compare_input(ptr addrspace(1) %out, i32 %tid
 ; GFX11-NEXT:    v_cmpx_ne_u32_e32 0, v3
 ; GFX11-NEXT:    s_xor_b32 s1, exec_lo, s1
 ; GFX11-NEXT:  ; %bb.1: ; %B
-; GFX11-NEXT:    v_cmp_gt_u32_e64 s0, 2, v2
+; GFX11-NEXT:    v_cmp_ge_u32_e64 s0, 2, v2
 ; GFX11-NEXT:    ; implicit-def: $vgpr2
 ; GFX11-NEXT:  ; %bb.2: ; %Flow
 ; GFX11-NEXT:    s_and_not1_saveexec_b32 s1, s1
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i64.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i64.ll
index b4adf7f641550..f52a4658d6f01 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i64.ll
@@ -57,7 +57,7 @@ define amdgpu_cs i64 @compare_int_with_constant(i32 %x) {
 ; CHECK-LABEL: compare_int_with_constant:
 ; CHECK:       ; %bb.0:
 ; CHECK-NEXT:    s_movk_i32 s0, 0x62
-; CHECK-NEXT:    v_cmp_lt_i32_e64 s[0:1], s0, v0
+; CHECK-NEXT:    v_cmp_le_i32_e64 s[0:1], s0, v0
 ; CHECK-NEXT:    ; return to shader part epilog
   %cmp = icmp sge i32 %x, 99
   %ballot = call i64 @llvm.amdgcn.ballot.i64(i1 %cmp)
@@ -182,7 +182,7 @@ false:
 define amdgpu_cs i32 @branch_divergent_ballot_ne_zero_compare(i32 %v) {
 ; CHECK-LABEL: branch_divergent_ballot_ne_zero_compare:
 ; CHECK:       ; %bb.0:
-; CHECK-NEXT:    v_cmp_gt_u32_e32 vcc, 12, v0
+; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, 12, v0
 ; CHECK-NEXT:    s_cbranch_vccz .LBB11_2
 ; CHECK-NEXT:  ; %bb.1: ; %true
 ; CHECK-NEXT:    s_mov_b32 s0, 42
@@ -226,7 +226,7 @@ false:
 define amdgpu_cs i32 @branch_divergent_ballot_eq_zero_compare(i32 %v) {
 ; CHECK-LABEL: branch_divergent_ballot_eq_zero_compare:
 ; CHECK:       ; %bb.0:
-; CHECK-NEXT:    v_cmp_gt_u32_e32 vcc, 12, v0
+; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, 12, v0
 ; CHECK-NEXT:    s_cbranch_vccz .LBB13_2
 ; CHECK-NEXT:  ; %bb.1: ; %false
 ; CHECK-NEXT:    s_mov_b32 s0, 33
@@ -270,8 +270,8 @@ false:
 define amdgpu_cs i32 @branch_divergent_ballot_ne_zero_and(i32 %v1, i32 %v2) {
 ; CHECK-LABEL: branch_divergent_ballot_ne_zero_and:
 ; CHECK:       ; %bb.0:
-; CHECK-NEXT:    v_cmp_gt_u32_e32 vcc, 12, v0
-; CHECK-NEXT:    v_cmp_lt_u32_e64 s[0:1], 34, v1
+; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, 12, v0
+; CHECK-NEXT:    v_cmp_le_u32_e64 s[0:1], 34, v1
 ; CHECK-NEXT:    s_and_b64 vcc, vcc, s[0:1]
 ; CHECK-NEXT:    s_cbranch_vccz .LBB15_2
 ; CHECK-NEXT:  ; %bb.1: ; %true
@@ -325,8 +325,8 @@ false:
 define amdgpu_cs i32 @branch_divergent_ballot_eq_zero_and(i32 %v1, i32 %v2) {
 ; CHECK-LABEL: branch_divergent_ballot_eq_zero_and:
 ; CHECK:       ; %bb.0:
-; CHECK-NEXT:    v_cmp_gt_u32_e32 vcc, 12, v0
-; CHECK-NEXT:    v_cmp_lt_u32_e64 s[0:1], 34, v1
+; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, 12, v0
+; CHECK-NEXT:    v_cmp_le_u32_e64 s[0:1], 34, v1
 ; CHECK-NEXT:    s_and_b64 vcc, vcc, s[0:1]
 ; CHECK-NEXT:    s_cbranch_vccz .LBB17_2
 ; CHECK-NEXT:  ; %bb.1: ; %false
@@ -405,8 +405,8 @@ declare i64 @llvm.amdgcn.icmp.i64(i1, i1, i32)
 define amdgpu_cs i32 @branch_divergent_simulated_negated_ballot_ne_zero_and(i32 %v1, i32 %v2) {
 ; CHECK-LABEL: branch_divergent_simulated_negated_ballot_ne_zero_and:
 ; CHECK:       ; %bb.0:
-; CHECK-NEXT:    v_cmp_gt_u32_e32 vcc, 12, v0
-; CHECK-NEXT:    v_cmp_lt_u32_e64 s[0:1], 34, v1
+; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, 12, v0
+; CHECK-NEXT:    v_cmp_le_u32_e64 s[0:1], 34, v1
 ; CHECK-NEXT:    s_and_b64 vcc, vcc, s[0:1]
 ; CHECK-NEXT:    s_cbranch_vccnz .LBB20_2
 ; CHECK-NEXT:  ; %bb.1: ; %true
@@ -460,8 +460,8 @@ false:
 define amdgpu_cs i32 @branch_divergent_simulated_negated_ballot_eq_zero_and(i32 %v1, i32 %v2) {
 ; CHECK-LABEL: branch_divergent_simulated_negated_ballot_eq_zero_and:
 ; CHECK:       ; %bb.0:
-; CHECK-NEXT:    v_cmp_gt_u32_e32 vcc, 12, v0
-; CHECK-NEXT:    v_cmp_lt_u32_e64 s[0:1], 34, v1
+; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, 12, v0
+; CHECK-NEXT:    v_cmp_le_u32_e64 s[0:1], 34, v1
 ; CHECK-NEXT:    s_and_b64 vcc, vcc, s[0:1]
 ; CHECK-NEXT:    s_cbranch_vccnz .LBB22_2
 ; CHECK-NEXT:  ; %bb.1: ; %false
@@ -522,7 +522,7 @@ define amdgpu_ps void @non_cst_non_compare_input(ptr addrspace(1) %out, i32 %tid
 ; CHECK-NEXT:    s_and_saveexec_b64 s[2:3], vcc
 ; CHECK-NEXT:    s_xor_b64 s[2:3], exec, s[2:3]
 ; CHECK-NEXT:  ; %bb.1: ; %B
-; CHECK-NEXT:    v_cmp_gt_u32_e64 s[0:1], 2, v2
+; CHECK-NEXT:    v_cmp_ge_u32_e64 s[0:1], 2, v2
 ; CHECK-NEXT:    ; implicit-def: $vgpr2
 ; CHECK-NEXT:  ; %bb.2: ; %Flow
 ; CHECK-NEXT:    s_andn2_saveexec_b64 s[2:3], s[2:3]
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i64.wave32.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i64.wave32.ll
index 91aba09e942f0..90bd598c6a7f9 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i64.wave32.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i64.wave32.ll
@@ -85,13 +85,13 @@ define amdgpu_cs i64 @compare_ints(i32 %x, i32 %y) {
 define amdgpu_cs i64 @compare_int_with_constant(i32 %x) {
 ; DAGISEL-LABEL: compare_int_with_constant:
 ; DAGISEL:       ; %bb.0:
-; DAGISEL-NEXT:    v_cmp_lt_i32_e64 s0, 0x62, v0
+; DAGISEL-NEXT:    v_cmp_le_i32_e64 s0, 0x62, v0
 ; DAGISEL-NEXT:    s_mov_b32 s1, 0
 ; DAGISEL-NEXT:    ; return to shader part epilog
 ;
 ; GISEL-LABEL: compare_int_with_constant:
 ; GISEL:       ; %bb.0:
-; GISEL-NEXT:    v_cmp_le_i32_e64 s0, 0x63, v0
+; GISEL-NEXT:    v_cmp_lt_i32_e64 s0, 0x63, v0
 ; GISEL-NEXT:    s_mov_b32 s1, 0
 ; GISEL-NEXT:    ; return to shader part epilog
   %cmp = icmp sge i32 %x, 99
@@ -126,7 +126,7 @@ define amdgpu_cs i64 @ctpop_of_ballot(float %x, float %y) {
 define amdgpu_cs i32 @branch_divergent_ballot64_ne_zero_compare(i32 %v) {
 ; CHECK-LABEL: branch_divergent_ballot64_ne_zero_compare:
 ; CHECK:       ; %bb.0:
-; CHECK-NEXT:    v_cmp_gt_u32_e64 s0, 12, v0
+; CHECK-NEXT:    v_cmp_ge_u32_e64 s0, 12, v0
 ; CHECK-NEXT:    s_mov_b32 s1, 0
 ; CHECK-NEXT:    s_cmp_eq_u64 s[0:1], 0
 ; CHECK-NEXT:    s_cbranch_scc1 .LBB7_2
@@ -150,8 +150,8 @@ false:
 define amdgpu_cs i32 @branch_divergent_ballot64_ne_zero_and(i32 %v1, i32 %v2) {
 ; DAGISEL-LABEL: branch_divergent_ballot64_ne_zero_and:
 ; DAGISEL:       ; %bb.0:
-; DAGISEL-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 12, v0
-; DAGISEL-NEXT:    v_cmp_lt_u32_e64 s0, 34, v1
+; DAGISEL-NEXT:    v_cmp_ge_u32_e32 vcc_lo, 12, v0
+; DAGISEL-NEXT:    v_cmp_le_u32_e64 s0, 34, v1
 ; DAGISEL-NEXT:    s_mov_b32 s1, 0
 ; DAGISEL-NEXT:    s_and_b32 s0, vcc_lo, s0
 ; DAGISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
@@ -168,8 +168,8 @@ define amdgpu_cs i32 @branch_divergent_ballot64_ne_zero_and(i32 %v1, i32 %v2) {
 ;
 ; GISEL-LABEL: branch_divergent_ballot64_ne_zero_and:
 ; GISEL:       ; %bb.0:
-; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 12, v0
-; GISEL-NEXT:    v_cmp_lt_u32_e64 s0, 34, v1
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc_lo, 12, v0
+; GISEL-NEXT:    v_cmp_le_u32_e64 s0, 34, v1
 ; GISEL-NEXT:    s_mov_b32 s1, 0
 ; GISEL-NEXT:    s_and_b32 s0, vcc_lo, s0
 ; GISEL-NEXT:    s_cmp_eq_u64 s[0:1], 0
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.w32.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.w32.ll
index 260b6fb39acb9..c1786ab0955b4 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.w32.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.w32.ll
@@ -162,7 +162,7 @@ define amdgpu_kernel void @v_icmp_i32_ugt(ptr addrspace(1) %out, i32 %src) {
 ; SDAG-GFX11-NEXT:    s_load_b32 s2, s[4:5], 0x2c
 ; SDAG-GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
 ; SDAG-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
-; SDAG-GFX11-NEXT:    v_cmp_lt_u32_e64 s2, 0x64, s2
+; SDAG-GFX11-NEXT:    v_cmp_le_u32_e64 s2, 0x64, s2
 ; SDAG-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; SDAG-GFX11-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
 ; SDAG-GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
@@ -175,7 +175,7 @@ define amdgpu_kernel void @v_icmp_i32_ugt(ptr addrspace(1) %out, i32 %src) {
 ; SDAG-GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; SDAG-GFX10-NEXT:    v_mov_b32_e32 v0, 0
 ; SDAG-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
-; SDAG-GFX10-NEXT:    v_cmp_lt_u32_e64 s2, 0x64, s2
+; SDAG-GFX10-NEXT:    v_cmp_le_u32_e64 s2, 0x64, s2
 ; SDAG-GFX10-NEXT:    v_mov_b32_e32 v1, s2
 ; SDAG-GFX10-NEXT:    global_store_dword v0, v1, s[0:1]
 ; SDAG-GFX10-NEXT:    s_endpgm
@@ -187,7 +187,7 @@ define amdgpu_kernel void @v_icmp_i32_ugt(ptr addrspace(1) %out, i32 %src) {
 ; GISEL-GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
 ; GISEL-GFX11-NEXT:    v_mov_b32_e32 v1, 0
 ; GISEL-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
-; GISEL-GFX11-NEXT:    v_cmp_lt_u32_e64 s2, 0x64, s2
+; GISEL-GFX11-NEXT:    v_cmp_le_u32_e64 s2, 0x64, s2
 ; GISEL-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GISEL-GFX11-NEXT:    v_mov_b32_e32 v0, s2
 ; GISEL-GFX11-NEXT:    global_store_b32 v1, v0, s[0:1]
@@ -200,7 +200,7 @@ define amdgpu_kernel void @v_icmp_i32_ugt(ptr addrspace(1) %out, i32 %src) {
 ; GISEL-GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; GISEL-GFX10-NEXT:    v_mov_b32_e32 v1, 0
 ; GISEL-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
-; GISEL-GFX10-NEXT:    v_cmp_lt_u32_e64 s2, 0x64, s2
+; GISEL-GFX10-NEXT:    v_cmp_le_u32_e64 s2, 0x64, s2
 ; GISEL-GFX10-NEXT:    v_mov_b32_e32 v0, s2
 ; GISEL-GFX10-NEXT:    global_store_dword v1, v0, s[0:1]
 ; GISEL-GFX10-NEXT:    s_endpgm
@@ -216,7 +216,7 @@ define amdgpu_kernel void @v_icmp_i32_uge(ptr addrspace(1) %out, i32 %src) {
 ; SDAG-GFX11-NEXT:    s_load_b32 s2, s[4:5], 0x2c
 ; SDAG-GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
 ; SDAG-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
-; SDAG-GFX11-NEXT:    v_cmp_le_u32_e64 s2, 0x64, s2
+; SDAG-GFX11-NEXT:    v_cmp_lt_u32_e64 s2, 0x64, s2
 ; SDAG-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; SDAG-GFX11-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
 ; SDAG-GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
@@ -229,7 +229,7 @@ define amdgpu_kernel void @v_icmp_i32_uge(ptr addrspace(1) %out, i32 %src) {
 ; SDAG-GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; SDAG-GFX10-NEXT:    v_mov_b32_e32 v0, 0
 ; SDAG-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
-; SDAG-GFX10-NEXT:    v_cmp_le_u32_e64 s2, 0x64, s2
+; SDAG-GFX10-NEXT:    v_cmp_lt_u32_e64 s2, 0x64, s2
 ; SDAG-GFX10-NEXT:    v_mov_b32_e32 v1, s2
 ; SDAG-GFX10-NEXT:    global_store_dword v0, v1, s[0:1]
 ; SDAG-GFX10-NEXT:    s_endpgm
@@ -241,7 +241,7 @@ define amdgpu_kernel void @v_icmp_i32_uge(ptr addrspace(1) %out, i32 %src) {
 ; GISEL-GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
 ; GISEL-GFX11-NEXT:    v_mov_b32_e32 v1, 0
 ; GISEL-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
-; GISEL-GFX11-NEXT:    v_cmp_le_u32_e64 s2, 0x64, s2
+; GISEL-GFX11-NEXT:    v_cmp_lt_u32_e64 s2, 0x64, s2
 ; GISEL-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GISEL-GFX11-NEXT:    v_mov_b32_e32 v0, s2
 ; GISEL-GFX11-NEXT:    global_store_b32 v1, v0, s[0:1]
@@ -254,7 +254,7 @@ define amdgpu_kernel void @v_icmp_i32_uge(ptr addrspace(1) %out, i32 %src) {
 ; GISEL-GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; GISEL-GFX10-NEXT:    v_mov_b32_e32 v1, 0
 ; GISEL-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
-; GISEL-GFX10-NEXT:    v_cmp_le_u32_e64 s2, 0x64, s2
+; GISEL-GFX10-NEXT:    v_cmp_lt_u32_e64 s2, 0x64, s2
 ; GISEL-GFX10-NEXT:    v_mov_b32_e32 v0, s2
 ; GISEL-GFX10-NEXT:    global_store_dword v1, v0, s[0:1]
 ; GISEL-GFX10-NEXT:    s_endpgm
@@ -270,7 +270,7 @@ define amdgpu_kernel void @v_icmp_i32_ult(ptr addrspace(1) %out, i32 %src) {
 ; SDAG-GFX11-NEXT:    s_load_b32 s2, s[4:5], 0x2c
 ; SDAG-GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
 ; SDAG-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
-; SDAG-GFX11-NEXT:    v_cmp_gt_u32_e64 s2, 0x64, s2
+; SDAG-GFX11-NEXT:    v_cmp_ge_u32_e64 s2, 0x64, s2
 ; SDAG-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; SDAG-GFX11-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
 ; SDAG-GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
@@ -283,7 +283,7 @@ define amdgpu_kernel void @v_icmp_i32_ult(ptr addrspace(1) %out, i32 %src) {
 ; SDAG-GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; SDAG-GFX10-NEXT:    v_mov_b32_e32 v0, 0
 ; SDAG-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
-; SDAG-GFX10-NEXT:    v_cmp_gt_u32_e64 s2, 0x64, s2
+; SDAG-GFX10-NEXT:    v_cmp_ge_u32_e64 s2, 0x64, s2
 ; SDAG-GFX10-NEXT:    v_mov_b32_e32 v1, s2
 ; SDAG-GFX10-NEXT:    global_store_dword v0, v1, s[0:1]
 ; SDAG-GFX10-NEXT:    s_endpgm
@@ -295,7 +295,7 @@ define amdgpu_kernel void @v_icmp_i32_ult(ptr addrspace(1) %out, i32 %src) {
 ; GISEL-GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
 ; GISEL-GFX11-NEXT:    v_mov_b32_e32 v1, 0
 ; GISEL-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
-; GISEL-GFX11-NEXT:    v_cmp_gt_u32_e64 s2, 0x64, s2
+; GISEL-GFX11-NEXT:    v_cmp_ge_u32_e64 s2, 0x64, s2
 ; GISEL-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GISEL-GFX11-NEXT:    v_mov_b32_e32 v0, s2
 ; GISEL-GFX11-NEXT:    global_store_b32 v1, v0, s[0:1]
@@ -308,7 +308,7 @@ define amdgpu_kernel void @v_icmp_i32_ult(ptr addrspace(1) %out, i32 %src) {
 ; GISEL-GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; GISEL-GFX10-NEXT:    v_mov_b32_e32 v1, 0
 ; GISEL-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
-; GISEL-GFX10-NEXT:    v_cmp_gt_u32_e64 s2, 0x64, s2
+; GISEL-GFX10-NEXT:    v_cmp_ge_u32_e64 s2, 0x64, s2
 ; GISEL-GFX10-NEXT:    v_mov_b32_e32 v0, s2
 ; GISEL-GFX10-NEXT:    global_store_dword v1, v0, s[0:1]
 ; GISEL-GFX10-NEXT:    s_endpgm
@@ -324,7 +324,7 @@ define amdgpu_kernel void @v_icmp_i32_ule(ptr addrspace(1) %out, i32 %src) {
 ; SDAG-GFX11-NEXT:    s_load_b32 s2, s[4:5], 0x2c
 ; SDAG-GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
 ; SDAG-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
-; SDAG-GFX11-NEXT:    v_cmp_ge_u32_e64 s2, 0x64, s2
+; SDAG-GFX11-NEXT:    v_cmp_gt_u32_e64 s2, 0x64, s2
 ; SDAG-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; SDAG-GFX11-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
 ; SDAG-GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
@@ -337,7 +337,7 @@ define amdgpu_kernel void @v_icmp_i32_ule(ptr addrspace(1) %out, i32 %src) {
 ; SDAG-GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; SDAG-GFX10-NEXT:    v_mov_b32_e32 v0, 0
 ; SDAG-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
-; SDAG-GFX10-NEXT:    v_cmp_ge_u32_e64 s2, 0x64, s2
+; SDAG-GFX10-NEXT:    v_cmp_gt_u32_e64 s2, 0x64, s2
 ; SDAG-GFX10-NEXT:    v_mov_b32_e32 v1, s2
 ; SDAG-GFX10-NEXT:    global_store_dword v0, v1, s[0:1]
 ; SDAG-GFX10-NEXT:    s_endpgm
@@ -349,7 +349,7 @@ define amdgpu_kernel void @v_icmp_i32_ule(ptr addrspace(1) %out, i32 %src) {
 ; GISEL-GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
 ; GISEL-GFX11-NEXT:    v_mov_b32_e32 v1, 0
 ; GISEL-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
-; GISEL-GFX11-NEXT:    v_cmp_ge_u32_e64 s2, 0x64, s2
+; GISEL-GFX11-NEXT:    v_cmp_gt_u32_e64 s2, 0x64, s2
 ; GISEL-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GISEL-GFX11-NEXT:    v_mov_b32_e32 v0, s2
 ; GISEL-GFX11-NEXT:    global_store_b32 v1, v0, s[0:1]
@@ -362,7 +362,7 @@ define amdgpu_kernel void @v_icmp_i32_ule(ptr addrspace(1) %out, i32 %src) {
 ; GISEL-GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; GISEL-GFX10-NEXT:    v_mov_b32_e32 v1, 0
 ; GISEL-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
-; GISEL-GFX10-NEXT:    v_cmp_ge_u32_e64 s2, 0x64, s2
+; GISEL-GFX10-NEXT:    v_cmp_gt_u32_e64 s2, 0x64, s2
 ; GISEL-GFX10-NEXT:    v_mov_b32_e32 v0, s2
 ; GISEL-GFX10-NEXT:    global_store_dword v1, v0, s[0:1]
 ; GISEL-GFX10-NEXT:    s_endpgm
@@ -378,7 +378,7 @@ define amdgpu_kernel void @v_icmp_i32_sgt(ptr addrspace(1) %out, i32 %src) #1 {
 ; SDAG-GFX11-NEXT:    s_load_b32 s2, s[4:5], 0x2c
 ; SDAG-GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
 ; SDAG-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
-; SDAG-GFX11-NEXT:    v_cmp_lt_i32_e64 s2, 0x64, s2
+; SDAG-GFX11-NEXT:    v_cmp_le_i32_e64 s2, 0x64, s2
 ; SDAG-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; SDAG-GFX11-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
 ; SDAG-GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
@@ -391,7 +391,7 @@ define amdgpu_kernel void @v_icmp_i32_sgt(ptr addrspace(1) %out, i32 %src) #1 {
 ; SDAG-GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; SDAG-GFX10-NEXT:    v_mov_b32_e32 v0, 0
 ; SDAG-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
-; SDAG-GFX10-NEXT:    v_cmp_lt_i32_e64 s2, 0x64, s2
+; SDAG-GFX10-NEXT:    v_cmp_le_i32_e64 s2, 0x64, s2
 ; SDAG-GFX10-NEXT:    v_mov_b32_e32 v1, s2
 ; SDAG-GFX10-NEXT:    global_store_dword v0, v1, s[0:1]
 ; SDAG-GFX10-NEXT:    s_endpgm
@@ -403,7 +403,7 @@ define amdgpu_kernel void @v_icmp_i32_sgt(ptr addrspace(1) %out, i32 %src) #1 {
 ; GISEL-GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
 ; GISEL-GFX11-NEXT:    v_mov_b32_e32 v1, 0
 ; GISEL-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
-; GISEL-GFX11-NEXT:    v_cmp_lt_i32_e64 s2, 0x64, s2
+; GISEL-GFX11-NEXT:    v_cmp_le_i32_e64 s2, 0x64, s2
 ; GISEL-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GISEL-GFX11-NEXT:    v_mov_b32_e32 v0, s2
 ; GISEL-GFX11-NEXT:    global_store_b32 v1, v0, s[0:1]
@@ -416,7 +416,7 @@ define amdgpu_kernel void @v_icmp_i32_sgt(ptr addrspace(1) %out, i32 %src) #1 {
 ; GISEL-GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; GISEL-GFX10-NEXT:    v_mov_b32_e32 v1, 0
 ; GISEL-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
-; GISEL-GFX10-NEXT:    v_cmp_lt_i32_e64 s2, 0x64, s2
+; GISEL-GFX10-NEXT:    v_cmp_le_i32_e64 s2, 0x64, s2
 ; GISEL-GFX10-NEXT:    v_mov_b32_e32 v0, s2
 ; GISEL-GFX10-NEXT:    global_store_dword v1, v0, s[0:1]
 ; GISEL-GFX10-NEXT:    s_endpgm
@@ -432,7 +432,7 @@ define amdgpu_kernel void @v_icmp_i32_sge(ptr addrspace(1) %out, i32 %src) {
 ; SDAG-GFX11-NEXT:    s_load_b32 s2, s[4:5], 0x2c
 ; SDAG-GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
 ; SDAG-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
-; SDAG-GFX11-NEXT:    v_cmp_le_i32_e64 s2, 0x64, s2
+; SDAG-GFX11-NEXT:    v_cmp_lt_i32_e64 s2, 0x64, s2
 ; SDAG-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; SDAG-GFX11-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
 ; SDAG-GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
@@ -445,7 +445,7 @@ define amdgpu_kernel void @v_icmp_i32_sge(ptr addrspace(1) %out, i32 %src) {
 ; SDAG-GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; SDAG-GFX10-NEXT:    v_mov_b32_e32 v0, 0
 ; SDAG-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
-; SDAG-GFX10-NEXT:    v_cmp_le_i32_e64 s2, 0x64, s2
+; SDAG-GFX10-NEXT:    v_cmp_lt_i32_e64 s2, 0x64, s2
 ; SDAG-GFX10-NEXT:    v_mov_b32_e32 v1, s2
 ; SDAG-GFX10-NEXT:    global_store_dword v0, v1, s[0:1]
 ; SDAG-GFX10-NEXT:    s_endpgm
@@ -457,7 +457,7 @@ define amdgpu_kernel void @v_icmp_i32_sge(ptr addrspace(1) %out, i32 %src) {
 ; GISEL-GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
 ; GISEL-GFX11-NEXT:    v_mov_b32_e32 v1, 0
 ; GISEL-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
-; GISEL-GFX11-NEXT:    v_cmp_le_i32_e64 s2, 0x64, s2
+; GISEL-GFX11-NEXT:    v_cmp_lt_i32_e64 s2, 0x64, s2
 ; GISEL-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GISEL-GFX11-NEXT:    v_mov_b32_e32 v0, s2
 ; GISEL-GFX11-NEXT:    global_store_b32 v1, v0, s[0:1]
@@ -470,7 +470,7 @@ define amdgpu_kernel void @v_icmp_i32_sge(ptr addrspace(1) %out, i32 %src) {
 ; GISEL-GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; GISEL-GFX10-NEXT:    v_mov_b32_e32 v1, 0
 ; GISEL-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
-; GISEL-GFX10-NEXT:    v_cmp_le_i32_e64 s2, 0x64, s2
+; GISEL-GFX10-NEXT:    v_cmp_lt_i32_e64 s2, 0x64, s2
 ; GISEL-GFX10-NEXT:    v_mov_b32_e32 v0, s2
 ; GISEL-GFX10-NEXT:    global_store_dword v1, v0, s[0:1]
 ; GISEL-GFX10-NEXT:    s_endpgm
@@ -486,7 +486,7 @@ define amdgpu_kernel void @v_icmp_i32_slt(ptr addrspace(1) %out, i32 %src) {
 ; SDAG-GFX11-NEXT:    s_load_b32 s2, s[4:5], 0x2c
 ; SDAG-GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
 ; SDAG-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
-; SDAG-GFX11-NEXT:    v_cmp_gt_i32_e64 s2, 0x64, s2
+; SDAG-GFX11-NEXT:    v_cmp_ge_i32_e64 s2, 0x64, s2
 ; SDAG-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; SDAG-GFX11-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
 ; SDAG-GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
@@ -499,7 +499,7 @@ define amdgpu_kernel void @v_icmp_i32_slt(ptr addrspace(1) %out, i32 %src) {
 ; SDAG-GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; SDAG-GFX10-NEXT:    v_mov_b32_e32 v0, 0
 ; SDAG-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
-; SDAG-GFX10-NEXT:    v_cmp_gt_i32_e64 s2, 0x64, s2
+; SDAG-GFX10-NEXT:    v_cmp_ge_i32_e64 s2, 0x64, s2
 ; SDAG-GFX10-NEXT:    v_mov_b32_e32 v1, s2
 ; SDAG-GFX10-NEXT:    global_store_dword v0, v1, s[0:1]
 ; SDAG-GFX10-NEXT:    s_endpgm
@@ -511,7 +511,7 @@ define amdgpu_kernel void @v_icmp_i32_slt(ptr addrspace(1) %out, i32 %src) {
 ; GISEL-GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
 ; GISEL-GFX11-NEXT:    v_mov_b32_e32 v1, 0
 ; GISEL-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
-; GISEL-GFX11-NEXT:    v_cmp_gt_i32_e64 s2, 0x64, s2
+; GISEL-GFX11-NEXT:    v_cmp_ge_i32_e64 s2, 0x64, s2
 ; GISEL-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GISEL-GFX11-NEXT:    v_mov_b32_e32 v0, s2
 ; GISEL-GFX11-NEXT:    global_store_b32 v1, v0, s[0:1]
@@ -524,7 +524,7 @@ define amdgpu_kernel void @v_icmp_i32_slt(ptr addrspace(1) %out, i32 %src) {
 ; GISEL-GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; GISEL-GFX10-NEXT:    v_mov_b32_e32 v1, 0
 ; GISEL-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
-; GISEL-GFX10-NEXT:    v_cmp_gt_i32_e64 s2, 0x64, s2
+; GISEL-GFX10-NEXT:    v_cmp_ge_i32_e64 s2, 0x64, s2
 ; GISEL-GFX10-NEXT:    v_mov_b32_e32 v0, s2
 ; GISEL-GFX10-NEXT:    global_store_dword v1, v0, s[0:1]
 ; GISEL-GFX10-NEXT:    s_endpgm
@@ -540,7 +540,7 @@ define amdgpu_kernel void @v_icmp_i32_sle(ptr addrspace(1) %out, i32 %src) {
 ; SDAG-GFX11-NEXT:    s_load_b32 s2, s[4:5], 0x2c
 ; SDAG-GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
 ; SDAG-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
-; SDAG-GFX11-NEXT:    v_cmp_ge_i32_e64 s2, 0x64, s2
+; SDAG-GFX11-NEXT:    v_cmp_gt_i32_e64 s2, 0x64, s2
 ; SDAG-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; SDAG-GFX11-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
 ; SDAG-GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
@@ -553,7 +553,7 @@ define amdgpu_kernel void @v_icmp_i32_sle(ptr addrspace(1) %out, i32 %src) {
 ; SDAG-GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; SDAG-GFX10-NEXT:    v_mov_b32_e32 v0, 0
 ; SDAG-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
-; SDAG-GFX10-NEXT:    v_cmp_ge_i32_e64 s2, 0x64, s2
+; SDAG-GFX10-NEXT:    v_cmp_gt_i32_e64 s2, 0x64, s2
 ; SDAG-GFX10-NEXT:    v_mov_b32_e32 v1, s2
 ; SDAG-GFX10-NEXT:    global_store_dword v0, v1, s[0:1]
 ; SDAG-GFX10-NEXT:    s_endpgm
@@ -565,7 +565,7 @@ define amdgpu_kernel void @v_icmp_i32_sle(ptr addrspace(1) %out, i32 %src) {
 ; GISEL-GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
 ; GISEL-GFX11-NEXT:    v_mov_b32_e32 v1, 0
 ; GISEL-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
-; GISEL-GFX11-NEXT:    v_cmp_ge_i32_e64 s2, 0x64, s2
+; GISEL-GFX11-NEXT:    v_cmp_gt_i32_e64 s2, 0x64, s2
 ; GISEL-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GISEL-GFX11-NEXT:    v_mov_b32_e32 v0, s2
 ; GISEL-GFX11-NEXT:    global_store_b32 v1, v0, s[0:1]
@@ -578,7 +578,7 @@ define amdgpu_kernel void @v_icmp_i32_sle(ptr addrspace(1) %out, i32 %src) {
 ; GISEL-GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; GISEL-GFX10-NEXT:    v_mov_b32_e32 v1, 0
 ; GISEL-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
-; GISEL-GFX10-NEXT:    v_cmp_ge_i32_e64 s2, 0x64, s2
+; GISEL-GFX10-NEXT:    v_cmp_gt_i32_e64 s2, 0x64, s2
 ; GISEL-GFX10-NEXT:    v_mov_b32_e32 v0, s2
 ; GISEL-GFX10-NEXT:    global_store_dword v1, v0, s[0:1]
 ; GISEL-GFX10-NEXT:    s_endpgm
@@ -684,7 +684,7 @@ define amdgpu_kernel void @v_icmp_u64_ugt(ptr addrspace(1) %out, i64 %src) {
 ; SDAG-GFX11:       ; %bb.0:
 ; SDAG-GFX11-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
 ; SDAG-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
-; SDAG-GFX11-NEXT:    v_cmp_lt_u64_e64 s2, 0x64, s[2:3]
+; SDAG-GFX11-NEXT:    v_cmp_le_u64_e64 s2, 0x64, s[2:3]
 ; SDAG-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; SDAG-GFX11-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
 ; SDAG-GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
@@ -695,7 +695,7 @@ define amdgpu_kernel void @v_icmp_u64_ugt(ptr addrspace(1) %out, i64 %src) {
 ; SDAG-GFX10-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
 ; SDAG-GFX10-NEXT:    v_mov_b32_e32 v0, 0
 ; SDAG-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
-; SDAG-GFX10-NEXT:    v_cmp_lt_u64_e64 s2, 0x64, s[2:3]
+; SDAG-GFX10-NEXT:    v_cmp_le_u64_e64 s2, 0x64, s[2:3]
 ; SDAG-GFX10-NEXT:    v_mov_b32_e32 v1, s2
 ; SDAG-GFX10-NEXT:    global_store_dword v0, v1, s[0:1]
 ; SDAG-GFX10-NEXT:    s_endpgm
@@ -705,7 +705,7 @@ define amdgpu_kernel void @v_icmp_u64_ugt(ptr addrspace(1) %out, i64 %src) {
 ; GISEL-GFX11-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
 ; GISEL-GFX11-NEXT:    v_mov_b32_e32 v1, 0
 ; GISEL-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
-; GISEL-GFX11-NEXT:    v_cmp_lt_u64_e64 s2, 0x64, s[2:3]
+; GISEL-GFX11-NEXT:    v_cmp_le_u64_e64 s2, 0x64, s[2:3]
 ; GISEL-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GISEL-GFX11-NEXT:    v_mov_b32_e32 v0, s2
 ; GISEL-GFX11-NEXT:    global_store_b32 v1, v0, s[0:1]
@@ -716,7 +716,7 @@ define amdgpu_kernel void @v_icmp_u64_ugt(ptr addrspace(1) %out, i64 %src) {
 ; GISEL-GFX10-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
 ; GISEL-GFX10-NEXT:    v_mov_b32_e32 v1, 0
 ; GISEL-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
-; GISEL-GFX10-NEXT:    v_cmp_lt_u64_e64 s2, 0x64, s[2:3]
+; GISEL-GFX10-NEXT:    v_cmp_le_u64_e64 s2, 0x64, s[2:3]
 ; GISEL-GFX10-NEXT:    v_mov_b32_e32 v0, s2
 ; GISEL-GFX10-NEXT:    global_store_dword v1, v0, s[0:1]
 ; GISEL-GFX10-NEXT:    s_endpgm
@@ -730,7 +730,7 @@ define amdgpu_kernel void @v_icmp_u64_uge(ptr addrspace(1) %out, i64 %src) {
 ; SDAG-GFX11:       ; %bb.0:
 ; SDAG-GFX11-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
 ; SDAG-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
-; SDAG-GFX11-NEXT:    v_cmp_le_u64_e64 s2, 0x64, s[2:3]
+; SDAG-GFX11-NEXT:    v_cmp_lt_u64_e64 s2, 0x64, s[2:3]
 ; SDAG-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; SDAG-GFX11-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
 ; SDAG-GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
@@ -741,7 +741,7 @@ define amdgpu_kernel void @v_icmp_u64_uge(ptr addrspace(1) %out, i64 %src) {
 ; SDAG-GFX10-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
 ; SDAG-GFX10-NEXT:    v_mov_b32_e32 v0, 0
 ; SDAG-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
-; SDAG-GFX10-NEXT:    v_cmp_le_u64_e64 s2, 0x64, s[2:3]
+; SDAG-GFX10-NEXT:    v_cmp_lt_u64_e64 s2, 0x64, s[2:3]
 ; SDAG-GFX10-NEXT:    v_mov_b32_e32 v1, s2
 ; SDAG-GFX10-NEXT:    global_store_dword v0, v1, s[0:1]
 ; SDAG-GFX10-NEXT:    s_endpgm
@@ -751,7 +751,7 @@ define amdgpu_kernel void @v_icmp_u64_uge(ptr addrspace(1) %out, i64 %src) {
 ; GISEL-GFX11-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
 ; GISEL-GFX11-NEXT:    v_mov_b32_e32 v1, 0
 ; GISEL-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
-; GISEL-GFX11-NEXT:    v_cmp_le_u64_e64 s2, 0x64, s[2:3]
+; GISEL-GFX11-NEXT:    v_cmp_lt_u64_e64 s2, 0x64, s[2:3]
 ; GISEL-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GISEL-GFX11-NEXT:    v_mov_b32_e32 v0, s2
 ; GISEL-GFX11-NEXT:    global_store_b32 v1, v0, s[0:1]
@@ -762,7 +762,7 @@ define amdgpu_kernel void @v_icmp_u64_uge(ptr addrspace(1) %out, i64 %src) {
 ; GISEL-GFX10-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
 ; GISEL-GFX10-NEXT:    v_mov_b32_e32 v1, 0
 ; GISEL-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
-; GISEL-GFX10-NEXT:    v_cmp_le_u64_e64 s2, 0x64, s[2:3]
+; GISEL-GFX10-NEXT:    v_cmp_lt_u64_e64 s2, 0x64, s[2:3]
 ; GISEL-GFX10-NEXT:    v_mov_b32_e32 v0, s2
 ; GISEL-GFX10-NEXT:    global_store_dword v1, v0, s[0:1]
 ; GISEL-GFX10-NEXT:    s_endpgm
@@ -776,7 +776,7 @@ define amdgpu_kernel void @v_icmp_u64_ult(ptr addrspace(1) %out, i64 %src) {
 ; SDAG-GFX11:       ; %bb.0:
 ; SDAG-GFX11-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
 ; SDAG-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
-; SDAG-GFX11-NEXT:    v_cmp_gt_u64_e64 s2, 0x64, s[2:3]
+; SDAG-GFX11-NEXT:    v_cmp_ge_u64_e64 s2, 0x64, s[2:3]
 ; SDAG-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; SDAG-GFX11-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
 ; SDAG-GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
@@ -787,7 +787,7 @@ define amdgpu_kernel void @v_icmp_u64_ult(ptr addrspace(1) %out, i64 %src) {
 ; SDAG-GFX10-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
 ; SDAG-GFX10-NEXT:    v_mov_b32_e32 v0, 0
 ; SDAG-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
-; SDAG-GFX10-NEXT:    v_cmp_gt_u64_e64 s2, 0x64, s[2:3]
+; SDAG-GFX10-NEXT:    v_cmp_ge_u64_e64 s2, 0x64, s[2:3]
 ; SDAG-GFX10-NEXT:    v_mov_b32_e32 v1, s2
 ; SDAG-GFX10-NEXT:    global_store_dword v0, v1, s[0:1]
 ; SDAG-GFX10-NEXT:    s_endpgm
@@ -797,7 +797,7 @@ define amdgpu_kernel void @v_icmp_u64_ult(ptr addrspace(1) %out, i64 %src) {
 ; GISEL-GFX11-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
 ; GISEL-GFX11-NEXT:    v_mov_b32_e32 v1, 0
 ; GISEL-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
-; GISEL-GFX11-NEXT:    v_cmp_gt_u64_e64 s2, 0x64, s[2:3]
+; GISEL-GFX11-NEXT:    v_cmp_ge_u64_e64 s2, 0x64, s[2:3]
 ; GISEL-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GISEL-GFX11-NEXT:    v_mov_b32_e32 v0, s2
 ; GISEL-GFX11-NEXT:    global_store_b32 v1, v0, s[0:1]
@@ -808,7 +808,7 @@ define amdgpu_kernel void @v_icmp_u64_ult(ptr addrspace(1) %out, i64 %src) {
 ; GISEL-GFX10-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
 ; GISEL-GFX10-NEXT:    v_mov_b32_e32 v1, 0
 ; GISEL-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
-; GISEL-GFX10-NEXT:    v_cmp_gt_u64_e64 s2, 0x64, s[2:3]
+; GISEL-GFX10-NEXT:    v_cmp_ge_u64_e64 s2, 0x64, s[2:3]
 ; GISEL-GFX10-NEXT:    v_mov_b32_e32 v0, s2
 ; GISEL-GFX10-NEXT:    global_store_dword v1, v0, s[0:1]
 ; GISEL-GFX10-NEXT:    s_endpgm
@@ -822,7 +822,7 @@ define amdgpu_kernel void @v_icmp_u64_ule(ptr addrspace(1) %out, i64 %src) {
 ; SDAG-GFX11:       ; %bb.0:
 ; SDAG-GFX11-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
 ; SDAG-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
-; SDAG-GFX11-NEXT:    v_cmp_ge_u64_e64 s2, 0x64, s[2:3]
+; SDAG-GFX11-NEXT:    v_cmp_gt_u64_e64 s2, 0x64, s[2:3]
 ; SDAG-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; SDAG-GFX11-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
 ; SDAG-GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
@@ -833,7 +833,7 @@ define amdgpu_kernel void @v_icmp_u64_ule(ptr addrspace(1) %out, i64 %src) {
 ; SDAG-GFX10-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
 ; SDAG-GFX10-NEXT:    v_mov_b32_e32 v0, 0
 ; SDAG-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
-; SDAG-GFX10-NEXT:    v_cmp_ge_u64_e64 s2, 0x64, s[2:3]
+; SDAG-GFX10-NEXT:    v_cmp_gt_u64_e64 s2, 0x64, s[2:3]
 ; SDAG-GFX10-NEXT:    v_mov_b32_e32 v1, s2
 ; SDAG-GFX10-NEXT:    global_store_dword v0, v1, s[0:1]
 ; SDAG-GFX10-NEXT:    s_endpgm
@@ -843,7 +843,7 @@ define amdgpu_kernel void @v_icmp_u64_ule(ptr addrspace(1) %out, i64 %src) {
 ; GISEL-GFX11-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
 ; GISEL-GFX11-NEXT:    v_mov_b32_e32 v1, 0
 ; GISEL-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
-; GISEL-GFX11-NEXT:    v_cmp_ge_u64_e64 s2, 0x64, s[2:3]
+; GISEL-GFX11-NEXT:    v_cmp_gt_u64_e64 s2, 0x64, s[2:3]
 ; GISEL-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GISEL-GFX11-NEXT:    v_mov_b32_e32 v0, s2
 ; GISEL-GFX11-NEXT:    global_store_b32 v1, v0, s[0:1]
@@ -854,7 +854,7 @@ define amdgpu_kernel void @v_icmp_u64_ule(ptr addrspace(1) %out, i64 %src) {
 ; GISEL-GFX10-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
 ; GISEL-GFX10-NEXT:    v_mov_b32_e32 v1, 0
 ; GISEL-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
-; GISEL-GFX10-NEXT:    v_cmp_ge_u64_e64 s2, 0x64, s[2:3]
+; GISEL-GFX10-NEXT:    v_cmp_gt_u64_e64 s2, 0x64, s[2:3]
 ; GISEL-GFX10-NEXT:    v_mov_b32_e32 v0, s2
 ; GISEL-GFX10-NEXT:    global_store_dword v1, v0, s[0:1]
 ; GISEL-GFX10-NEXT:    s_endpgm
@@ -868,7 +868,7 @@ define amdgpu_kernel void @v_icmp_i64_sgt(ptr addrspace(1) %out, i64 %src) {
 ; SDAG-GFX11:       ; %bb.0:
 ; SDAG-GFX11-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
 ; SDAG-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
-; SDAG-GFX11-NEXT:    v_cmp_lt_i64_e64 s2, 0x64, s[2:3]
+; SDAG-GFX11-NEXT:    v_cmp_le_i64_e64 s2, 0x64, s[2:3]
 ; SDAG-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; SDAG-GFX11-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
 ; SDAG-GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
@@ -879,7 +879,7 @@ define amdgpu_kernel void @v_icmp_i64_sgt(ptr addrspace(1) %out, i64 %src) {
 ; SDAG-GFX10-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
 ; SDAG-GFX10-NEXT:    v_mov_b32_e32 v0, 0
 ; SDAG-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
-; SDAG-GFX10-NEXT:    v_cmp_lt_i64_e64 s2, 0x64, s[2:3]
+; SDAG-GFX10-NEXT:    v_cmp_le_i64_e64 s2, 0x64, s[2:3]
 ; SDAG-GFX10-NEXT:    v_mov_b32_e32 v1, s2
 ; SDAG-GFX10-NEXT:    global_store_dword v0, v1, s[0:1]
 ; SDAG-GFX10-NEXT:    s_endpgm
@@ -889,7 +889,7 @@ define amdgpu_kernel void @v_icmp_i64_sgt(ptr addrspace(1) %out, i64 %src) {
 ; GISEL-GFX11-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
 ; GISEL-GFX11-NEXT:    v_mov_b32_e32 v1, 0
 ; GISEL-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
-; GISEL-GFX11-NEXT:    v_cmp_lt_i64_e64 s2, 0x64, s[2:3]
+; GISEL-GFX11-NEXT:    v_cmp_le_i64_e64 s2, 0x64, s[2:3]
 ; GISEL-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GISEL-GFX11-NEXT:    v_mov_b32_e32 v0, s2
 ; GISEL-GFX11-NEXT:    global_store_b32 v1, v0, s[0:1]
@@ -900,7 +900,7 @@ define amdgpu_kernel void @v_icmp_i64_sgt(ptr addrspace(1) %out, i64 %src) {
 ; GISEL-GFX10-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
 ; GISEL-GFX10-NEXT:    v_mov_b32_e32 v1, 0
 ; GISEL-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
-; GISEL-GFX10-NEXT:    v_cmp_lt_i64_e64 s2, 0x64, s[2:3]
+; GISEL-GFX10-NEXT:    v_cmp_le_i64_e64 s2, 0x64, s[2:3]
 ; GISEL-GFX10-NEXT:    v_mov_b32_e32 v0, s2
 ; GISEL-GFX10-NEXT:    global_store_dword v1, v0, s[0:1]
 ; GISEL-GFX10-NEXT:    s_endpgm
@@ -914,7 +914,7 @@ define amdgpu_kernel void @v_icmp_i64_sge(ptr addrspace(1) %out, i64 %src) {
 ; SDAG-GFX11:       ; %bb.0:
 ; SDAG-GFX11-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
 ; SDAG-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
-; SDAG-GFX11-NEXT:    v_cmp_le_i64_e64 s2, 0x64, s[2:3]
+; SDAG-GFX11-NEXT:    v_cmp_lt_i64_e64 s2, 0x64, s[2:3]
 ; SDAG-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; SDAG-GFX11-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
 ; SDAG-GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
@@ -925,7 +925,7 @@ define amdgpu_kernel void @v_icmp_i64_sge(ptr addrspace(1) %out, i64 %src) {
 ; SDAG-GFX10-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
 ; SDAG-GFX10-NEXT:    v_mov_b32_e32 v0, 0
 ; SDAG-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
-; SDAG-GFX10-NEXT:    v_cmp_le_i64_e64 s2, 0x64, s[2:3]
+; SDAG-GFX10-NEXT:    v_cmp_lt_i64_e64 s2, 0x64, s[2:3]
 ; SDAG-GFX10-NEXT:    v_mov_b32_e32 v1, s2
 ; SDAG-GFX10-NEXT:    global_store_dword v0, v1, s[0:1]
 ; SDAG-GFX10-NEXT:    s_endpgm
@@ -935,7 +935,7 @@ define amdgpu_kernel void @v_icmp_i64_sge(ptr addrspace(1) %out, i64 %src) {
 ; GISEL-GFX11-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
 ; GISEL-GFX11-NEXT:    v_mov_b32_e32 v1, 0
 ; GISEL-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
-; GISEL-GFX11-NEXT:    v_cmp_le_i64_e64 s2, 0x64, s[2:3]
+; GISEL-GFX11-NEXT:    v_cmp_lt_i64_e64 s2, 0x64, s[2:3]
 ; GISEL-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GISEL-GFX11-NEXT:    v_mov_b32_e32 v0, s2
 ; GISEL-GFX11-NEXT:    global_store_b32 v1, v0, s[0:1]
@@ -946,7 +946,7 @@ define amdgpu_kernel void @v_icmp_i64_sge(ptr addrspace(1) %out, i64 %src) {
 ; GISEL-GFX10-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
 ; GISEL-GFX10-NEXT:    v_mov_b32_e32 v1, 0
 ; GISEL-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
-; GISEL-GFX10-NEXT:    v_cmp_le_i64_e64 s2, 0x64, s[2:3]
+; GISEL-GFX10-NEXT:    v_cmp_lt_i64_e64 s2, 0x64, s[2:3]
 ; GISEL-GFX10-NEXT:    v_mov_b32_e32 v0, s2
 ; GISEL-GFX10-NEXT:    global_store_dword v1, v0, s[0:1]
 ; GISEL-GFX10-NEXT:    s_endpgm
@@ -960,7 +960,7 @@ define amdgpu_kernel void @v_icmp_i64_slt(ptr addrspace(1) %out, i64 %src) {
 ; SDAG-GFX11:       ; %bb.0:
 ; SDAG-GFX11-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
 ; SDAG-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
-; SDAG-GFX11-NEXT:    v_cmp_gt_i64_e64 s2, 0x64, s[2:3]
+; SDAG-GFX11-NEXT:    v_cmp_ge_i64_e64 s2, 0x64, s[2:3]
 ; SDAG-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; SDAG-GFX11-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
 ; SDAG-GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
@@ -971,7 +971,7 @@ define amdgpu_kernel void @v_icmp_i64_slt(ptr addrspace(1) %out, i64 %src) {
 ; SDAG-GFX10-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
 ; SDAG-GFX10-NEXT:    v_mov_b32_e32 v0, 0
 ; SDAG-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
-; SDAG-GFX10-NEXT:    v_cmp_gt_i64_e64 s2, 0x64, s[2:3]
+; SDAG-GFX10-NEXT:    v_cmp_ge_i64_e64 s2, 0x64, s[2:3]
 ; SDAG-GFX10-NEXT:    v_mov_b32_e32 v1, s2
 ; SDAG-GFX10-NEXT:    global_store_dword v0, v1, s[0:1]
 ; SDAG-GFX10-NEXT:    s_endpgm
@@ -981,7 +981,7 @@ define amdgpu_kernel void @v_icmp_i64_slt(ptr addrspace(1) %out, i64 %src) {
 ; GISEL-GFX11-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
 ; GISEL-GFX11-NEXT:    v_mov_b32_e32 v1, 0
 ; GISEL-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
-; GISEL-GFX11-NEXT:    v_cmp_gt_i64_e64 s2, 0x64, s[2:3]
+; GISEL-GFX11-NEXT:    v_cmp_ge_i64_e64 s2, 0x64, s[2:3]
 ; GISEL-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GISEL-GFX11-NEXT:    v_mov_b32_e32 v0, s2
 ; GISEL-GFX11-NEXT:    global_store_b32 v1, v0, s[0:1]
@@ -992,7 +992,7 @@ define amdgpu_kernel void @v_icmp_i64_slt(ptr addrspace(1) %out, i64 %src) {
 ; GISEL-GFX10-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
 ; GISEL-GFX10-NEXT:    v_mov_b32_e32 v1, 0
 ; GISEL-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
-; GISEL-GFX10-NEXT:    v_cmp_gt_i64_e64 s2, 0x64, s[2:3]
+; GISEL-GFX10-NEXT:    v_cmp_ge_i64_e64 s2, 0x64, s[2:3]
 ; GISEL-GFX10-NEXT:    v_mov_b32_e32 v0, s2
 ; GISEL-GFX10-NEXT:    global_store_dword v1, v0, s[0:1]
 ; GISEL-GFX10-NEXT:    s_endpgm
@@ -1006,7 +1006,7 @@ define amdgpu_kernel void @v_icmp_i64_sle(ptr addrspace(1) %out, i64 %src) {
 ; SDAG-GFX11:       ; %bb.0:
 ; SDAG-GFX11-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
 ; SDAG-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
-; SDAG-GFX11-NEXT:    v_cmp_ge_i64_e64 s2, 0x64, s[2:3]
+; SDAG-GFX11-NEXT:    v_cmp_gt_i64_e64 s2, 0x64, s[2:3]
 ; SDAG-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; SDAG-GFX11-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
 ; SDAG-GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
@@ -1017,7 +1017,7 @@ define amdgpu_kernel void @v_icmp_i64_sle(ptr addrspace(1) %out, i64 %src) {
 ; SDAG-GFX10-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
 ; SDAG-GFX10-NEXT:    v_mov_b32_e32 v0, 0
 ; SDAG-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
-; SDAG-GFX10-NEXT:    v_cmp_ge_i64_e64 s2, 0x64, s[2:3]
+; SDAG-GFX10-NEXT:    v_cmp_gt_i64_e64 s2, 0x64, s[2:3]
 ; SDAG-GFX10-NEXT:    v_mov_b32_e32 v1, s2
 ; SDAG-GFX10-NEXT:    global_store_dword v0, v1, s[0:1]
 ; SDAG-GFX10-NEXT:    s_endpgm
@@ -1027,7 +1027,7 @@ define amdgpu_kernel void @v_icmp_i64_sle(ptr addrspace(1) %out, i64 %src) {
 ; GISEL-GFX11-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
 ; GISEL-GFX11-NEXT:    v_mov_b32_e32 v1, 0
 ; GISEL-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
-; GISEL-GFX11-NEXT:    v_cmp_ge_i64_e64 s2, 0x64, s[2:3]
+; GISEL-GFX11-NEXT:    v_cmp_gt_i64_e64 s2, 0x64, s[2:3]
 ; GISEL-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GISEL-GFX11-NEXT:    v_mov_b32_e32 v0, s2
 ; GISEL-GFX11-NEXT:    global_store_b32 v1, v0, s[0:1]
@@ -1038,7 +1038,7 @@ define amdgpu_kernel void @v_icmp_i64_sle(ptr addrspace(1) %out, i64 %src) {
 ; GISEL-GFX10-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
 ; GISEL-GFX10-NEXT:    v_mov_b32_e32 v1, 0
 ; GISEL-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
-; GISEL-GFX10-NEXT:    v_cmp_ge_i64_e64 s2, 0x64, s[2:3]
+; GISEL-GFX10-NEXT:    v_cmp_gt_i64_e64 s2, 0x64, s[2:3]
 ; GISEL-GFX10-NEXT:    v_mov_b32_e32 v0, s2
 ; GISEL-GFX10-NEXT:    global_store_dword v1, v0, s[0:1]
 ; GISEL-GFX10-NEXT:    s_endpgm
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.w64.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.w64.ll
index 13a53f0b96de2..50c97cb335599 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.w64.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.w64.ll
@@ -191,7 +191,7 @@ define amdgpu_kernel void @v_icmp_i32_ugt(ptr addrspace(1) %out, i32 %src) {
 ; GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
 ; GFX11-NEXT:    v_mov_b32_e32 v2, 0
 ; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11-NEXT:    v_cmp_lt_u32_e64 s[2:3], 0x64, s2
+; GFX11-NEXT:    v_cmp_le_u32_e64 s[2:3], 0x64, s2
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
 ; GFX11-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX11-NEXT:    v_mov_b32_e32 v1, s3
@@ -251,7 +251,7 @@ define amdgpu_kernel void @v_icmp_i32_uge(ptr addrspace(1) %out, i32 %src) {
 ; GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
 ; GFX11-NEXT:    v_mov_b32_e32 v2, 0
 ; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11-NEXT:    v_cmp_le_u32_e64 s[2:3], 0x64, s2
+; GFX11-NEXT:    v_cmp_lt_u32_e64 s[2:3], 0x64, s2
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
 ; GFX11-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX11-NEXT:    v_mov_b32_e32 v1, s3
@@ -311,7 +311,7 @@ define amdgpu_kernel void @v_icmp_i32_ult(ptr addrspace(1) %out, i32 %src) {
 ; GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
 ; GFX11-NEXT:    v_mov_b32_e32 v2, 0
 ; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11-NEXT:    v_cmp_gt_u32_e64 s[2:3], 0x64, s2
+; GFX11-NEXT:    v_cmp_ge_u32_e64 s[2:3], 0x64, s2
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
 ; GFX11-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX11-NEXT:    v_mov_b32_e32 v1, s3
@@ -371,7 +371,7 @@ define amdgpu_kernel void @v_icmp_i32_ule(ptr addrspace(1) %out, i32 %src) {
 ; GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
 ; GFX11-NEXT:    v_mov_b32_e32 v2, 0
 ; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11-NEXT:    v_cmp_ge_u32_e64 s[2:3], 0x64, s2
+; GFX11-NEXT:    v_cmp_gt_u32_e64 s[2:3], 0x64, s2
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
 ; GFX11-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX11-NEXT:    v_mov_b32_e32 v1, s3
@@ -431,7 +431,7 @@ define amdgpu_kernel void @v_icmp_i32_sgt(ptr addrspace(1) %out, i32 %src) #1 {
 ; GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
 ; GFX11-NEXT:    v_mov_b32_e32 v2, 0
 ; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11-NEXT:    v_cmp_lt_i32_e64 s[2:3], 0x64, s2
+; GFX11-NEXT:    v_cmp_le_i32_e64 s[2:3], 0x64, s2
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
 ; GFX11-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX11-NEXT:    v_mov_b32_e32 v1, s3
@@ -491,7 +491,7 @@ define amdgpu_kernel void @v_icmp_i32_sge(ptr addrspace(1) %out, i32 %src) {
 ; GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
 ; GFX11-NEXT:    v_mov_b32_e32 v2, 0
 ; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11-NEXT:    v_cmp_le_i32_e64 s[2:3], 0x64, s2
+; GFX11-NEXT:    v_cmp_lt_i32_e64 s[2:3], 0x64, s2
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
 ; GFX11-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX11-NEXT:    v_mov_b32_e32 v1, s3
@@ -551,7 +551,7 @@ define amdgpu_kernel void @v_icmp_i32_slt(ptr addrspace(1) %out, i32 %src) {
 ; GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
 ; GFX11-NEXT:    v_mov_b32_e32 v2, 0
 ; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11-NEXT:    v_cmp_gt_i32_e64 s[2:3], 0x64, s2
+; GFX11-NEXT:    v_cmp_ge_i32_e64 s[2:3], 0x64, s2
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
 ; GFX11-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX11-NEXT:    v_mov_b32_e32 v1, s3
@@ -611,7 +611,7 @@ define amdgpu_kernel void @v_icmp_i32_sle(ptr addrspace(1) %out, i32 %src) {
 ; GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
 ; GFX11-NEXT:    v_mov_b32_e32 v2, 0
 ; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11-NEXT:    v_cmp_ge_i32_e64 s[2:3], 0x64, s2
+; GFX11-NEXT:    v_cmp_gt_i32_e64 s[2:3], 0x64, s2
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
 ; GFX11-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX11-NEXT:    v_mov_b32_e32 v1, s3
@@ -785,7 +785,7 @@ define amdgpu_kernel void @v_icmp_u64_ugt(ptr addrspace(1) %out, i64 %src) {
 ; GFX11-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
 ; GFX11-NEXT:    v_mov_b32_e32 v2, 0
 ; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11-NEXT:    v_cmp_lt_u64_e64 s[2:3], 0x64, s[2:3]
+; GFX11-NEXT:    v_cmp_le_u64_e64 s[2:3], 0x64, s[2:3]
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
 ; GFX11-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX11-NEXT:    v_mov_b32_e32 v1, s3
@@ -843,7 +843,7 @@ define amdgpu_kernel void @v_icmp_u64_uge(ptr addrspace(1) %out, i64 %src) {
 ; GFX11-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
 ; GFX11-NEXT:    v_mov_b32_e32 v2, 0
 ; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11-NEXT:    v_cmp_le_u64_e64 s[2:3], 0x64, s[2:3]
+; GFX11-NEXT:    v_cmp_lt_u64_e64 s[2:3], 0x64, s[2:3]
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
 ; GFX11-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX11-NEXT:    v_mov_b32_e32 v1, s3
@@ -901,7 +901,7 @@ define amdgpu_kernel void @v_icmp_u64_ult(ptr addrspace(1) %out, i64 %src) {
 ; GFX11-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
 ; GFX11-NEXT:    v_mov_b32_e32 v2, 0
 ; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11-NEXT:    v_cmp_gt_u64_e64 s[2:3], 0x64, s[2:3]
+; GFX11-NEXT:    v_cmp_ge_u64_e64 s[2:3], 0x64, s[2:3]
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
 ; GFX11-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX11-NEXT:    v_mov_b32_e32 v1, s3
@@ -959,7 +959,7 @@ define amdgpu_kernel void @v_icmp_u64_ule(ptr addrspace(1) %out, i64 %src) {
 ; GFX11-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
 ; GFX11-NEXT:    v_mov_b32_e32 v2, 0
 ; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11-NEXT:    v_cmp_ge_u64_e64 s[2:3], 0x64, s[2:3]
+; GFX11-NEXT:    v_cmp_gt_u64_e64 s[2:3], 0x64, s[2:3]
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
 ; GFX11-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX11-NEXT:    v_mov_b32_e32 v1, s3
@@ -1017,7 +1017,7 @@ define amdgpu_kernel void @v_icmp_i64_sgt(ptr addrspace(1) %out, i64 %src) {
 ; GFX11-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
 ; GFX11-NEXT:    v_mov_b32_e32 v2, 0
 ; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11-NEXT:    v_cmp_lt_i64_e64 s[2:3], 0x64, s[2:3]
+; GFX11-NEXT:    v_cmp_le_i64_e64 s[2:3], 0x64, s[2:3]
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
 ; GFX11-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX11-NEXT:    v_mov_b32_e32 v1, s3
@@ -1075,7 +1075,7 @@ define amdgpu_kernel void @v_icmp_i64_sge(ptr addrspace(1) %out, i64 %src) {
 ; GFX11-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
 ; GFX11-NEXT:    v_mov_b32_e32 v2, 0
 ; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11-NEXT:    v_cmp_le_i64_e64 s[2:3], 0x64, s[2:3]
+; GFX11-NEXT:    v_cmp_lt_i64_e64 s[2:3], 0x64, s[2:3]
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
 ; GFX11-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX11-NEXT:    v_mov_b32_e32 v1, s3
@@ -1133,7 +1133,7 @@ define amdgpu_kernel void @v_icmp_i64_slt(ptr addrspace(1) %out, i64 %src) {
 ; GFX11-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
 ; GFX11-NEXT:    v_mov_b32_e32 v2, 0
 ; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11-NEXT:    v_cmp_gt_i64_e64 s[2:3], 0x64, s[2:3]
+; GFX11-NEXT:    v_cmp_ge_i64_e64 s[2:3], 0x64, s[2:3]
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
 ; GFX11-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX11-NEXT:    v_mov_b32_e32 v1, s3
@@ -1191,7 +1191,7 @@ define amdgpu_kernel void @v_icmp_i64_sle(ptr addrspace(1) %out, i64 %src) {
 ; GFX11-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
 ; GFX11-NEXT:    v_mov_b32_e32 v2, 0
 ; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11-NEXT:    v_cmp_ge_i64_e64 s[2:3], 0x64, s[2:3]
+; GFX11-NEXT:    v_cmp_gt_i64_e64 s[2:3], 0x64, s[2:3]
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
 ; GFX11-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX11-NEXT:    v_mov_b32_e32 v1, s3
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.readfirstlane.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.readfirstlane.ll
index 49a334b8b6c52..209f67b06729b 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.readfirstlane.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.readfirstlane.ll
@@ -54,7 +54,7 @@ define void @test_readfirstlane_i1_select(ptr addrspace(1) %out, i32 %src, i32 %
 ; CHECK-SDAG-LABEL: test_readfirstlane_i1_select:
 ; CHECK-SDAG:       ; %bb.0:
 ; CHECK-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-SDAG-NEXT:    v_cmp_lt_u32_e32 vcc, 42, v2
+; CHECK-SDAG-NEXT:    v_cmp_le_u32_e32 vcc, 42, v2
 ; CHECK-SDAG-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
 ; CHECK-SDAG-NEXT:    v_readfirstlane_b32 s4, v4
 ; CHECK-SDAG-NEXT:    s_bitcmp1_b32 s4, 0
@@ -67,7 +67,7 @@ define void @test_readfirstlane_i1_select(ptr addrspace(1) %out, i32 %src, i32 %
 ; CHECK-GISEL-LABEL: test_readfirstlane_i1_select:
 ; CHECK-GISEL:       ; %bb.0:
 ; CHECK-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-GISEL-NEXT:    v_cmp_lt_u32_e32 vcc, 42, v2
+; CHECK-GISEL-NEXT:    v_cmp_le_u32_e32 vcc, 42, v2
 ; CHECK-GISEL-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
 ; CHECK-GISEL-NEXT:    v_readfirstlane_b32 s4, v4
 ; CHECK-GISEL-NEXT:    s_and_b32 s4, 1, s4
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.add.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.add.ll
index d2ca1d8136043..5ac1f004d7e20 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.add.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.add.ll
@@ -744,7 +744,7 @@ entry:
 define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX8DAGISEL-LABEL: divergent_cfg:
 ; GFX8DAGISEL:       ; %bb.0: ; %entry
-; GFX8DAGISEL-NEXT:    v_cmp_lt_u32_e32 vcc, 15, v0
+; GFX8DAGISEL-NEXT:    v_cmp_le_u32_e32 vcc, 15, v0
 ; GFX8DAGISEL-NEXT:    ; implicit-def: $sgpr2
 ; GFX8DAGISEL-NEXT:    s_and_saveexec_b64 s[0:1], vcc
 ; GFX8DAGISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
@@ -784,7 +784,7 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ;
 ; GFX8GISEL-LABEL: divergent_cfg:
 ; GFX8GISEL:       ; %bb.0: ; %entry
-; GFX8GISEL-NEXT:    v_cmp_le_u32_e32 vcc, 16, v0
+; GFX8GISEL-NEXT:    v_cmp_lt_u32_e32 vcc, 16, v0
 ; GFX8GISEL-NEXT:    ; implicit-def: $sgpr6
 ; GFX8GISEL-NEXT:    s_and_saveexec_b64 s[0:1], vcc
 ; GFX8GISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
@@ -821,7 +821,7 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ;
 ; GFX9DAGISEL-LABEL: divergent_cfg:
 ; GFX9DAGISEL:       ; %bb.0: ; %entry
-; GFX9DAGISEL-NEXT:    v_cmp_lt_u32_e32 vcc, 15, v0
+; GFX9DAGISEL-NEXT:    v_cmp_le_u32_e32 vcc, 15, v0
 ; GFX9DAGISEL-NEXT:    ; implicit-def: $sgpr2
 ; GFX9DAGISEL-NEXT:    s_and_saveexec_b64 s[0:1], vcc
 ; GFX9DAGISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
@@ -860,7 +860,7 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ;
 ; GFX9GISEL-LABEL: divergent_cfg:
 ; GFX9GISEL:       ; %bb.0: ; %entry
-; GFX9GISEL-NEXT:    v_cmp_le_u32_e32 vcc, 16, v0
+; GFX9GISEL-NEXT:    v_cmp_lt_u32_e32 vcc, 16, v0
 ; GFX9GISEL-NEXT:    ; implicit-def: $sgpr6
 ; GFX9GISEL-NEXT:    s_and_saveexec_b64 s[0:1], vcc
 ; GFX9GISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
@@ -896,7 +896,7 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ;
 ; GFX1064DAGISEL-LABEL: divergent_cfg:
 ; GFX1064DAGISEL:       ; %bb.0: ; %entry
-; GFX1064DAGISEL-NEXT:    v_cmp_lt_u32_e32 vcc, 15, v0
+; GFX1064DAGISEL-NEXT:    v_cmp_le_u32_e32 vcc, 15, v0
 ; GFX1064DAGISEL-NEXT:    ; implicit-def: $sgpr2
 ; GFX1064DAGISEL-NEXT:    s_and_saveexec_b64 s[0:1], vcc
 ; GFX1064DAGISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
@@ -935,7 +935,7 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ;
 ; GFX1064GISEL-LABEL: divergent_cfg:
 ; GFX1064GISEL:       ; %bb.0: ; %entry
-; GFX1064GISEL-NEXT:    v_cmp_le_u32_e32 vcc, 16, v0
+; GFX1064GISEL-NEXT:    v_cmp_lt_u32_e32 vcc, 16, v0
 ; GFX1064GISEL-NEXT:    ; implicit-def: $sgpr6
 ; GFX1064GISEL-NEXT:    s_and_saveexec_b64 s[0:1], vcc
 ; GFX1064GISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
@@ -971,7 +971,7 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ;
 ; GFX1032DAGISEL-LABEL: divergent_cfg:
 ; GFX1032DAGISEL:       ; %bb.0: ; %entry
-; GFX1032DAGISEL-NEXT:    v_cmp_lt_u32_e32 vcc_lo, 15, v0
+; GFX1032DAGISEL-NEXT:    v_cmp_le_u32_e32 vcc_lo, 15, v0
 ; GFX1032DAGISEL-NEXT:    ; implicit-def: $sgpr1
 ; GFX1032DAGISEL-NEXT:    s_and_saveexec_b32 s0, vcc_lo
 ; GFX1032DAGISEL-NEXT:    s_xor_b32 s0, exec_lo, s0
@@ -1010,7 +1010,7 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ;
 ; GFX1032GISEL-LABEL: divergent_cfg:
 ; GFX1032GISEL:       ; %bb.0: ; %entry
-; GFX1032GISEL-NEXT:    v_cmp_le_u32_e32 vcc_lo, 16, v0
+; GFX1032GISEL-NEXT:    v_cmp_lt_u32_e32 vcc_lo, 16, v0
 ; GFX1032GISEL-NEXT:    ; implicit-def: $sgpr0
 ; GFX1032GISEL-NEXT:    s_and_saveexec_b32 s1, vcc_lo
 ; GFX1032GISEL-NEXT:    s_xor_b32 s1, exec_lo, s1
@@ -1050,7 +1050,7 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX1164DAGISEL-NEXT:    s_mov_b64 s[0:1], exec
 ; GFX1164DAGISEL-NEXT:    ; implicit-def: $sgpr2
 ; GFX1164DAGISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1164DAGISEL-NEXT:    v_cmpx_lt_u32_e32 15, v0
+; GFX1164DAGISEL-NEXT:    v_cmpx_le_u32_e32 15, v0
 ; GFX1164DAGISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
 ; GFX1164DAGISEL-NEXT:    s_cbranch_execz .LBB4_2
 ; GFX1164DAGISEL-NEXT:  ; %bb.1: ; %else
@@ -1093,7 +1093,7 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX1164GISEL-NEXT:    s_mov_b64 s[0:1], exec
 ; GFX1164GISEL-NEXT:    ; implicit-def: $sgpr6
 ; GFX1164GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1164GISEL-NEXT:    v_cmpx_le_u32_e32 16, v0
+; GFX1164GISEL-NEXT:    v_cmpx_lt_u32_e32 16, v0
 ; GFX1164GISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
 ; GFX1164GISEL-NEXT:    s_cbranch_execz .LBB4_2
 ; GFX1164GISEL-NEXT:  ; %bb.1: ; %else
@@ -1133,7 +1133,7 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX1132DAGISEL-NEXT:    s_mov_b32 s0, exec_lo
 ; GFX1132DAGISEL-NEXT:    ; implicit-def: $sgpr1
 ; GFX1132DAGISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1132DAGISEL-NEXT:    v_cmpx_lt_u32_e32 15, v0
+; GFX1132DAGISEL-NEXT:    v_cmpx_le_u32_e32 15, v0
 ; GFX1132DAGISEL-NEXT:    s_xor_b32 s0, exec_lo, s0
 ; GFX1132DAGISEL-NEXT:    s_cbranch_execz .LBB4_2
 ; GFX1132DAGISEL-NEXT:  ; %bb.1: ; %else
@@ -1176,7 +1176,7 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX1132GISEL-NEXT:    s_mov_b32 s1, exec_lo
 ; GFX1132GISEL-NEXT:    ; implicit-def: $sgpr0
 ; GFX1132GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1132GISEL-NEXT:    v_cmpx_le_u32_e32 16, v0
+; GFX1132GISEL-NEXT:    v_cmpx_lt_u32_e32 16, v0
 ; GFX1132GISEL-NEXT:    s_xor_b32 s1, exec_lo, s1
 ; GFX1132GISEL-NEXT:    s_cbranch_execz .LBB4_2
 ; GFX1132GISEL-NEXT:  ; %bb.1: ; %else
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.and.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.and.ll
index 356b0e73b39e7..150cc484c9375 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.and.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.and.ll
@@ -538,7 +538,7 @@ entry:
 define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX8DAGISEL-LABEL: divergent_cfg:
 ; GFX8DAGISEL:       ; %bb.0: ; %entry
-; GFX8DAGISEL-NEXT:    v_cmp_lt_u32_e32 vcc, 15, v0
+; GFX8DAGISEL-NEXT:    v_cmp_le_u32_e32 vcc, 15, v0
 ; GFX8DAGISEL-NEXT:    ; implicit-def: $sgpr2
 ; GFX8DAGISEL-NEXT:    s_and_saveexec_b64 s[0:1], vcc
 ; GFX8DAGISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
@@ -574,7 +574,7 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ;
 ; GFX8GISEL-LABEL: divergent_cfg:
 ; GFX8GISEL:       ; %bb.0: ; %entry
-; GFX8GISEL-NEXT:    v_cmp_le_u32_e32 vcc, 16, v0
+; GFX8GISEL-NEXT:    v_cmp_lt_u32_e32 vcc, 16, v0
 ; GFX8GISEL-NEXT:    ; implicit-def: $sgpr6
 ; GFX8GISEL-NEXT:    s_and_saveexec_b64 s[0:1], vcc
 ; GFX8GISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
@@ -609,7 +609,7 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ;
 ; GFX9DAGISEL-LABEL: divergent_cfg:
 ; GFX9DAGISEL:       ; %bb.0: ; %entry
-; GFX9DAGISEL-NEXT:    v_cmp_lt_u32_e32 vcc, 15, v0
+; GFX9DAGISEL-NEXT:    v_cmp_le_u32_e32 vcc, 15, v0
 ; GFX9DAGISEL-NEXT:    ; implicit-def: $sgpr2
 ; GFX9DAGISEL-NEXT:    s_and_saveexec_b64 s[0:1], vcc
 ; GFX9DAGISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
@@ -644,7 +644,7 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ;
 ; GFX9GISEL-LABEL: divergent_cfg:
 ; GFX9GISEL:       ; %bb.0: ; %entry
-; GFX9GISEL-NEXT:    v_cmp_le_u32_e32 vcc, 16, v0
+; GFX9GISEL-NEXT:    v_cmp_lt_u32_e32 vcc, 16, v0
 ; GFX9GISEL-NEXT:    ; implicit-def: $sgpr6
 ; GFX9GISEL-NEXT:    s_and_saveexec_b64 s[0:1], vcc
 ; GFX9GISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
@@ -678,7 +678,7 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ;
 ; GFX1064DAGISEL-LABEL: divergent_cfg:
 ; GFX1064DAGISEL:       ; %bb.0: ; %entry
-; GFX1064DAGISEL-NEXT:    v_cmp_lt_u32_e32 vcc, 15, v0
+; GFX1064DAGISEL-NEXT:    v_cmp_le_u32_e32 vcc, 15, v0
 ; GFX1064DAGISEL-NEXT:    ; implicit-def: $sgpr2
 ; GFX1064DAGISEL-NEXT:    s_and_saveexec_b64 s[0:1], vcc
 ; GFX1064DAGISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
@@ -713,7 +713,7 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ;
 ; GFX1064GISEL-LABEL: divergent_cfg:
 ; GFX1064GISEL:       ; %bb.0: ; %entry
-; GFX1064GISEL-NEXT:    v_cmp_le_u32_e32 vcc, 16, v0
+; GFX1064GISEL-NEXT:    v_cmp_lt_u32_e32 vcc, 16, v0
 ; GFX1064GISEL-NEXT:    ; implicit-def: $sgpr6
 ; GFX1064GISEL-NEXT:    s_and_saveexec_b64 s[0:1], vcc
 ; GFX1064GISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
@@ -747,7 +747,7 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ;
 ; GFX1032DAGISEL-LABEL: divergent_cfg:
 ; GFX1032DAGISEL:       ; %bb.0: ; %entry
-; GFX1032DAGISEL-NEXT:    v_cmp_lt_u32_e32 vcc_lo, 15, v0
+; GFX1032DAGISEL-NEXT:    v_cmp_le_u32_e32 vcc_lo, 15, v0
 ; GFX1032DAGISEL-NEXT:    ; implicit-def: $sgpr1
 ; GFX1032DAGISEL-NEXT:    s_and_saveexec_b32 s0, vcc_lo
 ; GFX1032DAGISEL-NEXT:    s_xor_b32 s0, exec_lo, s0
@@ -782,7 +782,7 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ;
 ; GFX1032GISEL-LABEL: divergent_cfg:
 ; GFX1032GISEL:       ; %bb.0: ; %entry
-; GFX1032GISEL-NEXT:    v_cmp_le_u32_e32 vcc_lo, 16, v0
+; GFX1032GISEL-NEXT:    v_cmp_lt_u32_e32 vcc_lo, 16, v0
 ; GFX1032GISEL-NEXT:    ; implicit-def: $sgpr0
 ; GFX1032GISEL-NEXT:    s_and_saveexec_b32 s1, vcc_lo
 ; GFX1032GISEL-NEXT:    s_xor_b32 s1, exec_lo, s1
@@ -820,7 +820,7 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX1164DAGISEL-NEXT:    s_mov_b64 s[0:1], exec
 ; GFX1164DAGISEL-NEXT:    ; implicit-def: $sgpr2
 ; GFX1164DAGISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1164DAGISEL-NEXT:    v_cmpx_lt_u32_e32 15, v0
+; GFX1164DAGISEL-NEXT:    v_cmpx_le_u32_e32 15, v0
 ; GFX1164DAGISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
 ; GFX1164DAGISEL-NEXT:  ; %bb.1: ; %else
 ; GFX1164DAGISEL-NEXT:    s_load_b32 s2, s[4:5], 0x2c
@@ -858,7 +858,7 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX1164GISEL-NEXT:    s_mov_b64 s[0:1], exec
 ; GFX1164GISEL-NEXT:    ; implicit-def: $sgpr6
 ; GFX1164GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1164GISEL-NEXT:    v_cmpx_le_u32_e32 16, v0
+; GFX1164GISEL-NEXT:    v_cmpx_lt_u32_e32 16, v0
 ; GFX1164GISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
 ; GFX1164GISEL-NEXT:    s_cbranch_execz .LBB4_2
 ; GFX1164GISEL-NEXT:  ; %bb.1: ; %else
@@ -895,7 +895,7 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX1132DAGISEL-NEXT:    s_mov_b32 s0, exec_lo
 ; GFX1132DAGISEL-NEXT:    ; implicit-def: $sgpr1
 ; GFX1132DAGISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1132DAGISEL-NEXT:    v_cmpx_lt_u32_e32 15, v0
+; GFX1132DAGISEL-NEXT:    v_cmpx_le_u32_e32 15, v0
 ; GFX1132DAGISEL-NEXT:    s_xor_b32 s0, exec_lo, s0
 ; GFX1132DAGISEL-NEXT:  ; %bb.1: ; %else
 ; GFX1132DAGISEL-NEXT:    s_load_b32 s1, s[4:5], 0x2c
@@ -933,7 +933,7 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX1132GISEL-NEXT:    s_mov_b32 s1, exec_lo
 ; GFX1132GISEL-NEXT:    ; implicit-def: $sgpr0
 ; GFX1132GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1132GISEL-NEXT:    v_cmpx_le_u32_e32 16, v0
+; GFX1132GISEL-NEXT:    v_cmpx_lt_u32_e32 16, v0
 ; GFX1132GISEL-NEXT:    s_xor_b32 s1, exec_lo, s1
 ; GFX1132GISEL-NEXT:    s_cbranch_execz .LBB4_2
 ; GFX1132GISEL-NEXT:  ; %bb.1: ; %else
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.max.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.max.ll
index 7dc0cb05b0abe..7186514885863 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.max.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.max.ll
@@ -538,7 +538,7 @@ entry:
 define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX8DAGISEL-LABEL: divergent_cfg:
 ; GFX8DAGISEL:       ; %bb.0: ; %entry
-; GFX8DAGISEL-NEXT:    v_cmp_lt_u32_e32 vcc, 15, v0
+; GFX8DAGISEL-NEXT:    v_cmp_le_u32_e32 vcc, 15, v0
 ; GFX8DAGISEL-NEXT:    ; implicit-def: $sgpr2
 ; GFX8DAGISEL-NEXT:    s_and_saveexec_b64 s[0:1], vcc
 ; GFX8DAGISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
@@ -574,7 +574,7 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ;
 ; GFX8GISEL-LABEL: divergent_cfg:
 ; GFX8GISEL:       ; %bb.0: ; %entry
-; GFX8GISEL-NEXT:    v_cmp_le_u32_e32 vcc, 16, v0
+; GFX8GISEL-NEXT:    v_cmp_lt_u32_e32 vcc, 16, v0
 ; GFX8GISEL-NEXT:    ; implicit-def: $sgpr6
 ; GFX8GISEL-NEXT:    s_and_saveexec_b64 s[0:1], vcc
 ; GFX8GISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
@@ -609,7 +609,7 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ;
 ; GFX9DAGISEL-LABEL: divergent_cfg:
 ; GFX9DAGISEL:       ; %bb.0: ; %entry
-; GFX9DAGISEL-NEXT:    v_cmp_lt_u32_e32 vcc, 15, v0
+; GFX9DAGISEL-NEXT:    v_cmp_le_u32_e32 vcc, 15, v0
 ; GFX9DAGISEL-NEXT:    ; implicit-def: $sgpr2
 ; GFX9DAGISEL-NEXT:    s_and_saveexec_b64 s[0:1], vcc
 ; GFX9DAGISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
@@ -644,7 +644,7 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ;
 ; GFX9GISEL-LABEL: divergent_cfg:
 ; GFX9GISEL:       ; %bb.0: ; %entry
-; GFX9GISEL-NEXT:    v_cmp_le_u32_e32 vcc, 16, v0
+; GFX9GISEL-NEXT:    v_cmp_lt_u32_e32 vcc, 16, v0
 ; GFX9GISEL-NEXT:    ; implicit-def: $sgpr6
 ; GFX9GISEL-NEXT:    s_and_saveexec_b64 s[0:1], vcc
 ; GFX9GISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
@@ -678,7 +678,7 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ;
 ; GFX1064DAGISEL-LABEL: divergent_cfg:
 ; GFX1064DAGISEL:       ; %bb.0: ; %entry
-; GFX1064DAGISEL-NEXT:    v_cmp_lt_u32_e32 vcc, 15, v0
+; GFX1064DAGISEL-NEXT:    v_cmp_le_u32_e32 vcc, 15, v0
 ; GFX1064DAGISEL-NEXT:    ; implicit-def: $sgpr2
 ; GFX1064DAGISEL-NEXT:    s_and_saveexec_b64 s[0:1], vcc
 ; GFX1064DAGISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
@@ -713,7 +713,7 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ;
 ; GFX1064GISEL-LABEL: divergent_cfg:
 ; GFX1064GISEL:       ; %bb.0: ; %entry
-; GFX1064GISEL-NEXT:    v_cmp_le_u32_e32 vcc, 16, v0
+; GFX1064GISEL-NEXT:    v_cmp_lt_u32_e32 vcc, 16, v0
 ; GFX1064GISEL-NEXT:    ; implicit-def: $sgpr6
 ; GFX1064GISEL-NEXT:    s_and_saveexec_b64 s[0:1], vcc
 ; GFX1064GISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
@@ -747,7 +747,7 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ;
 ; GFX1032DAGISEL-LABEL: divergent_cfg:
 ; GFX1032DAGISEL:       ; %bb.0: ; %entry
-; GFX1032DAGISEL-NEXT:    v_cmp_lt_u32_e32 vcc_lo, 15, v0
+; GFX1032DAGISEL-NEXT:    v_cmp_le_u32_e32 vcc_lo, 15, v0
 ; GFX1032DAGISEL-NEXT:    ; implicit-def: $sgpr1
 ; GFX1032DAGISEL-NEXT:    s_and_saveexec_b32 s0, vcc_lo
 ; GFX1032DAGISEL-NEXT:    s_xor_b32 s0, exec_lo, s0
@@ -782,7 +782,7 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ;
 ; GFX1032GISEL-LABEL: divergent_cfg:
 ; GFX1032GISEL:       ; %bb.0: ; %entry
-; GFX1032GISEL-NEXT:    v_cmp_le_u32_e32 vcc_lo, 16, v0
+; GFX1032GISEL-NEXT:    v_cmp_lt_u32_e32 vcc_lo, 16, v0
 ; GFX1032GISEL-NEXT:    ; implicit-def: $sgpr0
 ; GFX1032GISEL-NEXT:    s_and_saveexec_b32 s1, vcc_lo
 ; GFX1032GISEL-NEXT:    s_xor_b32 s1, exec_lo, s1
@@ -820,7 +820,7 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX1164DAGISEL-NEXT:    s_mov_b64 s[0:1], exec
 ; GFX1164DAGISEL-NEXT:    ; implicit-def: $sgpr2
 ; GFX1164DAGISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1164DAGISEL-NEXT:    v_cmpx_lt_u32_e32 15, v0
+; GFX1164DAGISEL-NEXT:    v_cmpx_le_u32_e32 15, v0
 ; GFX1164DAGISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
 ; GFX1164DAGISEL-NEXT:  ; %bb.1: ; %else
 ; GFX1164DAGISEL-NEXT:    s_load_b32 s2, s[4:5], 0x2c
@@ -858,7 +858,7 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX1164GISEL-NEXT:    s_mov_b64 s[0:1], exec
 ; GFX1164GISEL-NEXT:    ; implicit-def: $sgpr6
 ; GFX1164GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1164GISEL-NEXT:    v_cmpx_le_u32_e32 16, v0
+; GFX1164GISEL-NEXT:    v_cmpx_lt_u32_e32 16, v0
 ; GFX1164GISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
 ; GFX1164GISEL-NEXT:    s_cbranch_execz .LBB4_2
 ; GFX1164GISEL-NEXT:  ; %bb.1: ; %else
@@ -895,7 +895,7 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX1132DAGISEL-NEXT:    s_mov_b32 s0, exec_lo
 ; GFX1132DAGISEL-NEXT:    ; implicit-def: $sgpr1
 ; GFX1132DAGISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1132DAGISEL-NEXT:    v_cmpx_lt_u32_e32 15, v0
+; GFX1132DAGISEL-NEXT:    v_cmpx_le_u32_e32 15, v0
 ; GFX1132DAGISEL-NEXT:    s_xor_b32 s0, exec_lo, s0
 ; GFX1132DAGISEL-NEXT:  ; %bb.1: ; %else
 ; GFX1132DAGISEL-NEXT:    s_load_b32 s1, s[4:5], 0x2c
@@ -933,7 +933,7 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX1132GISEL-NEXT:    s_mov_b32 s1, exec_lo
 ; GFX1132GISEL-NEXT:    ; implicit-def: $sgpr0
 ; GFX1132GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1132GISEL-NEXT:    v_cmpx_le_u32_e32 16, v0
+; GFX1132GISEL-NEXT:    v_cmpx_lt_u32_e32 16, v0
 ; GFX1132GISEL-NEXT:    s_xor_b32 s1, exec_lo, s1
 ; GFX1132GISEL-NEXT:    s_cbranch_execz .LBB4_2
 ; GFX1132GISEL-NEXT:  ; %bb.1: ; %else
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.min.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.min.ll
index 7cb0e6533c722..b2828bb142b31 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.min.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.min.ll
@@ -538,7 +538,7 @@ entry:
 define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX8DAGISEL-LABEL: divergent_cfg:
 ; GFX8DAGISEL:       ; %bb.0: ; %entry
-; GFX8DAGISEL-NEXT:    v_cmp_lt_u32_e32 vcc, 15, v0
+; GFX8DAGISEL-NEXT:    v_cmp_le_u32_e32 vcc, 15, v0
 ; GFX8DAGISEL-NEXT:    ; implicit-def: $sgpr2
 ; GFX8DAGISEL-NEXT:    s_and_saveexec_b64 s[0:1], vcc
 ; GFX8DAGISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
@@ -574,7 +574,7 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ;
 ; GFX8GISEL-LABEL: divergent_cfg:
 ; GFX8GISEL:       ; %bb.0: ; %entry
-; GFX8GISEL-NEXT:    v_cmp_le_u32_e32 vcc, 16, v0
+; GFX8GISEL-NEXT:    v_cmp_lt_u32_e32 vcc, 16, v0
 ; GFX8GISEL-NEXT:    ; implicit-def: $sgpr6
 ; GFX8GISEL-NEXT:    s_and_saveexec_b64 s[0:1], vcc
 ; GFX8GISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
@@ -609,7 +609,7 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ;
 ; GFX9DAGISEL-LABEL: divergent_cfg:
 ; GFX9DAGISEL:       ; %bb.0: ; %entry
-; GFX9DAGISEL-NEXT:    v_cmp_lt_u32_e32 vcc, 15, v0
+; GFX9DAGISEL-NEXT:    v_cmp_le_u32_e32 vcc, 15, v0
 ; GFX9DAGISEL-NEXT:    ; implicit-def: $sgpr2
 ; GFX9DAGISEL-NEXT:    s_and_saveexec_b64 s[0:1], vcc
 ; GFX9DAGISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
@@ -644,7 +644,7 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ;
 ; GFX9GISEL-LABEL: divergent_cfg:
 ; GFX9GISEL:       ; %bb.0: ; %entry
-; GFX9GISEL-NEXT:    v_cmp_le_u32_e32 vcc, 16, v0
+; GFX9GISEL-NEXT:    v_cmp_lt_u32_e32 vcc, 16, v0
 ; GFX9GISEL-NEXT:    ; implicit-def: $sgpr6
 ; GFX9GISEL-NEXT:    s_and_saveexec_b64 s[0:1], vcc
 ; GFX9GISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
@@ -678,7 +678,7 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ;
 ; GFX1064DAGISEL-LABEL: divergent_cfg:
 ; GFX1064DAGISEL:       ; %bb.0: ; %entry
-; GFX1064DAGISEL-NEXT:    v_cmp_lt_u32_e32 vcc, 15, v0
+; GFX1064DAGISEL-NEXT:    v_cmp_le_u32_e32 vcc, 15, v0
 ; GFX1064DAGISEL-NEXT:    ; implicit-def: $sgpr2
 ; GFX1064DAGISEL-NEXT:    s_and_saveexec_b64 s[0:1], vcc
 ; GFX1064DAGISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
@@ -713,7 +713,7 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ;
 ; GFX1064GISEL-LABEL: divergent_cfg:
 ; GFX1064GISEL:       ; %bb.0: ; %entry
-; GFX1064GISEL-NEXT:    v_cmp_le_u32_e32 vcc, 16, v0
+; GFX1064GISEL-NEXT:    v_cmp_lt_u32_e32 vcc, 16, v0
 ; GFX1064GISEL-NEXT:    ; implicit-def: $sgpr6
 ; GFX1064GISEL-NEXT:    s_and_saveexec_b64 s[0:1], vcc
 ; GFX1064GISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
@@ -747,7 +747,7 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ;
 ; GFX1032DAGISEL-LABEL: divergent_cfg:
 ; GFX1032DAGISEL:       ; %bb.0: ; %entry
-; GFX1032DAGISEL-NEXT:    v_cmp_lt_u32_e32 vcc_lo, 15, v0
+; GFX1032DAGISEL-NEXT:    v_cmp_le_u32_e32 vcc_lo, 15, v0
 ; GFX1032DAGISEL-NEXT:    ; implicit-def: $sgpr1
 ; GFX1032DAGISEL-NEXT:    s_and_saveexec_b32 s0, vcc_lo
 ; GFX1032DAGISEL-NEXT:    s_xor_b32 s0, exec_lo, s0
@@ -782,7 +782,7 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ;
 ; GFX1032GISEL-LABEL: divergent_cfg:
 ; GFX1032GISEL:       ; %bb.0: ; %entry
-; GFX1032GISEL-NEXT:    v_cmp_le_u32_e32 vcc_lo, 16, v0
+; GFX1032GISEL-NEXT:    v_cmp_lt_u32_e32 vcc_lo, 16, v0
 ; GFX1032GISEL-NEXT:    ; implicit-def: $sgpr0
 ; GFX1032GISEL-NEXT:    s_and_saveexec_b32 s1, vcc_lo
 ; GFX1032GISEL-NEXT:    s_xor_b32 s1, exec_lo, s1
@@ -820,7 +820,7 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX1164DAGISEL-NEXT:    s_mov_b64 s[0:1], exec
 ; GFX1164DAGISEL-NEXT:    ; implicit-def: $sgpr2
 ; GFX1164DAGISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1164DAGISEL-NEXT:    v_cmpx_lt_u32_e32 15, v0
+; GFX1164DAGISEL-NEXT:    v_cmpx_le_u32_e32 15, v0
 ; GFX1164DAGISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
 ; GFX1164DAGISEL-NEXT:  ; %bb.1: ; %else
 ; GFX1164DAGISEL-NEXT:    s_load_b32 s2, s[4:5], 0x2c
@@ -858,7 +858,7 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX1164GISEL-NEXT:    s_mov_b64 s[0:1], exec
 ; GFX1164GISEL-NEXT:    ; implicit-def: $sgpr6
 ; GFX1164GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1164GISEL-NEXT:    v_cmpx_le_u32_e32 16, v0
+; GFX1164GISEL-NEXT:    v_cmpx_lt_u32_e32 16, v0
 ; GFX1164GISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
 ; GFX1164GISEL-NEXT:    s_cbranch_execz .LBB4_2
 ; GFX1164GISEL-NEXT:  ; %bb.1: ; %else
@@ -895,7 +895,7 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX1132DAGISEL-NEXT:    s_mov_b32 s0, exec_lo
 ; GFX1132DAGISEL-NEXT:    ; implicit-def: $sgpr1
 ; GFX1132DAGISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1132DAGISEL-NEXT:    v_cmpx_lt_u32_e32 15, v0
+; GFX1132DAGISEL-NEXT:    v_cmpx_le_u32_e32 15, v0
 ; GFX1132DAGISEL-NEXT:    s_xor_b32 s0, exec_lo, s0
 ; GFX1132DAGISEL-NEXT:  ; %bb.1: ; %else
 ; GFX1132DAGISEL-NEXT:    s_load_b32 s1, s[4:5], 0x2c
@@ -933,7 +933,7 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX1132GISEL-NEXT:    s_mov_b32 s1, exec_lo
 ; GFX1132GISEL-NEXT:    ; implicit-def: $sgpr0
 ; GFX1132GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1132GISEL-NEXT:    v_cmpx_le_u32_e32 16, v0
+; GFX1132GISEL-NEXT:    v_cmpx_lt_u32_e32 16, v0
 ; GFX1132GISEL-NEXT:    s_xor_b32 s1, exec_lo, s1
 ; GFX1132GISEL-NEXT:    s_cbranch_execz .LBB4_2
 ; GFX1132GISEL-NEXT:  ; %bb.1: ; %else
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.or.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.or.ll
index e08787e6ba70a..34a641a847dbe 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.or.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.or.ll
@@ -538,7 +538,7 @@ entry:
 define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX8DAGISEL-LABEL: divergent_cfg:
 ; GFX8DAGISEL:       ; %bb.0: ; %entry
-; GFX8DAGISEL-NEXT:    v_cmp_lt_u32_e32 vcc, 15, v0
+; GFX8DAGISEL-NEXT:    v_cmp_le_u32_e32 vcc, 15, v0
 ; GFX8DAGISEL-NEXT:    ; implicit-def: $sgpr2
 ; GFX8DAGISEL-NEXT:    s_and_saveexec_b64 s[0:1], vcc
 ; GFX8DAGISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
@@ -574,7 +574,7 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ;
 ; GFX8GISEL-LABEL: divergent_cfg:
 ; GFX8GISEL:       ; %bb.0: ; %entry
-; GFX8GISEL-NEXT:    v_cmp_le_u32_e32 vcc, 16, v0
+; GFX8GISEL-NEXT:    v_cmp_lt_u32_e32 vcc, 16, v0
 ; GFX8GISEL-NEXT:    ; implicit-def: $sgpr6
 ; GFX8GISEL-NEXT:    s_and_saveexec_b64 s[0:1], vcc
 ; GFX8GISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
@@ -609,7 +609,7 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ;
 ; GFX9DAGISEL-LABEL: divergent_cfg:
 ; GFX9DAGISEL:       ; %bb.0: ; %entry
-; GFX9DAGISEL-NEXT:    v_cmp_lt_u32_e32 vcc, 15, v0
+; GFX9DAGISEL-NEXT:    v_cmp_le_u32_e32 vcc, 15, v0
 ; GFX9DAGISEL-NEXT:    ; implicit-def: $sgpr2
 ; GFX9DAGISEL-NEXT:    s_and_saveexec_b64 s[0:1], vcc
 ; GFX9DAGISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
@@ -644,7 +644,7 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ;
 ; GFX9GISEL-LABEL: divergent_cfg:
 ; GFX9GISEL:       ; %bb.0: ; %entry
-; GFX9GISEL-NEXT:    v_cmp_le_u32_e32 vcc, 16, v0
+; GFX9GISEL-NEXT:    v_cmp_lt_u32_e32 vcc, 16, v0
 ; GFX9GISEL-NEXT:    ; implicit-def: $sgpr6
 ; GFX9GISEL-NEXT:    s_and_saveexec_b64 s[0:1], vcc
 ; GFX9GISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
@@ -678,7 +678,7 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ;
 ; GFX1064DAGISEL-LABEL: divergent_cfg:
 ; GFX1064DAGISEL:       ; %bb.0: ; %entry
-; GFX1064DAGISEL-NEXT:    v_cmp_lt_u32_e32 vcc, 15, v0
+; GFX1064DAGISEL-NEXT:    v_cmp_le_u32_e32 vcc, 15, v0
 ; GFX1064DAGISEL-NEXT:    ; implicit-def: $sgpr2
 ; GFX1064DAGISEL-NEXT:    s_and_saveexec_b64 s[0:1], vcc
 ; GFX1064DAGISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
@@ -713,7 +713,7 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ;
 ; GFX1064GISEL-LABEL: divergent_cfg:
 ; GFX1064GISEL:       ; %bb.0: ; %entry
-; GFX1064GISEL-NEXT:    v_cmp_le_u32_e32 vcc, 16, v0
+; GFX1064GISEL-NEXT:    v_cmp_lt_u32_e32 vcc, 16, v0
 ; GFX1064GISEL-NEXT:    ; implicit-def: $sgpr6
 ; GFX1064GISEL-NEXT:    s_and_saveexec_b64 s[0:1], vcc
 ; GFX1064GISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
@@ -747,7 +747,7 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ;
 ; GFX1032DAGISEL-LABEL: divergent_cfg:
 ; GFX1032DAGISEL:       ; %bb.0: ; %entry
-; GFX1032DAGISEL-NEXT:    v_cmp_lt_u32_e32 vcc_lo, 15, v0
+; GFX1032DAGISEL-NEXT:    v_cmp_le_u32_e32 vcc_lo, 15, v0
 ; GFX1032DAGISEL-NEXT:    ; implicit-def: $sgpr1
 ; GFX1032DAGISEL-NEXT:    s_and_saveexec_b32 s0, vcc_lo
 ; GFX1032DAGISEL-NEXT:    s_xor_b32 s0, exec_lo, s0
@@ -782,7 +782,7 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ;
 ; GFX1032GISEL-LABEL: divergent_cfg:
 ; GFX1032GISEL:       ; %bb.0: ; %entry
-; GFX1032GISEL-NEXT:    v_cmp_le_u32_e32 vcc_lo, 16, v0
+; GFX1032GISEL-NEXT:    v_cmp_lt_u32_e32 vcc_lo, 16, v0
 ; GFX1032GISEL-NEXT:    ; implicit-def: $sgpr0
 ; GFX1032GISEL-NEXT:    s_and_saveexec_b32 s1, vcc_lo
 ; GFX1032GISEL-NEXT:    s_xor_b32 s1, exec_lo, s1
@@ -820,7 +820,7 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX1164DAGISEL-NEXT:    s_mov_b64 s[0:1], exec
 ; GFX1164DAGISEL-NEXT:    ; implicit-def: $sgpr2
 ; GFX1164DAGISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1164DAGISEL-NEXT:    v_cmpx_lt_u32_e32 15, v0
+; GFX1164DAGISEL-NEXT:    v_cmpx_le_u32_e32 15, v0
 ; GFX1164DAGISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
 ; GFX1164DAGISEL-NEXT:  ; %bb.1: ; %else
 ; GFX1164DAGISEL-NEXT:    s_load_b32 s2, s[4:5], 0x2c
@@ -858,7 +858,7 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX1164GISEL-NEXT:    s_mov_b64 s[0:1], exec
 ; GFX1164GISEL-NEXT:    ; implicit-def: $sgpr6
 ; GFX1164GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1164GISEL-NEXT:    v_cmpx_le_u32_e32 16, v0
+; GFX1164GISEL-NEXT:    v_cmpx_lt_u32_e32 16, v0
 ; GFX1164GISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
 ; GFX1164GISEL-NEXT:    s_cbranch_execz .LBB4_2
 ; GFX1164GISEL-NEXT:  ; %bb.1: ; %else
@@ -895,7 +895,7 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX1132DAGISEL-NEXT:    s_mov_b32 s0, exec_lo
 ; GFX1132DAGISEL-NEXT:    ; implicit-def: $sgpr1
 ; GFX1132DAGISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1132DAGISEL-NEXT:    v_cmpx_lt_u32_e32 15, v0
+; GFX1132DAGISEL-NEXT:    v_cmpx_le_u32_e32 15, v0
 ; GFX1132DAGISEL-NEXT:    s_xor_b32 s0, exec_lo, s0
 ; GFX1132DAGISEL-NEXT:  ; %bb.1: ; %else
 ; GFX1132DAGISEL-NEXT:    s_load_b32 s1, s[4:5], 0x2c
@@ -933,7 +933,7 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX1132GISEL-NEXT:    s_mov_b32 s1, exec_lo
 ; GFX1132GISEL-NEXT:    ; implicit-def: $sgpr0
 ; GFX1132GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1132GISEL-NEXT:    v_cmpx_le_u32_e32 16, v0
+; GFX1132GISEL-NEXT:    v_cmpx_lt_u32_e32 16, v0
 ; GFX1132GISEL-NEXT:    s_xor_b32 s1, exec_lo, s1
 ; GFX1132GISEL-NEXT:    s_cbranch_execz .LBB4_2
 ; GFX1132GISEL-NEXT:  ; %bb.1: ; %else
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.sub.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.sub.ll
index edb888a21f735..bf3d41ddb8799 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.sub.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.sub.ll
@@ -781,7 +781,7 @@ entry:
 define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX8DAGISEL-LABEL: divergent_cfg:
 ; GFX8DAGISEL:       ; %bb.0: ; %entry
-; GFX8DAGISEL-NEXT:    v_cmp_lt_u32_e32 vcc, 15, v0
+; GFX8DAGISEL-NEXT:    v_cmp_le_u32_e32 vcc, 15, v0
 ; GFX8DAGISEL-NEXT:    ; implicit-def: $sgpr2
 ; GFX8DAGISEL-NEXT:    s_and_saveexec_b64 s[0:1], vcc
 ; GFX8DAGISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
@@ -822,7 +822,7 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ;
 ; GFX8GISEL-LABEL: divergent_cfg:
 ; GFX8GISEL:       ; %bb.0: ; %entry
-; GFX8GISEL-NEXT:    v_cmp_le_u32_e32 vcc, 16, v0
+; GFX8GISEL-NEXT:    v_cmp_lt_u32_e32 vcc, 16, v0
 ; GFX8GISEL-NEXT:    ; implicit-def: $sgpr6
 ; GFX8GISEL-NEXT:    s_and_saveexec_b64 s[0:1], vcc
 ; GFX8GISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
@@ -860,7 +860,7 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ;
 ; GFX9DAGISEL-LABEL: divergent_cfg:
 ; GFX9DAGISEL:       ; %bb.0: ; %entry
-; GFX9DAGISEL-NEXT:    v_cmp_lt_u32_e32 vcc, 15, v0
+; GFX9DAGISEL-NEXT:    v_cmp_le_u32_e32 vcc, 15, v0
 ; GFX9DAGISEL-NEXT:    ; implicit-def: $sgpr2
 ; GFX9DAGISEL-NEXT:    s_and_saveexec_b64 s[0:1], vcc
 ; GFX9DAGISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
@@ -900,7 +900,7 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ;
 ; GFX9GISEL-LABEL: divergent_cfg:
 ; GFX9GISEL:       ; %bb.0: ; %entry
-; GFX9GISEL-NEXT:    v_cmp_le_u32_e32 vcc, 16, v0
+; GFX9GISEL-NEXT:    v_cmp_lt_u32_e32 vcc, 16, v0
 ; GFX9GISEL-NEXT:    ; implicit-def: $sgpr6
 ; GFX9GISEL-NEXT:    s_and_saveexec_b64 s[0:1], vcc
 ; GFX9GISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
@@ -937,7 +937,7 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ;
 ; GFX1064DAGISEL-LABEL: divergent_cfg:
 ; GFX1064DAGISEL:       ; %bb.0: ; %entry
-; GFX1064DAGISEL-NEXT:    v_cmp_lt_u32_e32 vcc, 15, v0
+; GFX1064DAGISEL-NEXT:    v_cmp_le_u32_e32 vcc, 15, v0
 ; GFX1064DAGISEL-NEXT:    ; implicit-def: $sgpr2
 ; GFX1064DAGISEL-NEXT:    s_and_saveexec_b64 s[0:1], vcc
 ; GFX1064DAGISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
@@ -977,7 +977,7 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ;
 ; GFX1064GISEL-LABEL: divergent_cfg:
 ; GFX1064GISEL:       ; %bb.0: ; %entry
-; GFX1064GISEL-NEXT:    v_cmp_le_u32_e32 vcc, 16, v0
+; GFX1064GISEL-NEXT:    v_cmp_lt_u32_e32 vcc, 16, v0
 ; GFX1064GISEL-NEXT:    ; implicit-def: $sgpr6
 ; GFX1064GISEL-NEXT:    s_and_saveexec_b64 s[0:1], vcc
 ; GFX1064GISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
@@ -1014,7 +1014,7 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ;
 ; GFX1032DAGISEL-LABEL: divergent_cfg:
 ; GFX1032DAGISEL:       ; %bb.0: ; %entry
-; GFX1032DAGISEL-NEXT:    v_cmp_lt_u32_e32 vcc_lo, 15, v0
+; GFX1032DAGISEL-NEXT:    v_cmp_le_u32_e32 vcc_lo, 15, v0
 ; GFX1032DAGISEL-NEXT:    ; implicit-def: $sgpr1
 ; GFX1032DAGISEL-NEXT:    s_and_saveexec_b32 s0, vcc_lo
 ; GFX1032DAGISEL-NEXT:    s_xor_b32 s0, exec_lo, s0
@@ -1054,7 +1054,7 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ;
 ; GFX1032GISEL-LABEL: divergent_cfg:
 ; GFX1032GISEL:       ; %bb.0: ; %entry
-; GFX1032GISEL-NEXT:    v_cmp_le_u32_e32 vcc_lo, 16, v0
+; GFX1032GISEL-NEXT:    v_cmp_lt_u32_e32 vcc_lo, 16, v0
 ; GFX1032GISEL-NEXT:    ; implicit-def: $sgpr0
 ; GFX1032GISEL-NEXT:    s_and_saveexec_b32 s1, vcc_lo
 ; GFX1032GISEL-NEXT:    s_xor_b32 s1, exec_lo, s1
@@ -1095,7 +1095,7 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX1164DAGISEL-NEXT:    s_mov_b64 s[0:1], exec
 ; GFX1164DAGISEL-NEXT:    ; implicit-def: $sgpr2
 ; GFX1164DAGISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1164DAGISEL-NEXT:    v_cmpx_lt_u32_e32 15, v0
+; GFX1164DAGISEL-NEXT:    v_cmpx_le_u32_e32 15, v0
 ; GFX1164DAGISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
 ; GFX1164DAGISEL-NEXT:    s_cbranch_execz .LBB4_2
 ; GFX1164DAGISEL-NEXT:  ; %bb.1: ; %else
@@ -1139,7 +1139,7 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX1164GISEL-NEXT:    s_mov_b64 s[0:1], exec
 ; GFX1164GISEL-NEXT:    ; implicit-def: $sgpr6
 ; GFX1164GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1164GISEL-NEXT:    v_cmpx_le_u32_e32 16, v0
+; GFX1164GISEL-NEXT:    v_cmpx_lt_u32_e32 16, v0
 ; GFX1164GISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
 ; GFX1164GISEL-NEXT:    s_cbranch_execz .LBB4_2
 ; GFX1164GISEL-NEXT:  ; %bb.1: ; %else
@@ -1180,7 +1180,7 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX1132DAGISEL-NEXT:    s_mov_b32 s0, exec_lo
 ; GFX1132DAGISEL-NEXT:    ; implicit-def: $sgpr1
 ; GFX1132DAGISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1132DAGISEL-NEXT:    v_cmpx_lt_u32_e32 15, v0
+; GFX1132DAGISEL-NEXT:    v_cmpx_le_u32_e32 15, v0
 ; GFX1132DAGISEL-NEXT:    s_xor_b32 s0, exec_lo, s0
 ; GFX1132DAGISEL-NEXT:    s_cbranch_execz .LBB4_2
 ; GFX1132DAGISEL-NEXT:  ; %bb.1: ; %else
@@ -1224,7 +1224,7 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX1132GISEL-NEXT:    s_mov_b32 s1, exec_lo
 ; GFX1132GISEL-NEXT:    ; implicit-def: $sgpr0
 ; GFX1132GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1132GISEL-NEXT:    v_cmpx_le_u32_e32 16, v0
+; GFX1132GISEL-NEXT:    v_cmpx_lt_u32_e32 16, v0
 ; GFX1132GISEL-NEXT:    s_xor_b32 s1, exec_lo, s1
 ; GFX1132GISEL-NEXT:    s_cbranch_execz .LBB4_2
 ; GFX1132GISEL-NEXT:  ; %bb.1: ; %else
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umax.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umax.ll
index ba7a816184cd8..e53b7aeed9ec7 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umax.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umax.ll
@@ -505,7 +505,7 @@ entry:
 define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX8DAGISEL-LABEL: divergent_cfg:
 ; GFX8DAGISEL:       ; %bb.0: ; %entry
-; GFX8DAGISEL-NEXT:    v_cmp_lt_u32_e32 vcc, 15, v0
+; GFX8DAGISEL-NEXT:    v_cmp_le_u32_e32 vcc, 15, v0
 ; GFX8DAGISEL-NEXT:    ; implicit-def: $sgpr2
 ; GFX8DAGISEL-NEXT:    s_and_saveexec_b64 s[0:1], vcc
 ; GFX8DAGISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
@@ -541,7 +541,7 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ;
 ; GFX8GISEL-LABEL: divergent_cfg:
 ; GFX8GISEL:       ; %bb.0: ; %entry
-; GFX8GISEL-NEXT:    v_cmp_le_u32_e32 vcc, 16, v0
+; GFX8GISEL-NEXT:    v_cmp_lt_u32_e32 vcc, 16, v0
 ; GFX8GISEL-NEXT:    ; implicit-def: $sgpr6
 ; GFX8GISEL-NEXT:    s_and_saveexec_b64 s[0:1], vcc
 ; GFX8GISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
@@ -576,7 +576,7 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ;
 ; GFX9DAGISEL-LABEL: divergent_cfg:
 ; GFX9DAGISEL:       ; %bb.0: ; %entry
-; GFX9DAGISEL-NEXT:    v_cmp_lt_u32_e32 vcc, 15, v0
+; GFX9DAGISEL-NEXT:    v_cmp_le_u32_e32 vcc, 15, v0
 ; GFX9DAGISEL-NEXT:    ; implicit-def: $sgpr2
 ; GFX9DAGISEL-NEXT:    s_and_saveexec_b64 s[0:1], vcc
 ; GFX9DAGISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
@@ -611,7 +611,7 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ;
 ; GFX9GISEL-LABEL: divergent_cfg:
 ; GFX9GISEL:       ; %bb.0: ; %entry
-; GFX9GISEL-NEXT:    v_cmp_le_u32_e32 vcc, 16, v0
+; GFX9GISEL-NEXT:    v_cmp_lt_u32_e32 vcc, 16, v0
 ; GFX9GISEL-NEXT:    ; implicit-def: $sgpr6
 ; GFX9GISEL-NEXT:    s_and_saveexec_b64 s[0:1], vcc
 ; GFX9GISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
@@ -645,7 +645,7 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ;
 ; GFX1064DAGISEL-LABEL: divergent_cfg:
 ; GFX1064DAGISEL:       ; %bb.0: ; %entry
-; GFX1064DAGISEL-NEXT:    v_cmp_lt_u32_e32 vcc, 15, v0
+; GFX1064DAGISEL-NEXT:    v_cmp_le_u32_e32 vcc, 15, v0
 ; GFX1064DAGISEL-NEXT:    ; implicit-def: $sgpr2
 ; GFX1064DAGISEL-NEXT:    s_and_saveexec_b64 s[0:1], vcc
 ; GFX1064DAGISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
@@ -680,7 +680,7 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ;
 ; GFX1064GISEL-LABEL: divergent_cfg:
 ; GFX1064GISEL:       ; %bb.0: ; %entry
-; GFX1064GISEL-NEXT:    v_cmp_le_u32_e32 vcc, 16, v0
+; GFX1064GISEL-NEXT:    v_cmp_lt_u32_e32 vcc, 16, v0
 ; GFX1064GISEL-NEXT:    ; implicit-def: $sgpr6
 ; GFX1064GISEL-NEXT:    s_and_saveexec_b64 s[0:1], vcc
 ; GFX1064GISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
@@ -714,7 +714,7 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ;
 ; GFX1032DAGISEL-LABEL: divergent_cfg:
 ; GFX1032DAGISEL:       ; %bb.0: ; %entry
-; GFX1032DAGISEL-NEXT:    v_cmp_lt_u32_e32 vcc_lo, 15, v0
+; GFX1032DAGISEL-NEXT:    v_cmp_le_u32_e32 vcc_lo, 15, v0
 ; GFX1032DAGISEL-NEXT:    ; implicit-def: $sgpr1
 ; GFX1032DAGISEL-NEXT:    s_and_saveexec_b32 s0, vcc_lo
 ; GFX1032DAGISEL-NEXT:    s_xor_b32 s0, exec_lo, s0
@@ -749,7 +749,7 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ;
 ; GFX1032GISEL-LABEL: divergent_cfg:
 ; GFX1032GISEL:       ; %bb.0: ; %entry
-; GFX1032GISEL-NEXT:    v_cmp_le_u32_e32 vcc_lo, 16, v0
+; GFX1032GISEL-NEXT:    v_cmp_lt_u32_e32 vcc_lo, 16, v0
 ; GFX1032GISEL-NEXT:    ; implicit-def: $sgpr0
 ; GFX1032GISEL-NEXT:    s_and_saveexec_b32 s1, vcc_lo
 ; GFX1032GISEL-NEXT:    s_xor_b32 s1, exec_lo, s1
@@ -787,7 +787,7 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX1164DAGISEL-NEXT:    s_mov_b64 s[0:1], exec
 ; GFX1164DAGISEL-NEXT:    ; implicit-def: $sgpr2
 ; GFX1164DAGISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1164DAGISEL-NEXT:    v_cmpx_lt_u32_e32 15, v0
+; GFX1164DAGISEL-NEXT:    v_cmpx_le_u32_e32 15, v0
 ; GFX1164DAGISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
 ; GFX1164DAGISEL-NEXT:  ; %bb.1: ; %else
 ; GFX1164DAGISEL-NEXT:    s_load_b32 s2, s[4:5], 0x2c
@@ -825,7 +825,7 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX1164GISEL-NEXT:    s_mov_b64 s[0:1], exec
 ; GFX1164GISEL-NEXT:    ; implicit-def: $sgpr6
 ; GFX1164GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1164GISEL-NEXT:    v_cmpx_le_u32_e32 16, v0
+; GFX1164GISEL-NEXT:    v_cmpx_lt_u32_e32 16, v0
 ; GFX1164GISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
 ; GFX1164GISEL-NEXT:    s_cbranch_execz .LBB4_2
 ; GFX1164GISEL-NEXT:  ; %bb.1: ; %else
@@ -862,7 +862,7 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX1132DAGISEL-NEXT:    s_mov_b32 s0, exec_lo
 ; GFX1132DAGISEL-NEXT:    ; implicit-def: $sgpr1
 ; GFX1132DAGISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1132DAGISEL-NEXT:    v_cmpx_lt_u32_e32 15, v0
+; GFX1132DAGISEL-NEXT:    v_cmpx_le_u32_e32 15, v0
 ; GFX1132DAGISEL-NEXT:    s_xor_b32 s0, exec_lo, s0
 ; GFX1132DAGISEL-NEXT:  ; %bb.1: ; %else
 ; GFX1132DAGISEL-NEXT:    s_load_b32 s1, s[4:5], 0x2c
@@ -900,7 +900,7 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX1132GISEL-NEXT:    s_mov_b32 s1, exec_lo
 ; GFX1132GISEL-NEXT:    ; implicit-def: $sgpr0
 ; GFX1132GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1132GISEL-NEXT:    v_cmpx_le_u32_e32 16, v0
+; GFX1132GISEL-NEXT:    v_cmpx_lt_u32_e32 16, v0
 ; GFX1132GISEL-NEXT:    s_xor_b32 s1, exec_lo, s1
 ; GFX1132GISEL-NEXT:    s_cbranch_execz .LBB4_2
 ; GFX1132GISEL-NEXT:  ; %bb.1: ; %else
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umin.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umin.ll
index 3eaa89c957474..d382cdde3e344 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umin.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umin.ll
@@ -505,7 +505,7 @@ entry:
 define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX8DAGISEL-LABEL: divergent_cfg:
 ; GFX8DAGISEL:       ; %bb.0: ; %entry
-; GFX8DAGISEL-NEXT:    v_cmp_lt_u32_e32 vcc, 15, v0
+; GFX8DAGISEL-NEXT:    v_cmp_le_u32_e32 vcc, 15, v0
 ; GFX8DAGISEL-NEXT:    ; implicit-def: $sgpr2
 ; GFX8DAGISEL-NEXT:    s_and_saveexec_b64 s[0:1], vcc
 ; GFX8DAGISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
@@ -541,7 +541,7 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ;
 ; GFX8GISEL-LABEL: divergent_cfg:
 ; GFX8GISEL:       ; %bb.0: ; %entry
-; GFX8GISEL-NEXT:    v_cmp_le_u32_e32 vcc, 16, v0
+; GFX8GISEL-NEXT:    v_cmp_lt_u32_e32 vcc, 16, v0
 ; GFX8GISEL-NEXT:    ; implicit-def: $sgpr6
 ; GFX8GISEL-NEXT:    s_and_saveexec_b64 s[0:1], vcc
 ; GFX8GISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
@@ -576,7 +576,7 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ;
 ; GFX9DAGISEL-LABEL: divergent_cfg:
 ; GFX9DAGISEL:       ; %bb.0: ; %entry
-; GFX9DAGISEL-NEXT:    v_cmp_lt_u32_e32 vcc, 15, v0
+; GFX9DAGISEL-NEXT:    v_cmp_le_u32_e32 vcc, 15, v0
 ; GFX9DAGISEL-NEXT:    ; implicit-def: $sgpr2
 ; GFX9DAGISEL-NEXT:    s_and_saveexec_b64 s[0:1], vcc
 ; GFX9DAGISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
@@ -611,7 +611,7 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ;
 ; GFX9GISEL-LABEL: divergent_cfg:
 ; GFX9GISEL:       ; %bb.0: ; %entry
-; GFX9GISEL-NEXT:    v_cmp_le_u32_e32 vcc, 16, v0
+; GFX9GISEL-NEXT:    v_cmp_lt_u32_e32 vcc, 16, v0
 ; GFX9GISEL-NEXT:    ; implicit-def: $sgpr6
 ; GFX9GISEL-NEXT:    s_and_saveexec_b64 s[0:1], vcc
 ; GFX9GISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
@@ -645,7 +645,7 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ;
 ; GFX1064DAGISEL-LABEL: divergent_cfg:
 ; GFX1064DAGISEL:       ; %bb.0: ; %entry
-; GFX1064DAGISEL-NEXT:    v_cmp_lt_u32_e32 vcc, 15, v0
+; GFX1064DAGISEL-NEXT:    v_cmp_le_u32_e32 vcc, 15, v0
 ; GFX1064DAGISEL-NEXT:    ; implicit-def: $sgpr2
 ; GFX1064DAGISEL-NEXT:    s_and_saveexec_b64 s[0:1], vcc
 ; GFX1064DAGISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
@@ -680,7 +680,7 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ;
 ; GFX1064GISEL-LABEL: divergent_cfg:
 ; GFX1064GISEL:       ; %bb.0: ; %entry
-; GFX1064GISEL-NEXT:    v_cmp_le_u32_e32 vcc, 16, v0
+; GFX1064GISEL-NEXT:    v_cmp_lt_u32_e32 vcc, 16, v0
 ; GFX1064GISEL-NEXT:    ; implicit-def: $sgpr6
 ; GFX1064GISEL-NEXT:    s_and_saveexec_b64 s[0:1], vcc
 ; GFX1064GISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
@@ -714,7 +714,7 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ;
 ; GFX1032DAGISEL-LABEL: divergent_cfg:
 ; GFX1032DAGISEL:       ; %bb.0: ; %entry
-; GFX1032DAGISEL-NEXT:    v_cmp_lt_u32_e32 vcc_lo, 15, v0
+; GFX1032DAGISEL-NEXT:    v_cmp_le_u32_e32 vcc_lo, 15, v0
 ; GFX1032DAGISEL-NEXT:    ; implicit-def: $sgpr1
 ; GFX1032DAGISEL-NEXT:    s_and_saveexec_b32 s0, vcc_lo
 ; GFX1032DAGISEL-NEXT:    s_xor_b32 s0, exec_lo, s0
@@ -749,7 +749,7 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ;
 ; GFX1032GISEL-LABEL: divergent_cfg:
 ; GFX1032GISEL:       ; %bb.0: ; %entry
-; GFX1032GISEL-NEXT:    v_cmp_le_u32_e32 vcc_lo, 16, v0
+; GFX1032GISEL-NEXT:    v_cmp_lt_u32_e32 vcc_lo, 16, v0
 ; GFX1032GISEL-NEXT:    ; implicit-def: $sgpr0
 ; GFX1032GISEL-NEXT:    s_and_saveexec_b32 s1, vcc_lo
 ; GFX1032GISEL-NEXT:    s_xor_b32 s1, exec_lo, s1
@@ -787,7 +787,7 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX1164DAGISEL-NEXT:    s_mov_b64 s[0:1], exec
 ; GFX1164DAGISEL-NEXT:    ; implicit-def: $sgpr2
 ; GFX1164DAGISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1164DAGISEL-NEXT:    v_cmpx_lt_u32_e32 15, v0
+; GFX1164DAGISEL-NEXT:    v_cmpx_le_u32_e32 15, v0
 ; GFX1164DAGISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
 ; GFX1164DAGISEL-NEXT:  ; %bb.1: ; %else
 ; GFX1164DAGISEL-NEXT:    s_load_b32 s2, s[4:5], 0x2c
@@ -825,7 +825,7 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX1164GISEL-NEXT:    s_mov_b64 s[0:1], exec
 ; GFX1164GISEL-NEXT:    ; implicit-def: $sgpr6
 ; GFX1164GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1164GISEL-NEXT:    v_cmpx_le_u32_e32 16, v0
+; GFX1164GISEL-NEXT:    v_cmpx_lt_u32_e32 16, v0
 ; GFX1164GISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
 ; GFX1164GISEL-NEXT:    s_cbranch_execz .LBB4_2
 ; GFX1164GISEL-NEXT:  ; %bb.1: ; %else
@@ -862,7 +862,7 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX1132DAGISEL-NEXT:    s_mov_b32 s0, exec_lo
 ; GFX1132DAGISEL-NEXT:    ; implicit-def: $sgpr1
 ; GFX1132DAGISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1132DAGISEL-NEXT:    v_cmpx_lt_u32_e32 15, v0
+; GFX1132DAGISEL-NEXT:    v_cmpx_le_u32_e32 15, v0
 ; GFX1132DAGISEL-NEXT:    s_xor_b32 s0, exec_lo, s0
 ; GFX1132DAGISEL-NEXT:  ; %bb.1: ; %else
 ; GFX1132DAGISEL-NEXT:    s_load_b32 s1, s[4:5], 0x2c
@@ -900,7 +900,7 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX1132GISEL-NEXT:    s_mov_b32 s1, exec_lo
 ; GFX1132GISEL-NEXT:    ; implicit-def: $sgpr0
 ; GFX1132GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1132GISEL-NEXT:    v_cmpx_le_u32_e32 16, v0
+; GFX1132GISEL-NEXT:    v_cmpx_lt_u32_e32 16, v0
 ; GFX1132GISEL-NEXT:    s_xor_b32 s1, exec_lo, s1
 ; GFX1132GISEL-NEXT:    s_cbranch_execz .LBB4_2
 ; GFX1132GISEL-NEXT:  ; %bb.1: ; %else
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.xor.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.xor.ll
index 5b21d5c3aaeb6..76eeec6f61d2a 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.xor.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.xor.ll
@@ -785,7 +785,7 @@ entry:
 define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX8DAGISEL-LABEL: divergent_cfg:
 ; GFX8DAGISEL:       ; %bb.0: ; %entry
-; GFX8DAGISEL-NEXT:    v_cmp_lt_u32_e32 vcc, 15, v0
+; GFX8DAGISEL-NEXT:    v_cmp_le_u32_e32 vcc, 15, v0
 ; GFX8DAGISEL-NEXT:    ; implicit-def: $sgpr2
 ; GFX8DAGISEL-NEXT:    s_and_saveexec_b64 s[0:1], vcc
 ; GFX8DAGISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
@@ -826,7 +826,7 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ;
 ; GFX8GISEL-LABEL: divergent_cfg:
 ; GFX8GISEL:       ; %bb.0: ; %entry
-; GFX8GISEL-NEXT:    v_cmp_le_u32_e32 vcc, 16, v0
+; GFX8GISEL-NEXT:    v_cmp_lt_u32_e32 vcc, 16, v0
 ; GFX8GISEL-NEXT:    ; implicit-def: $sgpr6
 ; GFX8GISEL-NEXT:    s_and_saveexec_b64 s[0:1], vcc
 ; GFX8GISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
@@ -864,7 +864,7 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ;
 ; GFX9DAGISEL-LABEL: divergent_cfg:
 ; GFX9DAGISEL:       ; %bb.0: ; %entry
-; GFX9DAGISEL-NEXT:    v_cmp_lt_u32_e32 vcc, 15, v0
+; GFX9DAGISEL-NEXT:    v_cmp_le_u32_e32 vcc, 15, v0
 ; GFX9DAGISEL-NEXT:    ; implicit-def: $sgpr2
 ; GFX9DAGISEL-NEXT:    s_and_saveexec_b64 s[0:1], vcc
 ; GFX9DAGISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
@@ -904,7 +904,7 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ;
 ; GFX9GISEL-LABEL: divergent_cfg:
 ; GFX9GISEL:       ; %bb.0: ; %entry
-; GFX9GISEL-NEXT:    v_cmp_le_u32_e32 vcc, 16, v0
+; GFX9GISEL-NEXT:    v_cmp_lt_u32_e32 vcc, 16, v0
 ; GFX9GISEL-NEXT:    ; implicit-def: $sgpr6
 ; GFX9GISEL-NEXT:    s_and_saveexec_b64 s[0:1], vcc
 ; GFX9GISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
@@ -941,7 +941,7 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ;
 ; GFX1064DAGISEL-LABEL: divergent_cfg:
 ; GFX1064DAGISEL:       ; %bb.0: ; %entry
-; GFX1064DAGISEL-NEXT:    v_cmp_lt_u32_e32 vcc, 15, v0
+; GFX1064DAGISEL-NEXT:    v_cmp_le_u32_e32 vcc, 15, v0
 ; GFX1064DAGISEL-NEXT:    ; implicit-def: $sgpr2
 ; GFX1064DAGISEL-NEXT:    s_and_saveexec_b64 s[0:1], vcc
 ; GFX1064DAGISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
@@ -981,7 +981,7 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ;
 ; GFX1064GISEL-LABEL: divergent_cfg:
 ; GFX1064GISEL:       ; %bb.0: ; %entry
-; GFX1064GISEL-NEXT:    v_cmp_le_u32_e32 vcc, 16, v0
+; GFX1064GISEL-NEXT:    v_cmp_lt_u32_e32 vcc, 16, v0
 ; GFX1064GISEL-NEXT:    ; implicit-def: $sgpr6
 ; GFX1064GISEL-NEXT:    s_and_saveexec_b64 s[0:1], vcc
 ; GFX1064GISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
@@ -1018,7 +1018,7 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ;
 ; GFX1032DAGISEL-LABEL: divergent_cfg:
 ; GFX1032DAGISEL:       ; %bb.0: ; %entry
-; GFX1032DAGISEL-NEXT:    v_cmp_lt_u32_e32 vcc_lo, 15, v0
+; GFX1032DAGISEL-NEXT:    v_cmp_le_u32_e32 vcc_lo, 15, v0
 ; GFX1032DAGISEL-NEXT:    ; implicit-def: $sgpr1
 ; GFX1032DAGISEL-NEXT:    s_and_saveexec_b32 s0, vcc_lo
 ; GFX1032DAGISEL-NEXT:    s_xor_b32 s0, exec_lo, s0
@@ -1058,7 +1058,7 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ;
 ; GFX1032GISEL-LABEL: divergent_cfg:
 ; GFX1032GISEL:       ; %bb.0: ; %entry
-; GFX1032GISEL-NEXT:    v_cmp_le_u32_e32 vcc_lo, 16, v0
+; GFX1032GISEL-NEXT:    v_cmp_lt_u32_e32 vcc_lo, 16, v0
 ; GFX1032GISEL-NEXT:    ; implicit-def: $sgpr0
 ; GFX1032GISEL-NEXT:    s_and_saveexec_b32 s1, vcc_lo
 ; GFX1032GISEL-NEXT:    s_xor_b32 s1, exec_lo, s1
@@ -1099,7 +1099,7 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX1164DAGISEL-NEXT:    s_mov_b64 s[0:1], exec
 ; GFX1164DAGISEL-NEXT:    ; implicit-def: $sgpr2
 ; GFX1164DAGISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1164DAGISEL-NEXT:    v_cmpx_lt_u32_e32 15, v0
+; GFX1164DAGISEL-NEXT:    v_cmpx_le_u32_e32 15, v0
 ; GFX1164DAGISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
 ; GFX1164DAGISEL-NEXT:    s_cbranch_execz .LBB4_2
 ; GFX1164DAGISEL-NEXT:  ; %bb.1: ; %else
@@ -1143,7 +1143,7 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX1164GISEL-NEXT:    s_mov_b64 s[0:1], exec
 ; GFX1164GISEL-NEXT:    ; implicit-def: $sgpr6
 ; GFX1164GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1164GISEL-NEXT:    v_cmpx_le_u32_e32 16, v0
+; GFX1164GISEL-NEXT:    v_cmpx_lt_u32_e32 16, v0
 ; GFX1164GISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
 ; GFX1164GISEL-NEXT:    s_cbranch_execz .LBB4_2
 ; GFX1164GISEL-NEXT:  ; %bb.1: ; %else
@@ -1184,7 +1184,7 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX1132DAGISEL-NEXT:    s_mov_b32 s0, exec_lo
 ; GFX1132DAGISEL-NEXT:    ; implicit-def: $sgpr1
 ; GFX1132DAGISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1132DAGISEL-NEXT:    v_cmpx_lt_u32_e32 15, v0
+; GFX1132DAGISEL-NEXT:    v_cmpx_le_u32_e32 15, v0
 ; GFX1132DAGISEL-NEXT:    s_xor_b32 s0, exec_lo, s0
 ; GFX1132DAGISEL-NEXT:    s_cbranch_execz .LBB4_2
 ; GFX1132DAGISEL-NEXT:  ; %bb.1: ; %else
@@ -1228,7 +1228,7 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX1132GISEL-NEXT:    s_mov_b32 s1, exec_lo
 ; GFX1132GISEL-NEXT:    ; implicit-def: $sgpr0
 ; GFX1132GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1132GISEL-NEXT:    v_cmpx_le_u32_e32 16, v0
+; GFX1132GISEL-NEXT:    v_cmpx_lt_u32_e32 16, v0
 ; GFX1132GISEL-NEXT:    s_xor_b32 s1, exec_lo, s1
 ; GFX1132GISEL-NEXT:    s_cbranch_execz .LBB4_2
 ; GFX1132GISEL-NEXT:  ; %bb.1: ; %else
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.bf16.ll b/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.bf16.ll
index 956145fb24c4a..67049d298fa5d 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.bf16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.bf16.ll
@@ -164,9 +164,9 @@ define i1 @snan_bf16(bfloat %x) nounwind {
 ; GFX7CHECK-NEXT:    v_mul_f32_e32 v0, 1.0, v0
 ; GFX7CHECK-NEXT:    v_bfe_u32 v0, v0, 16, 15
 ; GFX7CHECK-NEXT:    s_movk_i32 s4, 0x7fc0
-; GFX7CHECK-NEXT:    v_cmp_gt_i32_e32 vcc, s4, v0
+; GFX7CHECK-NEXT:    v_cmp_ge_i32_e32 vcc, s4, v0
 ; GFX7CHECK-NEXT:    s_movk_i32 s4, 0x7f80
-; GFX7CHECK-NEXT:    v_cmp_lt_i32_e64 s[4:5], s4, v0
+; GFX7CHECK-NEXT:    v_cmp_le_i32_e64 s[4:5], s4, v0
 ; GFX7CHECK-NEXT:    s_and_b64 s[4:5], s[4:5], vcc
 ; GFX7CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
 ; GFX7CHECK-NEXT:    s_setpc_b64 s[30:31]
@@ -235,7 +235,7 @@ define i1 @qnan_bf16(bfloat %x) nounwind {
 ; GFX7CHECK-NEXT:    v_mul_f32_e32 v0, 1.0, v0
 ; GFX7CHECK-NEXT:    v_bfe_u32 v0, v0, 16, 15
 ; GFX7CHECK-NEXT:    s_movk_i32 s4, 0x7fbf
-; GFX7CHECK-NEXT:    v_cmp_lt_i32_e32 vcc, s4, v0
+; GFX7CHECK-NEXT:    v_cmp_le_i32_e32 vcc, s4, v0
 ; GFX7CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
 ; GFX7CHECK-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -396,8 +396,8 @@ define i1 @posnormal_bf16(bfloat %x) nounwind {
 ; GFX7CHECK-NEXT:    v_add_i32_e32 v0, vcc, 0xffffff80, v0
 ; GFX7CHECK-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX7CHECK-NEXT:    s_movk_i32 s6, 0x7f00
-; GFX7CHECK-NEXT:    v_cmp_lt_i32_e64 s[4:5], -1, v1
-; GFX7CHECK-NEXT:    v_cmp_gt_u32_e32 vcc, s6, v0
+; GFX7CHECK-NEXT:    v_cmp_le_i32_e64 s[4:5], -1, v1
+; GFX7CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, s6, v0
 ; GFX7CHECK-NEXT:    s_and_b64 s[4:5], vcc, s[4:5]
 ; GFX7CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
 ; GFX7CHECK-NEXT:    s_setpc_b64 s[30:31]
@@ -472,8 +472,8 @@ define i1 @negnormal_bf16(bfloat %x) nounwind {
 ; GFX7CHECK-NEXT:    v_add_i32_e32 v0, vcc, 0xffffff80, v0
 ; GFX7CHECK-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX7CHECK-NEXT:    s_movk_i32 s6, 0x7f00
-; GFX7CHECK-NEXT:    v_cmp_gt_i32_e64 s[4:5], 0, v1
-; GFX7CHECK-NEXT:    v_cmp_gt_u32_e32 vcc, s6, v0
+; GFX7CHECK-NEXT:    v_cmp_ge_i32_e64 s[4:5], 0, v1
+; GFX7CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, s6, v0
 ; GFX7CHECK-NEXT:    s_and_b64 s[4:5], vcc, s[4:5]
 ; GFX7CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
 ; GFX7CHECK-NEXT:    s_setpc_b64 s[30:31]
@@ -547,7 +547,7 @@ define i1 @possubnormal_bf16(bfloat %x) nounwind {
 ; GFX7CHECK-NEXT:    v_add_i32_e32 v0, vcc, -1, v0
 ; GFX7CHECK-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX7CHECK-NEXT:    s_movk_i32 s4, 0x7f
-; GFX7CHECK-NEXT:    v_cmp_gt_u32_e32 vcc, s4, v0
+; GFX7CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, s4, v0
 ; GFX7CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
 ; GFX7CHECK-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -605,8 +605,8 @@ define i1 @negsubnormal_bf16(bfloat %x) nounwind {
 ; GFX7CHECK-NEXT:    v_bfe_u32 v0, v0, 16, 15
 ; GFX7CHECK-NEXT:    v_add_i32_e64 v0, s[4:5], -1, v0
 ; GFX7CHECK-NEXT:    s_movk_i32 s4, 0x7f
-; GFX7CHECK-NEXT:    v_cmp_gt_i32_e32 vcc, 0, v1
-; GFX7CHECK-NEXT:    v_cmp_gt_u32_e64 s[4:5], s4, v0
+; GFX7CHECK-NEXT:    v_cmp_ge_i32_e32 vcc, 0, v1
+; GFX7CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], s4, v0
 ; GFX7CHECK-NEXT:    s_and_b64 s[4:5], s[4:5], vcc
 ; GFX7CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
 ; GFX7CHECK-NEXT:    s_setpc_b64 s[30:31]
@@ -777,7 +777,7 @@ define i1 @posfinite_bf16(bfloat %x) nounwind {
 ; GFX7CHECK-NEXT:    v_mul_f32_e32 v0, 1.0, v0
 ; GFX7CHECK-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
 ; GFX7CHECK-NEXT:    s_movk_i32 s4, 0x7f80
-; GFX7CHECK-NEXT:    v_cmp_gt_u32_e32 vcc, s4, v0
+; GFX7CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, s4, v0
 ; GFX7CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
 ; GFX7CHECK-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -829,8 +829,8 @@ define i1 @negfinite_bf16(bfloat %x) nounwind {
 ; GFX7CHECK-NEXT:    v_ashrrev_i32_e32 v1, 16, v0
 ; GFX7CHECK-NEXT:    v_bfe_u32 v0, v0, 16, 15
 ; GFX7CHECK-NEXT:    s_movk_i32 s4, 0x7f80
-; GFX7CHECK-NEXT:    v_cmp_gt_i32_e32 vcc, 0, v1
-; GFX7CHECK-NEXT:    v_cmp_gt_i32_e64 s[4:5], s4, v0
+; GFX7CHECK-NEXT:    v_cmp_ge_i32_e32 vcc, 0, v1
+; GFX7CHECK-NEXT:    v_cmp_ge_i32_e64 s[4:5], s4, v0
 ; GFX7CHECK-NEXT:    s_and_b64 s[4:5], s[4:5], vcc
 ; GFX7CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
 ; GFX7CHECK-NEXT:    s_setpc_b64 s[30:31]
@@ -897,7 +897,7 @@ define i1 @isnan_bf16(bfloat %x) nounwind {
 ; GFX7CHECK-NEXT:    v_mul_f32_e32 v0, 1.0, v0
 ; GFX7CHECK-NEXT:    v_bfe_u32 v0, v0, 16, 15
 ; GFX7CHECK-NEXT:    s_movk_i32 s4, 0x7f80
-; GFX7CHECK-NEXT:    v_cmp_lt_i32_e32 vcc, s4, v0
+; GFX7CHECK-NEXT:    v_cmp_le_i32_e32 vcc, s4, v0
 ; GFX7CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
 ; GFX7CHECK-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -953,7 +953,7 @@ define i1 @not_isnan_bf16(bfloat %x) {
 ; GFX7CHECK-NEXT:    v_mul_f32_e32 v0, 1.0, v0
 ; GFX7CHECK-NEXT:    v_bfe_u32 v0, v0, 16, 15
 ; GFX7CHECK-NEXT:    s_movk_i32 s4, 0x7f81
-; GFX7CHECK-NEXT:    v_cmp_gt_i32_e32 vcc, s4, v0
+; GFX7CHECK-NEXT:    v_cmp_ge_i32_e32 vcc, s4, v0
 ; GFX7CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
 ; GFX7CHECK-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -1011,9 +1011,9 @@ define <2 x i1> @isnan_v2bf16(<2 x bfloat> %x) nounwind {
 ; GFX7CHECK-NEXT:    s_movk_i32 s4, 0x7f80
 ; GFX7CHECK-NEXT:    v_bfe_u32 v0, v0, 16, 15
 ; GFX7CHECK-NEXT:    v_bfe_u32 v1, v1, 16, 15
-; GFX7CHECK-NEXT:    v_cmp_lt_i32_e32 vcc, s4, v0
+; GFX7CHECK-NEXT:    v_cmp_le_i32_e32 vcc, s4, v0
 ; GFX7CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; GFX7CHECK-NEXT:    v_cmp_lt_i32_e32 vcc, s4, v1
+; GFX7CHECK-NEXT:    v_cmp_le_i32_e32 vcc, s4, v1
 ; GFX7CHECK-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
 ; GFX7CHECK-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -1085,12 +1085,12 @@ define <3 x i1> @isnan_v3bf16(<3 x bfloat> %x) nounwind {
 ; GFX7CHECK-NEXT:    v_bfe_u32 v0, v0, 16, 15
 ; GFX7CHECK-NEXT:    v_mul_f32_e32 v2, 1.0, v2
 ; GFX7CHECK-NEXT:    v_bfe_u32 v1, v1, 16, 15
-; GFX7CHECK-NEXT:    v_cmp_lt_i32_e32 vcc, s4, v0
+; GFX7CHECK-NEXT:    v_cmp_le_i32_e32 vcc, s4, v0
 ; GFX7CHECK-NEXT:    v_bfe_u32 v2, v2, 16, 15
 ; GFX7CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; GFX7CHECK-NEXT:    v_cmp_lt_i32_e32 vcc, s4, v1
+; GFX7CHECK-NEXT:    v_cmp_le_i32_e32 vcc, s4, v1
 ; GFX7CHECK-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; GFX7CHECK-NEXT:    v_cmp_lt_i32_e32 vcc, s4, v2
+; GFX7CHECK-NEXT:    v_cmp_le_i32_e32 vcc, s4, v2
 ; GFX7CHECK-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
 ; GFX7CHECK-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -1177,16 +1177,16 @@ define <4 x i1> @isnan_v4bf16(<4 x bfloat> %x) nounwind {
 ; GFX7CHECK-NEXT:    v_bfe_u32 v0, v0, 16, 15
 ; GFX7CHECK-NEXT:    v_mul_f32_e32 v2, 1.0, v2
 ; GFX7CHECK-NEXT:    v_bfe_u32 v1, v1, 16, 15
-; GFX7CHECK-NEXT:    v_cmp_lt_i32_e32 vcc, s4, v0
+; GFX7CHECK-NEXT:    v_cmp_le_i32_e32 vcc, s4, v0
 ; GFX7CHECK-NEXT:    v_mul_f32_e32 v3, 1.0, v3
 ; GFX7CHECK-NEXT:    v_bfe_u32 v2, v2, 16, 15
 ; GFX7CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; GFX7CHECK-NEXT:    v_cmp_lt_i32_e32 vcc, s4, v1
+; GFX7CHECK-NEXT:    v_cmp_le_i32_e32 vcc, s4, v1
 ; GFX7CHECK-NEXT:    v_bfe_u32 v3, v3, 16, 15
 ; GFX7CHECK-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; GFX7CHECK-NEXT:    v_cmp_lt_i32_e32 vcc, s4, v2
+; GFX7CHECK-NEXT:    v_cmp_le_i32_e32 vcc, s4, v2
 ; GFX7CHECK-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX7CHECK-NEXT:    v_cmp_lt_i32_e32 vcc, s4, v3
+; GFX7CHECK-NEXT:    v_cmp_le_i32_e32 vcc, s4, v3
 ; GFX7CHECK-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
 ; GFX7CHECK-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -1344,7 +1344,7 @@ define i1 @isfinite_bf16(bfloat %x) nounwind {
 ; GFX7CHECK-NEXT:    v_mul_f32_e32 v0, 1.0, v0
 ; GFX7CHECK-NEXT:    v_bfe_u32 v0, v0, 16, 15
 ; GFX7CHECK-NEXT:    s_movk_i32 s4, 0x7f80
-; GFX7CHECK-NEXT:    v_cmp_gt_i32_e32 vcc, s4, v0
+; GFX7CHECK-NEXT:    v_cmp_ge_i32_e32 vcc, s4, v0
 ; GFX7CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
 ; GFX7CHECK-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -1512,7 +1512,7 @@ define i1 @isnormal_bf16(bfloat %x) {
 ; GFX7CHECK-NEXT:    v_add_i32_e32 v0, vcc, 0xffffff80, v0
 ; GFX7CHECK-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX7CHECK-NEXT:    s_movk_i32 s4, 0x7f00
-; GFX7CHECK-NEXT:    v_cmp_gt_u32_e32 vcc, s4, v0
+; GFX7CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, s4, v0
 ; GFX7CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
 ; GFX7CHECK-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -1575,7 +1575,7 @@ define i1 @not_isnormal_bf16(bfloat %x) {
 ; GFX7CHECK-NEXT:    v_add_i32_e32 v0, vcc, 0xffffff80, v0
 ; GFX7CHECK-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX7CHECK-NEXT:    s_movk_i32 s4, 0x7eff
-; GFX7CHECK-NEXT:    v_cmp_lt_u32_e32 vcc, s4, v0
+; GFX7CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s4, v0
 ; GFX7CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
 ; GFX7CHECK-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -1639,8 +1639,8 @@ define i1 @not_is_plus_normal_bf16(bfloat %x) {
 ; GFX7CHECK-NEXT:    v_add_i32_e32 v0, vcc, 0xffffff80, v0
 ; GFX7CHECK-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX7CHECK-NEXT:    s_movk_i32 s6, 0x7eff
-; GFX7CHECK-NEXT:    v_cmp_gt_i32_e64 s[4:5], 0, v1
-; GFX7CHECK-NEXT:    v_cmp_lt_u32_e32 vcc, s6, v0
+; GFX7CHECK-NEXT:    v_cmp_ge_i32_e64 s[4:5], 0, v1
+; GFX7CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s6, v0
 ; GFX7CHECK-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
 ; GFX7CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
 ; GFX7CHECK-NEXT:    s_setpc_b64 s[30:31]
@@ -1715,8 +1715,8 @@ define i1 @not_is_neg_normal_bf16(bfloat %x) {
 ; GFX7CHECK-NEXT:    v_add_i32_e32 v0, vcc, 0xffffff80, v0
 ; GFX7CHECK-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX7CHECK-NEXT:    s_movk_i32 s6, 0x7eff
-; GFX7CHECK-NEXT:    v_cmp_lt_i32_e64 s[4:5], -1, v1
-; GFX7CHECK-NEXT:    v_cmp_lt_u32_e32 vcc, s6, v0
+; GFX7CHECK-NEXT:    v_cmp_le_i32_e64 s[4:5], -1, v1
+; GFX7CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s6, v0
 ; GFX7CHECK-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
 ; GFX7CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
 ; GFX7CHECK-NEXT:    s_setpc_b64 s[30:31]
@@ -1789,7 +1789,7 @@ define i1 @issubnormal_bf16(bfloat %x) {
 ; GFX7CHECK-NEXT:    v_bfe_u32 v0, v0, 16, 15
 ; GFX7CHECK-NEXT:    v_add_i32_e32 v0, vcc, -1, v0
 ; GFX7CHECK-NEXT:    s_movk_i32 s4, 0x7f
-; GFX7CHECK-NEXT:    v_cmp_gt_u32_e32 vcc, s4, v0
+; GFX7CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, s4, v0
 ; GFX7CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
 ; GFX7CHECK-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -1851,7 +1851,7 @@ define i1 @not_issubnormal_bf16(bfloat %x) {
 ; GFX7CHECK-NEXT:    v_bfe_u32 v0, v0, 16, 15
 ; GFX7CHECK-NEXT:    v_add_i32_e32 v0, vcc, -1, v0
 ; GFX7CHECK-NEXT:    s_movk_i32 s4, 0x7e
-; GFX7CHECK-NEXT:    v_cmp_lt_u32_e32 vcc, s4, v0
+; GFX7CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s4, v0
 ; GFX7CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
 ; GFX7CHECK-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -2018,7 +2018,7 @@ define i1 @ispositive_bf16(bfloat %x) {
 ; GFX7CHECK-NEXT:    v_mul_f32_e32 v0, 1.0, v0
 ; GFX7CHECK-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
 ; GFX7CHECK-NEXT:    s_movk_i32 s4, 0x7f81
-; GFX7CHECK-NEXT:    v_cmp_gt_u32_e32 vcc, s4, v0
+; GFX7CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, s4, v0
 ; GFX7CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
 ; GFX7CHECK-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -2071,13 +2071,13 @@ define i1 @not_ispositive_bf16(bfloat %x) {
 ; GFX7CHECK-NEXT:    v_ashrrev_i32_e32 v2, 16, v0
 ; GFX7CHECK-NEXT:    v_bfe_u32 v0, v0, 16, 15
 ; GFX7CHECK-NEXT:    s_movk_i32 s6, 0x7f80
-; GFX7CHECK-NEXT:    v_cmp_gt_i32_e32 vcc, 0, v2
-; GFX7CHECK-NEXT:    v_cmp_gt_i32_e64 s[4:5], s6, v0
+; GFX7CHECK-NEXT:    v_cmp_ge_i32_e32 vcc, 0, v2
+; GFX7CHECK-NEXT:    v_cmp_ge_i32_e64 s[4:5], s6, v0
 ; GFX7CHECK-NEXT:    s_mov_b32 s7, 0xff80
 ; GFX7CHECK-NEXT:    s_and_b64 s[4:5], s[4:5], vcc
 ; GFX7CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, s7, v1
 ; GFX7CHECK-NEXT:    s_or_b64 s[4:5], s[4:5], vcc
-; GFX7CHECK-NEXT:    v_cmp_lt_i32_e32 vcc, s6, v0
+; GFX7CHECK-NEXT:    v_cmp_le_i32_e32 vcc, s6, v0
 ; GFX7CHECK-NEXT:    s_or_b64 s[4:5], s[4:5], vcc
 ; GFX7CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
 ; GFX7CHECK-NEXT:    s_setpc_b64 s[30:31]
@@ -2168,8 +2168,8 @@ define i1 @isnegative_bf16(bfloat %x) {
 ; GFX7CHECK-NEXT:    v_ashrrev_i32_e32 v2, 16, v0
 ; GFX7CHECK-NEXT:    v_bfe_u32 v0, v0, 16, 15
 ; GFX7CHECK-NEXT:    s_movk_i32 s4, 0x7f80
-; GFX7CHECK-NEXT:    v_cmp_gt_i32_e32 vcc, 0, v2
-; GFX7CHECK-NEXT:    v_cmp_gt_i32_e64 s[4:5], s4, v0
+; GFX7CHECK-NEXT:    v_cmp_ge_i32_e32 vcc, 0, v2
+; GFX7CHECK-NEXT:    v_cmp_ge_i32_e64 s[4:5], s4, v0
 ; GFX7CHECK-NEXT:    s_mov_b32 s6, 0xff80
 ; GFX7CHECK-NEXT:    s_and_b64 s[4:5], s[4:5], vcc
 ; GFX7CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, s6, v1
@@ -2252,9 +2252,9 @@ define i1 @not_isnegative_bf16(bfloat %x) {
 ; GFX7CHECK-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
 ; GFX7CHECK-NEXT:    v_bfe_u32 v0, v0, 16, 15
 ; GFX7CHECK-NEXT:    s_movk_i32 s4, 0x7f80
-; GFX7CHECK-NEXT:    v_cmp_lt_i32_e32 vcc, s4, v0
+; GFX7CHECK-NEXT:    v_cmp_le_i32_e32 vcc, s4, v0
 ; GFX7CHECK-NEXT:    s_movk_i32 s4, 0x7f81
-; GFX7CHECK-NEXT:    v_cmp_gt_u32_e64 s[4:5], s4, v1
+; GFX7CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], s4, v1
 ; GFX7CHECK-NEXT:    s_or_b64 s[4:5], s[4:5], vcc
 ; GFX7CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
 ; GFX7CHECK-NEXT:    s_setpc_b64 s[30:31]
@@ -2323,7 +2323,7 @@ define i1 @iszero_or_nan_bf16(bfloat %x) {
 ; GFX7CHECK-NEXT:    v_mul_f32_e32 v0, 1.0, v0
 ; GFX7CHECK-NEXT:    v_bfe_u32 v0, v0, 16, 15
 ; GFX7CHECK-NEXT:    s_movk_i32 s4, 0x7f80
-; GFX7CHECK-NEXT:    v_cmp_lt_i32_e32 vcc, s4, v0
+; GFX7CHECK-NEXT:    v_cmp_le_i32_e32 vcc, s4, v0
 ; GFX7CHECK-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v0
 ; GFX7CHECK-NEXT:    s_or_b64 s[4:5], s[4:5], vcc
 ; GFX7CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
@@ -2392,7 +2392,7 @@ define i1 @iszero_or_nan_f_daz(bfloat %x) #0 {
 ; GFX7CHECK-NEXT:    v_mul_f32_e32 v0, 1.0, v0
 ; GFX7CHECK-NEXT:    v_bfe_u32 v0, v0, 16, 15
 ; GFX7CHECK-NEXT:    s_movk_i32 s4, 0x7f80
-; GFX7CHECK-NEXT:    v_cmp_lt_i32_e32 vcc, s4, v0
+; GFX7CHECK-NEXT:    v_cmp_le_i32_e32 vcc, s4, v0
 ; GFX7CHECK-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v0
 ; GFX7CHECK-NEXT:    s_or_b64 s[4:5], s[4:5], vcc
 ; GFX7CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
@@ -2461,7 +2461,7 @@ define i1 @iszero_or_nan_f_maybe_daz(bfloat %x) #1 {
 ; GFX7CHECK-NEXT:    v_mul_f32_e32 v0, 1.0, v0
 ; GFX7CHECK-NEXT:    v_bfe_u32 v0, v0, 16, 15
 ; GFX7CHECK-NEXT:    s_movk_i32 s4, 0x7f80
-; GFX7CHECK-NEXT:    v_cmp_lt_i32_e32 vcc, s4, v0
+; GFX7CHECK-NEXT:    v_cmp_le_i32_e32 vcc, s4, v0
 ; GFX7CHECK-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v0
 ; GFX7CHECK-NEXT:    s_or_b64 s[4:5], s[4:5], vcc
 ; GFX7CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
@@ -2530,7 +2530,7 @@ define i1 @not_iszero_or_nan_bf16(bfloat %x) {
 ; GFX7CHECK-NEXT:    v_mul_f32_e32 v0, 1.0, v0
 ; GFX7CHECK-NEXT:    v_bfe_u32 v0, v0, 16, 15
 ; GFX7CHECK-NEXT:    s_movk_i32 s4, 0x7f81
-; GFX7CHECK-NEXT:    v_cmp_gt_i32_e32 vcc, s4, v0
+; GFX7CHECK-NEXT:    v_cmp_ge_i32_e32 vcc, s4, v0
 ; GFX7CHECK-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v0
 ; GFX7CHECK-NEXT:    s_and_b64 s[4:5], s[4:5], vcc
 ; GFX7CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
@@ -2599,7 +2599,7 @@ define i1 @not_iszero_or_nan_f_daz(bfloat %x) #0 {
 ; GFX7CHECK-NEXT:    v_mul_f32_e32 v0, 1.0, v0
 ; GFX7CHECK-NEXT:    v_bfe_u32 v0, v0, 16, 15
 ; GFX7CHECK-NEXT:    s_movk_i32 s4, 0x7f81
-; GFX7CHECK-NEXT:    v_cmp_gt_i32_e32 vcc, s4, v0
+; GFX7CHECK-NEXT:    v_cmp_ge_i32_e32 vcc, s4, v0
 ; GFX7CHECK-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v0
 ; GFX7CHECK-NEXT:    s_and_b64 s[4:5], s[4:5], vcc
 ; GFX7CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
@@ -2668,7 +2668,7 @@ define i1 @not_iszero_or_nan_f_maybe_daz(bfloat %x) #1 {
 ; GFX7CHECK-NEXT:    v_mul_f32_e32 v0, 1.0, v0
 ; GFX7CHECK-NEXT:    v_bfe_u32 v0, v0, 16, 15
 ; GFX7CHECK-NEXT:    s_movk_i32 s4, 0x7f81
-; GFX7CHECK-NEXT:    v_cmp_gt_i32_e32 vcc, s4, v0
+; GFX7CHECK-NEXT:    v_cmp_ge_i32_e32 vcc, s4, v0
 ; GFX7CHECK-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v0
 ; GFX7CHECK-NEXT:    s_and_b64 s[4:5], s[4:5], vcc
 ; GFX7CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
@@ -2737,7 +2737,7 @@ define i1 @iszero_or_qnan_bf16(bfloat %x) {
 ; GFX7CHECK-NEXT:    v_mul_f32_e32 v0, 1.0, v0
 ; GFX7CHECK-NEXT:    v_bfe_u32 v0, v0, 16, 15
 ; GFX7CHECK-NEXT:    s_movk_i32 s4, 0x7fbf
-; GFX7CHECK-NEXT:    v_cmp_lt_i32_e32 vcc, s4, v0
+; GFX7CHECK-NEXT:    v_cmp_le_i32_e32 vcc, s4, v0
 ; GFX7CHECK-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v0
 ; GFX7CHECK-NEXT:    s_or_b64 s[4:5], s[4:5], vcc
 ; GFX7CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
@@ -2806,9 +2806,9 @@ define i1 @iszero_or_snan_bf16(bfloat %x) {
 ; GFX7CHECK-NEXT:    v_mul_f32_e32 v0, 1.0, v0
 ; GFX7CHECK-NEXT:    v_bfe_u32 v0, v0, 16, 15
 ; GFX7CHECK-NEXT:    s_movk_i32 s4, 0x7fc0
-; GFX7CHECK-NEXT:    v_cmp_gt_i32_e32 vcc, s4, v0
+; GFX7CHECK-NEXT:    v_cmp_ge_i32_e32 vcc, s4, v0
 ; GFX7CHECK-NEXT:    s_movk_i32 s4, 0x7f80
-; GFX7CHECK-NEXT:    v_cmp_lt_i32_e64 s[4:5], s4, v0
+; GFX7CHECK-NEXT:    v_cmp_le_i32_e64 s[4:5], s4, v0
 ; GFX7CHECK-NEXT:    s_and_b64 s[4:5], s[4:5], vcc
 ; GFX7CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
 ; GFX7CHECK-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
@@ -2891,19 +2891,19 @@ define i1 @not_iszero_or_qnan_bf16(bfloat %x) {
 ; GFX7CHECK-NEXT:    v_bfe_u32 v0, v0, 16, 15
 ; GFX7CHECK-NEXT:    s_movk_i32 s4, 0x7fc0
 ; GFX7CHECK-NEXT:    s_movk_i32 s8, 0x7f80
-; GFX7CHECK-NEXT:    v_cmp_gt_i32_e32 vcc, s4, v0
-; GFX7CHECK-NEXT:    v_cmp_lt_i32_e64 s[4:5], s8, v0
+; GFX7CHECK-NEXT:    v_cmp_ge_i32_e32 vcc, s4, v0
+; GFX7CHECK-NEXT:    v_cmp_le_i32_e64 s[4:5], s8, v0
 ; GFX7CHECK-NEXT:    s_and_b64 s[6:7], s[4:5], vcc
 ; GFX7CHECK-NEXT:    v_add_i32_e64 v1, s[4:5], -1, v0
 ; GFX7CHECK-NEXT:    s_movk_i32 s4, 0x7f
 ; GFX7CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, s8, v0
-; GFX7CHECK-NEXT:    v_cmp_gt_u32_e64 s[4:5], s4, v1
+; GFX7CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], s4, v1
 ; GFX7CHECK-NEXT:    s_or_b64 s[4:5], s[4:5], vcc
 ; GFX7CHECK-NEXT:    v_add_i32_e32 v0, vcc, 0xffffff80, v0
 ; GFX7CHECK-NEXT:    s_or_b64 s[4:5], s[4:5], s[6:7]
 ; GFX7CHECK-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX7CHECK-NEXT:    s_movk_i32 s6, 0x7f00
-; GFX7CHECK-NEXT:    v_cmp_gt_u32_e32 vcc, s6, v0
+; GFX7CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, s6, v0
 ; GFX7CHECK-NEXT:    s_or_b64 s[4:5], s[4:5], vcc
 ; GFX7CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
 ; GFX7CHECK-NEXT:    s_setpc_b64 s[30:31]
@@ -3020,15 +3020,15 @@ define i1 @not_iszero_or_snan_bf16(bfloat %x) {
 ; GFX7CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, s4, v0
 ; GFX7CHECK-NEXT:    v_add_i32_e64 v1, s[4:5], -1, v0
 ; GFX7CHECK-NEXT:    s_movk_i32 s4, 0x7f
-; GFX7CHECK-NEXT:    v_cmp_gt_u32_e64 s[4:5], s4, v1
+; GFX7CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], s4, v1
 ; GFX7CHECK-NEXT:    s_movk_i32 s6, 0x7fbf
 ; GFX7CHECK-NEXT:    s_or_b64 s[4:5], s[4:5], vcc
-; GFX7CHECK-NEXT:    v_cmp_lt_i32_e32 vcc, s6, v0
+; GFX7CHECK-NEXT:    v_cmp_le_i32_e32 vcc, s6, v0
 ; GFX7CHECK-NEXT:    s_or_b64 s[4:5], s[4:5], vcc
 ; GFX7CHECK-NEXT:    v_add_i32_e32 v0, vcc, 0xffffff80, v0
 ; GFX7CHECK-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX7CHECK-NEXT:    s_movk_i32 s6, 0x7f00
-; GFX7CHECK-NEXT:    v_cmp_gt_u32_e32 vcc, s6, v0
+; GFX7CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, s6, v0
 ; GFX7CHECK-NEXT:    s_or_b64 s[4:5], s[4:5], vcc
 ; GFX7CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
 ; GFX7CHECK-NEXT:    s_setpc_b64 s[30:31]
@@ -3132,7 +3132,7 @@ define i1 @isinf_or_nan_bf16(bfloat %x) {
 ; GFX7CHECK-NEXT:    v_mul_f32_e32 v0, 1.0, v0
 ; GFX7CHECK-NEXT:    v_bfe_u32 v0, v0, 16, 15
 ; GFX7CHECK-NEXT:    s_movk_i32 s4, 0x7f7f
-; GFX7CHECK-NEXT:    v_cmp_lt_i32_e32 vcc, s4, v0
+; GFX7CHECK-NEXT:    v_cmp_le_i32_e32 vcc, s4, v0
 ; GFX7CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
 ; GFX7CHECK-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -3189,7 +3189,7 @@ define i1 @not_isinf_or_nan_bf16(bfloat %x) {
 ; GFX7CHECK-NEXT:    v_mul_f32_e32 v0, 1.0, v0
 ; GFX7CHECK-NEXT:    v_bfe_u32 v0, v0, 16, 15
 ; GFX7CHECK-NEXT:    s_movk_i32 s4, 0x7f80
-; GFX7CHECK-NEXT:    v_cmp_gt_i32_e32 vcc, s4, v0
+; GFX7CHECK-NEXT:    v_cmp_ge_i32_e32 vcc, s4, v0
 ; GFX7CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
 ; GFX7CHECK-NEXT:    s_setpc_b64 s[30:31]
 ;
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll
index 18c462ffd0ff5..fd02c166ae45b 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll
@@ -171,8 +171,8 @@ define i1 @snan_f16(half %x) nounwind {
 ; GFX7SELDAG-NEXT:    s_movk_i32 s4, 0x7e00
 ; GFX7SELDAG-NEXT:    s_movk_i32 s5, 0x7c00
 ; GFX7SELDAG-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
-; GFX7SELDAG-NEXT:    v_cmp_gt_i32_e32 vcc, s4, v0
-; GFX7SELDAG-NEXT:    v_cmp_lt_i32_e64 s[4:5], s5, v0
+; GFX7SELDAG-NEXT:    v_cmp_ge_i32_e32 vcc, s4, v0
+; GFX7SELDAG-NEXT:    v_cmp_le_i32_e64 s[4:5], s5, v0
 ; GFX7SELDAG-NEXT:    s_and_b64 s[4:5], s[4:5], vcc
 ; GFX7SELDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
 ; GFX7SELDAG-NEXT:    s_setpc_b64 s[30:31]
@@ -247,7 +247,7 @@ define i1 @qnan_f16(half %x) nounwind {
 ; GFX7SELDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
 ; GFX7SELDAG-NEXT:    s_movk_i32 s4, 0x7dff
 ; GFX7SELDAG-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
-; GFX7SELDAG-NEXT:    v_cmp_lt_i32_e32 vcc, s4, v0
+; GFX7SELDAG-NEXT:    v_cmp_le_i32_e32 vcc, s4, v0
 ; GFX7SELDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
 ; GFX7SELDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -467,8 +467,8 @@ define i1 @posnormal_f16(half %x) nounwind {
 ; GFX7SELDAG-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
 ; GFX7SELDAG-NEXT:    v_add_i32_e32 v0, vcc, 0xfffffc00, v0
 ; GFX7SELDAG-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX7SELDAG-NEXT:    v_cmp_lt_i32_e64 s[4:5], -1, v1
-; GFX7SELDAG-NEXT:    v_cmp_gt_u32_e32 vcc, s6, v0
+; GFX7SELDAG-NEXT:    v_cmp_le_i32_e64 s[4:5], -1, v1
+; GFX7SELDAG-NEXT:    v_cmp_ge_u32_e32 vcc, s6, v0
 ; GFX7SELDAG-NEXT:    s_and_b64 s[4:5], vcc, s[4:5]
 ; GFX7SELDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
 ; GFX7SELDAG-NEXT:    s_setpc_b64 s[30:31]
@@ -552,8 +552,8 @@ define i1 @negnormal_f16(half %x) nounwind {
 ; GFX7SELDAG-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
 ; GFX7SELDAG-NEXT:    v_add_i32_e32 v0, vcc, 0xfffffc00, v0
 ; GFX7SELDAG-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX7SELDAG-NEXT:    v_cmp_gt_i32_e64 s[4:5], 0, v1
-; GFX7SELDAG-NEXT:    v_cmp_gt_u32_e32 vcc, s6, v0
+; GFX7SELDAG-NEXT:    v_cmp_ge_i32_e64 s[4:5], 0, v1
+; GFX7SELDAG-NEXT:    v_cmp_ge_u32_e32 vcc, s6, v0
 ; GFX7SELDAG-NEXT:    s_and_b64 s[4:5], vcc, s[4:5]
 ; GFX7SELDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
 ; GFX7SELDAG-NEXT:    s_setpc_b64 s[30:31]
@@ -633,7 +633,7 @@ define i1 @possubnormal_f16(half %x) nounwind {
 ; GFX7SELDAG-NEXT:    s_movk_i32 s4, 0x3ff
 ; GFX7SELDAG-NEXT:    v_add_i32_e32 v0, vcc, -1, v0
 ; GFX7SELDAG-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX7SELDAG-NEXT:    v_cmp_gt_u32_e32 vcc, s4, v0
+; GFX7SELDAG-NEXT:    v_cmp_ge_u32_e32 vcc, s4, v0
 ; GFX7SELDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
 ; GFX7SELDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -710,8 +710,8 @@ define i1 @negsubnormal_f16(half %x) nounwind {
 ; GFX7SELDAG-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
 ; GFX7SELDAG-NEXT:    v_add_i32_e64 v0, s[4:5], -1, v0
 ; GFX7SELDAG-NEXT:    s_movk_i32 s4, 0x3ff
-; GFX7SELDAG-NEXT:    v_cmp_gt_i32_e32 vcc, 0, v1
-; GFX7SELDAG-NEXT:    v_cmp_gt_u32_e64 s[4:5], s4, v0
+; GFX7SELDAG-NEXT:    v_cmp_ge_i32_e32 vcc, 0, v1
+; GFX7SELDAG-NEXT:    v_cmp_ge_u32_e64 s[4:5], s4, v0
 ; GFX7SELDAG-NEXT:    s_and_b64 s[4:5], s[4:5], vcc
 ; GFX7SELDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
 ; GFX7SELDAG-NEXT:    s_setpc_b64 s[30:31]
@@ -929,7 +929,7 @@ define i1 @posfinite_f16(half %x) nounwind {
 ; GFX7SELDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX7SELDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
 ; GFX7SELDAG-NEXT:    s_movk_i32 s4, 0x7c00
-; GFX7SELDAG-NEXT:    v_cmp_gt_u32_e32 vcc, s4, v0
+; GFX7SELDAG-NEXT:    v_cmp_ge_u32_e32 vcc, s4, v0
 ; GFX7SELDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
 ; GFX7SELDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -1004,8 +1004,8 @@ define i1 @negfinite_f16(half %x) nounwind {
 ; GFX7SELDAG-NEXT:    s_movk_i32 s4, 0x7c00
 ; GFX7SELDAG-NEXT:    v_bfe_i32 v1, v0, 0, 16
 ; GFX7SELDAG-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
-; GFX7SELDAG-NEXT:    v_cmp_gt_i32_e32 vcc, 0, v1
-; GFX7SELDAG-NEXT:    v_cmp_gt_i32_e64 s[4:5], s4, v0
+; GFX7SELDAG-NEXT:    v_cmp_ge_i32_e32 vcc, 0, v1
+; GFX7SELDAG-NEXT:    v_cmp_ge_i32_e64 s[4:5], s4, v0
 ; GFX7SELDAG-NEXT:    s_and_b64 s[4:5], s[4:5], vcc
 ; GFX7SELDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
 ; GFX7SELDAG-NEXT:    s_setpc_b64 s[30:31]
@@ -1082,7 +1082,7 @@ define i1 @isnan_f16(half %x) nounwind {
 ; GFX7SELDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
 ; GFX7SELDAG-NEXT:    s_movk_i32 s4, 0x7c00
 ; GFX7SELDAG-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
-; GFX7SELDAG-NEXT:    v_cmp_lt_i32_e32 vcc, s4, v0
+; GFX7SELDAG-NEXT:    v_cmp_le_i32_e32 vcc, s4, v0
 ; GFX7SELDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
 ; GFX7SELDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -1155,7 +1155,7 @@ define i1 @not_isnan_f16(half %x) {
 ; GFX7SELDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
 ; GFX7SELDAG-NEXT:    s_movk_i32 s4, 0x7c01
 ; GFX7SELDAG-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
-; GFX7SELDAG-NEXT:    v_cmp_gt_i32_e32 vcc, s4, v0
+; GFX7SELDAG-NEXT:    v_cmp_ge_i32_e32 vcc, s4, v0
 ; GFX7SELDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
 ; GFX7SELDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -1232,9 +1232,9 @@ define <2 x i1> @isnan_v2f16(<2 x half> %x) nounwind {
 ; GFX7SELDAG-NEXT:    s_movk_i32 s4, 0x7c00
 ; GFX7SELDAG-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
 ; GFX7SELDAG-NEXT:    v_and_b32_e32 v1, 0x7fff, v1
-; GFX7SELDAG-NEXT:    v_cmp_lt_i32_e32 vcc, s4, v0
+; GFX7SELDAG-NEXT:    v_cmp_le_i32_e32 vcc, s4, v0
 ; GFX7SELDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; GFX7SELDAG-NEXT:    v_cmp_lt_i32_e32 vcc, s4, v1
+; GFX7SELDAG-NEXT:    v_cmp_le_i32_e32 vcc, s4, v1
 ; GFX7SELDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
 ; GFX7SELDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -1366,12 +1366,12 @@ define <3 x i1> @isnan_v3f16(<3 x half> %x) nounwind {
 ; GFX7SELDAG-NEXT:    s_movk_i32 s4, 0x7c00
 ; GFX7SELDAG-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
 ; GFX7SELDAG-NEXT:    v_and_b32_e32 v1, 0x7fff, v1
-; GFX7SELDAG-NEXT:    v_cmp_lt_i32_e32 vcc, s4, v0
+; GFX7SELDAG-NEXT:    v_cmp_le_i32_e32 vcc, s4, v0
 ; GFX7SELDAG-NEXT:    v_and_b32_e32 v2, 0x7fff, v2
 ; GFX7SELDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; GFX7SELDAG-NEXT:    v_cmp_lt_i32_e32 vcc, s4, v1
+; GFX7SELDAG-NEXT:    v_cmp_le_i32_e32 vcc, s4, v1
 ; GFX7SELDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; GFX7SELDAG-NEXT:    v_cmp_lt_i32_e32 vcc, s4, v2
+; GFX7SELDAG-NEXT:    v_cmp_le_i32_e32 vcc, s4, v2
 ; GFX7SELDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
 ; GFX7SELDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -1535,15 +1535,15 @@ define <4 x i1> @isnan_v4f16(<4 x half> %x) nounwind {
 ; GFX7SELDAG-NEXT:    s_movk_i32 s4, 0x7c00
 ; GFX7SELDAG-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
 ; GFX7SELDAG-NEXT:    v_and_b32_e32 v1, 0x7fff, v1
-; GFX7SELDAG-NEXT:    v_cmp_lt_i32_e32 vcc, s4, v0
+; GFX7SELDAG-NEXT:    v_cmp_le_i32_e32 vcc, s4, v0
 ; GFX7SELDAG-NEXT:    v_and_b32_e32 v2, 0x7fff, v2
 ; GFX7SELDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; GFX7SELDAG-NEXT:    v_cmp_lt_i32_e32 vcc, s4, v1
+; GFX7SELDAG-NEXT:    v_cmp_le_i32_e32 vcc, s4, v1
 ; GFX7SELDAG-NEXT:    v_and_b32_e32 v3, 0x7fff, v3
 ; GFX7SELDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; GFX7SELDAG-NEXT:    v_cmp_lt_i32_e32 vcc, s4, v2
+; GFX7SELDAG-NEXT:    v_cmp_le_i32_e32 vcc, s4, v2
 ; GFX7SELDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX7SELDAG-NEXT:    v_cmp_lt_i32_e32 vcc, s4, v3
+; GFX7SELDAG-NEXT:    v_cmp_le_i32_e32 vcc, s4, v3
 ; GFX7SELDAG-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
 ; GFX7SELDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -1733,7 +1733,7 @@ define i1 @isnan_f16_strictfp(half %x) strictfp nounwind {
 ; GFX7SELDAG-NEXT:    v_cvt_f32_f16_e32 v0, v0
 ; GFX7SELDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
 ; GFX7SELDAG-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
-; GFX7SELDAG-NEXT:    v_cmp_lt_i32_e32 vcc, s4, v0
+; GFX7SELDAG-NEXT:    v_cmp_le_i32_e32 vcc, s4, v0
 ; GFX7SELDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
 ; GFX7SELDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -1881,7 +1881,7 @@ define i1 @isfinite_f16(half %x) nounwind {
 ; GFX7SELDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
 ; GFX7SELDAG-NEXT:    s_movk_i32 s4, 0x7c00
 ; GFX7SELDAG-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
-; GFX7SELDAG-NEXT:    v_cmp_gt_i32_e32 vcc, s4, v0
+; GFX7SELDAG-NEXT:    v_cmp_ge_i32_e32 vcc, s4, v0
 ; GFX7SELDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
 ; GFX7SELDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -2112,7 +2112,7 @@ define i1 @isnormal_f16(half %x) {
 ; GFX7SELDAG-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
 ; GFX7SELDAG-NEXT:    v_add_i32_e32 v0, vcc, 0xfffffc00, v0
 ; GFX7SELDAG-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX7SELDAG-NEXT:    v_cmp_gt_u32_e32 vcc, s4, v0
+; GFX7SELDAG-NEXT:    v_cmp_ge_u32_e32 vcc, s4, v0
 ; GFX7SELDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
 ; GFX7SELDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -2190,7 +2190,7 @@ define i1 @not_isnormal_f16(half %x) {
 ; GFX7SELDAG-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
 ; GFX7SELDAG-NEXT:    v_add_i32_e32 v0, vcc, 0xfffffc00, v0
 ; GFX7SELDAG-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX7SELDAG-NEXT:    v_cmp_lt_u32_e32 vcc, s4, v0
+; GFX7SELDAG-NEXT:    v_cmp_le_u32_e32 vcc, s4, v0
 ; GFX7SELDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
 ; GFX7SELDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -2274,8 +2274,8 @@ define i1 @not_is_plus_normal_f16(half %x) {
 ; GFX7SELDAG-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
 ; GFX7SELDAG-NEXT:    v_add_i32_e32 v0, vcc, 0xfffffc00, v0
 ; GFX7SELDAG-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX7SELDAG-NEXT:    v_cmp_gt_i32_e64 s[4:5], 0, v1
-; GFX7SELDAG-NEXT:    v_cmp_lt_u32_e32 vcc, s6, v0
+; GFX7SELDAG-NEXT:    v_cmp_ge_i32_e64 s[4:5], 0, v1
+; GFX7SELDAG-NEXT:    v_cmp_le_u32_e32 vcc, s6, v0
 ; GFX7SELDAG-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
 ; GFX7SELDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
 ; GFX7SELDAG-NEXT:    s_setpc_b64 s[30:31]
@@ -2368,8 +2368,8 @@ define i1 @not_is_neg_normal_f16(half %x) {
 ; GFX7SELDAG-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
 ; GFX7SELDAG-NEXT:    v_add_i32_e32 v0, vcc, 0xfffffc00, v0
 ; GFX7SELDAG-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX7SELDAG-NEXT:    v_cmp_lt_i32_e64 s[4:5], -1, v1
-; GFX7SELDAG-NEXT:    v_cmp_lt_u32_e32 vcc, s6, v0
+; GFX7SELDAG-NEXT:    v_cmp_le_i32_e64 s[4:5], -1, v1
+; GFX7SELDAG-NEXT:    v_cmp_le_u32_e32 vcc, s6, v0
 ; GFX7SELDAG-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
 ; GFX7SELDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
 ; GFX7SELDAG-NEXT:    s_setpc_b64 s[30:31]
@@ -2460,7 +2460,7 @@ define i1 @issubnormal_f16(half %x) {
 ; GFX7SELDAG-NEXT:    s_movk_i32 s4, 0x3ff
 ; GFX7SELDAG-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
 ; GFX7SELDAG-NEXT:    v_add_i32_e32 v0, vcc, -1, v0
-; GFX7SELDAG-NEXT:    v_cmp_gt_u32_e32 vcc, s4, v0
+; GFX7SELDAG-NEXT:    v_cmp_ge_u32_e32 vcc, s4, v0
 ; GFX7SELDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
 ; GFX7SELDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -2537,7 +2537,7 @@ define i1 @not_issubnormal_f16(half %x) {
 ; GFX7SELDAG-NEXT:    s_movk_i32 s4, 0x3fe
 ; GFX7SELDAG-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
 ; GFX7SELDAG-NEXT:    v_add_i32_e32 v0, vcc, -1, v0
-; GFX7SELDAG-NEXT:    v_cmp_lt_u32_e32 vcc, s4, v0
+; GFX7SELDAG-NEXT:    v_cmp_le_u32_e32 vcc, s4, v0
 ; GFX7SELDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
 ; GFX7SELDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -2779,7 +2779,7 @@ define i1 @ispositive_f16(half %x) {
 ; GFX7SELDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX7SELDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
 ; GFX7SELDAG-NEXT:    s_movk_i32 s4, 0x7c01
-; GFX7SELDAG-NEXT:    v_cmp_gt_u32_e32 vcc, s4, v0
+; GFX7SELDAG-NEXT:    v_cmp_ge_u32_e32 vcc, s4, v0
 ; GFX7SELDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
 ; GFX7SELDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -2855,12 +2855,12 @@ define i1 @not_ispositive_f16(half %x) {
 ; GFX7SELDAG-NEXT:    s_mov_b32 s7, 0xfc00
 ; GFX7SELDAG-NEXT:    v_bfe_i32 v1, v0, 0, 16
 ; GFX7SELDAG-NEXT:    v_and_b32_e32 v2, 0x7fff, v0
-; GFX7SELDAG-NEXT:    v_cmp_gt_i32_e32 vcc, 0, v1
-; GFX7SELDAG-NEXT:    v_cmp_gt_i32_e64 s[4:5], s6, v2
+; GFX7SELDAG-NEXT:    v_cmp_ge_i32_e32 vcc, 0, v1
+; GFX7SELDAG-NEXT:    v_cmp_ge_i32_e64 s[4:5], s6, v2
 ; GFX7SELDAG-NEXT:    s_and_b64 s[4:5], s[4:5], vcc
 ; GFX7SELDAG-NEXT:    v_cmp_eq_u32_e32 vcc, s7, v0
 ; GFX7SELDAG-NEXT:    s_or_b64 s[4:5], s[4:5], vcc
-; GFX7SELDAG-NEXT:    v_cmp_lt_i32_e32 vcc, s6, v2
+; GFX7SELDAG-NEXT:    v_cmp_le_i32_e32 vcc, s6, v2
 ; GFX7SELDAG-NEXT:    s_or_b64 s[4:5], s[4:5], vcc
 ; GFX7SELDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
 ; GFX7SELDAG-NEXT:    s_setpc_b64 s[30:31]
@@ -2944,8 +2944,8 @@ define i1 @isnegative_f16(half %x) {
 ; GFX7SELDAG-NEXT:    s_mov_b32 s6, 0xfc00
 ; GFX7SELDAG-NEXT:    v_bfe_i32 v1, v0, 0, 16
 ; GFX7SELDAG-NEXT:    v_and_b32_e32 v2, 0x7fff, v0
-; GFX7SELDAG-NEXT:    v_cmp_gt_i32_e32 vcc, 0, v1
-; GFX7SELDAG-NEXT:    v_cmp_gt_i32_e64 s[4:5], s4, v2
+; GFX7SELDAG-NEXT:    v_cmp_ge_i32_e32 vcc, 0, v1
+; GFX7SELDAG-NEXT:    v_cmp_ge_i32_e64 s[4:5], s4, v2
 ; GFX7SELDAG-NEXT:    s_and_b64 s[4:5], s[4:5], vcc
 ; GFX7SELDAG-NEXT:    v_cmp_eq_u32_e32 vcc, s6, v0
 ; GFX7SELDAG-NEXT:    s_or_b64 s[4:5], s[4:5], vcc
@@ -3027,9 +3027,9 @@ define i1 @not_isnegative_f16(half %x) {
 ; GFX7SELDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
 ; GFX7SELDAG-NEXT:    s_movk_i32 s4, 0x7c01
 ; GFX7SELDAG-NEXT:    s_movk_i32 s5, 0x7c00
-; GFX7SELDAG-NEXT:    v_cmp_gt_u32_e32 vcc, s4, v0
+; GFX7SELDAG-NEXT:    v_cmp_ge_u32_e32 vcc, s4, v0
 ; GFX7SELDAG-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
-; GFX7SELDAG-NEXT:    v_cmp_lt_i32_e64 s[4:5], s5, v0
+; GFX7SELDAG-NEXT:    v_cmp_le_i32_e64 s[4:5], s5, v0
 ; GFX7SELDAG-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
 ; GFX7SELDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
 ; GFX7SELDAG-NEXT:    s_setpc_b64 s[30:31]
@@ -3109,7 +3109,7 @@ define i1 @iszero_or_nan_f16(half %x) {
 ; GFX7SELDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
 ; GFX7SELDAG-NEXT:    s_movk_i32 s4, 0x7c00
 ; GFX7SELDAG-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
-; GFX7SELDAG-NEXT:    v_cmp_lt_i32_e32 vcc, s4, v0
+; GFX7SELDAG-NEXT:    v_cmp_le_i32_e32 vcc, s4, v0
 ; GFX7SELDAG-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v0
 ; GFX7SELDAG-NEXT:    s_or_b64 s[4:5], s[4:5], vcc
 ; GFX7SELDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
@@ -3188,7 +3188,7 @@ define i1 @iszero_or_nan_f_daz(half %x) #0 {
 ; GFX7SELDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
 ; GFX7SELDAG-NEXT:    s_movk_i32 s4, 0x7c00
 ; GFX7SELDAG-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
-; GFX7SELDAG-NEXT:    v_cmp_lt_i32_e32 vcc, s4, v0
+; GFX7SELDAG-NEXT:    v_cmp_le_i32_e32 vcc, s4, v0
 ; GFX7SELDAG-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v0
 ; GFX7SELDAG-NEXT:    s_or_b64 s[4:5], s[4:5], vcc
 ; GFX7SELDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
@@ -3267,7 +3267,7 @@ define i1 @iszero_or_nan_f_maybe_daz(half %x) #1 {
 ; GFX7SELDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
 ; GFX7SELDAG-NEXT:    s_movk_i32 s4, 0x7c00
 ; GFX7SELDAG-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
-; GFX7SELDAG-NEXT:    v_cmp_lt_i32_e32 vcc, s4, v0
+; GFX7SELDAG-NEXT:    v_cmp_le_i32_e32 vcc, s4, v0
 ; GFX7SELDAG-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v0
 ; GFX7SELDAG-NEXT:    s_or_b64 s[4:5], s[4:5], vcc
 ; GFX7SELDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
@@ -3346,7 +3346,7 @@ define i1 @not_iszero_or_nan_f16(half %x) {
 ; GFX7SELDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
 ; GFX7SELDAG-NEXT:    s_movk_i32 s4, 0x7c01
 ; GFX7SELDAG-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
-; GFX7SELDAG-NEXT:    v_cmp_gt_i32_e32 vcc, s4, v0
+; GFX7SELDAG-NEXT:    v_cmp_ge_i32_e32 vcc, s4, v0
 ; GFX7SELDAG-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v0
 ; GFX7SELDAG-NEXT:    s_and_b64 s[4:5], s[4:5], vcc
 ; GFX7SELDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
@@ -3434,7 +3434,7 @@ define i1 @not_iszero_or_nan_f_daz(half %x) #0 {
 ; GFX7SELDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
 ; GFX7SELDAG-NEXT:    s_movk_i32 s4, 0x7c01
 ; GFX7SELDAG-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
-; GFX7SELDAG-NEXT:    v_cmp_gt_i32_e32 vcc, s4, v0
+; GFX7SELDAG-NEXT:    v_cmp_ge_i32_e32 vcc, s4, v0
 ; GFX7SELDAG-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v0
 ; GFX7SELDAG-NEXT:    s_and_b64 s[4:5], s[4:5], vcc
 ; GFX7SELDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
@@ -3522,7 +3522,7 @@ define i1 @not_iszero_or_nan_f_maybe_daz(half %x) #1 {
 ; GFX7SELDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
 ; GFX7SELDAG-NEXT:    s_movk_i32 s4, 0x7c01
 ; GFX7SELDAG-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
-; GFX7SELDAG-NEXT:    v_cmp_gt_i32_e32 vcc, s4, v0
+; GFX7SELDAG-NEXT:    v_cmp_ge_i32_e32 vcc, s4, v0
 ; GFX7SELDAG-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v0
 ; GFX7SELDAG-NEXT:    s_and_b64 s[4:5], s[4:5], vcc
 ; GFX7SELDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
@@ -3610,7 +3610,7 @@ define i1 @iszero_or_qnan_f16(half %x) {
 ; GFX7SELDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
 ; GFX7SELDAG-NEXT:    s_movk_i32 s4, 0x7dff
 ; GFX7SELDAG-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
-; GFX7SELDAG-NEXT:    v_cmp_lt_i32_e32 vcc, s4, v0
+; GFX7SELDAG-NEXT:    v_cmp_le_i32_e32 vcc, s4, v0
 ; GFX7SELDAG-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v0
 ; GFX7SELDAG-NEXT:    s_or_b64 s[4:5], s[4:5], vcc
 ; GFX7SELDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
@@ -3690,8 +3690,8 @@ define i1 @iszero_or_snan_f16(half %x) {
 ; GFX7SELDAG-NEXT:    s_movk_i32 s4, 0x7e00
 ; GFX7SELDAG-NEXT:    s_movk_i32 s5, 0x7c00
 ; GFX7SELDAG-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
-; GFX7SELDAG-NEXT:    v_cmp_gt_i32_e32 vcc, s4, v0
-; GFX7SELDAG-NEXT:    v_cmp_lt_i32_e64 s[4:5], s5, v0
+; GFX7SELDAG-NEXT:    v_cmp_ge_i32_e32 vcc, s4, v0
+; GFX7SELDAG-NEXT:    v_cmp_le_i32_e64 s[4:5], s5, v0
 ; GFX7SELDAG-NEXT:    s_and_b64 s[4:5], s[4:5], vcc
 ; GFX7SELDAG-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
 ; GFX7SELDAG-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
@@ -3774,19 +3774,19 @@ define i1 @not_iszero_or_qnan_f16(half %x) {
 ; GFX7SELDAG-NEXT:    s_movk_i32 s4, 0x7e00
 ; GFX7SELDAG-NEXT:    s_movk_i32 s8, 0x7c00
 ; GFX7SELDAG-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
-; GFX7SELDAG-NEXT:    v_cmp_gt_i32_e32 vcc, s4, v0
-; GFX7SELDAG-NEXT:    v_cmp_lt_i32_e64 s[4:5], s8, v0
+; GFX7SELDAG-NEXT:    v_cmp_ge_i32_e32 vcc, s4, v0
+; GFX7SELDAG-NEXT:    v_cmp_le_i32_e64 s[4:5], s8, v0
 ; GFX7SELDAG-NEXT:    s_and_b64 s[6:7], s[4:5], vcc
 ; GFX7SELDAG-NEXT:    v_add_i32_e64 v1, s[4:5], -1, v0
 ; GFX7SELDAG-NEXT:    s_movk_i32 s4, 0x3ff
 ; GFX7SELDAG-NEXT:    v_cmp_eq_u32_e32 vcc, s8, v0
-; GFX7SELDAG-NEXT:    v_cmp_gt_u32_e64 s[4:5], s4, v1
+; GFX7SELDAG-NEXT:    v_cmp_ge_u32_e64 s[4:5], s4, v1
 ; GFX7SELDAG-NEXT:    s_or_b64 s[4:5], s[4:5], vcc
 ; GFX7SELDAG-NEXT:    v_add_i32_e32 v0, vcc, 0xfffffc00, v0
 ; GFX7SELDAG-NEXT:    s_or_b64 s[4:5], s[4:5], s[6:7]
 ; GFX7SELDAG-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX7SELDAG-NEXT:    s_movk_i32 s6, 0x7800
-; GFX7SELDAG-NEXT:    v_cmp_gt_u32_e32 vcc, s6, v0
+; GFX7SELDAG-NEXT:    v_cmp_ge_u32_e32 vcc, s6, v0
 ; GFX7SELDAG-NEXT:    s_or_b64 s[4:5], s[4:5], vcc
 ; GFX7SELDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
 ; GFX7SELDAG-NEXT:    s_setpc_b64 s[30:31]
@@ -3881,14 +3881,14 @@ define i1 @not_iszero_or_snan_f16(half %x) {
 ; GFX7SELDAG-NEXT:    v_cmp_eq_u32_e32 vcc, s4, v0
 ; GFX7SELDAG-NEXT:    v_add_i32_e64 v1, s[4:5], -1, v0
 ; GFX7SELDAG-NEXT:    s_movk_i32 s4, 0x3ff
-; GFX7SELDAG-NEXT:    v_cmp_gt_u32_e64 s[4:5], s4, v1
+; GFX7SELDAG-NEXT:    v_cmp_ge_u32_e64 s[4:5], s4, v1
 ; GFX7SELDAG-NEXT:    s_or_b64 s[4:5], s[4:5], vcc
-; GFX7SELDAG-NEXT:    v_cmp_lt_i32_e32 vcc, s6, v0
+; GFX7SELDAG-NEXT:    v_cmp_le_i32_e32 vcc, s6, v0
 ; GFX7SELDAG-NEXT:    s_or_b64 s[4:5], s[4:5], vcc
 ; GFX7SELDAG-NEXT:    v_add_i32_e32 v0, vcc, 0xfffffc00, v0
 ; GFX7SELDAG-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX7SELDAG-NEXT:    s_movk_i32 s6, 0x7800
-; GFX7SELDAG-NEXT:    v_cmp_gt_u32_e32 vcc, s6, v0
+; GFX7SELDAG-NEXT:    v_cmp_ge_u32_e32 vcc, s6, v0
 ; GFX7SELDAG-NEXT:    s_or_b64 s[4:5], s[4:5], vcc
 ; GFX7SELDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
 ; GFX7SELDAG-NEXT:    s_setpc_b64 s[30:31]
@@ -3978,7 +3978,7 @@ define i1 @isinf_or_nan_f16(half %x) {
 ; GFX7SELDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
 ; GFX7SELDAG-NEXT:    s_movk_i32 s4, 0x7bff
 ; GFX7SELDAG-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
-; GFX7SELDAG-NEXT:    v_cmp_lt_i32_e32 vcc, s4, v0
+; GFX7SELDAG-NEXT:    v_cmp_le_i32_e32 vcc, s4, v0
 ; GFX7SELDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
 ; GFX7SELDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -4054,7 +4054,7 @@ define i1 @not_isinf_or_nan_f16(half %x) {
 ; GFX7SELDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
 ; GFX7SELDAG-NEXT:    s_movk_i32 s4, 0x7c00
 ; GFX7SELDAG-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
-; GFX7SELDAG-NEXT:    v_cmp_gt_i32_e32 vcc, s4, v0
+; GFX7SELDAG-NEXT:    v_cmp_ge_i32_e32 vcc, s4, v0
 ; GFX7SELDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
 ; GFX7SELDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.mulo.ll b/llvm/test/CodeGen/AMDGPU/llvm.mulo.ll
index 66cf8a309ccf0..95f513dbdd4ee 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.mulo.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.mulo.ll
@@ -155,12 +155,12 @@ define { i64, i1 } @smulo_i64_v_v(i64 %x, i64 %y) {
 ; SI-NEXT:    v_addc_u32_e32 v9, vcc, 0, v9, vcc
 ; SI-NEXT:    v_sub_i32_e32 v2, vcc, v8, v2
 ; SI-NEXT:    v_subbrev_u32_e32 v10, vcc, 0, v9, vcc
-; SI-NEXT:    v_cmp_gt_i32_e32 vcc, 0, v1
+; SI-NEXT:    v_cmp_ge_i32_e32 vcc, 0, v1
 ; SI-NEXT:    v_cndmask_b32_e32 v1, v9, v10, vcc
 ; SI-NEXT:    v_cndmask_b32_e32 v2, v8, v2, vcc
 ; SI-NEXT:    v_sub_i32_e32 v0, vcc, v2, v0
 ; SI-NEXT:    v_subbrev_u32_e32 v8, vcc, 0, v1, vcc
-; SI-NEXT:    v_cmp_gt_i32_e32 vcc, 0, v3
+; SI-NEXT:    v_cmp_ge_i32_e32 vcc, 0, v3
 ; SI-NEXT:    v_cndmask_b32_e32 v1, v1, v8, vcc
 ; SI-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
 ; SI-NEXT:    v_cmp_ne_u64_e32 vcc, v[0:1], v[6:7]
@@ -187,12 +187,12 @@ define { i64, i1 } @smulo_i64_v_v(i64 %x, i64 %y) {
 ; GFX9-NEXT:    v_addc_co_u32_e32 v9, vcc, 0, v9, vcc
 ; GFX9-NEXT:    v_sub_co_u32_e32 v2, vcc, v7, v2
 ; GFX9-NEXT:    v_subbrev_co_u32_e32 v10, vcc, 0, v9, vcc
-; GFX9-NEXT:    v_cmp_gt_i32_e32 vcc, 0, v4
+; GFX9-NEXT:    v_cmp_ge_i32_e32 vcc, 0, v4
 ; GFX9-NEXT:    v_cndmask_b32_e32 v2, v7, v2, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v4, v9, v10, vcc
 ; GFX9-NEXT:    v_sub_co_u32_e32 v5, vcc, v2, v5
 ; GFX9-NEXT:    v_subbrev_co_u32_e32 v7, vcc, 0, v4, vcc
-; GFX9-NEXT:    v_cmp_gt_i32_e32 vcc, 0, v3
+; GFX9-NEXT:    v_cmp_ge_i32_e32 vcc, 0, v3
 ; GFX9-NEXT:    v_add3_u32 v1, v1, v6, v8
 ; GFX9-NEXT:    v_cndmask_b32_e32 v3, v4, v7, vcc
 ; GFX9-NEXT:    v_ashrrev_i32_e32 v4, 31, v1
@@ -221,13 +221,13 @@ define { i64, i1 } @smulo_i64_v_v(i64 %x, i64 %y) {
 ; GFX10-NEXT:    v_add_co_ci_u32_e32 v9, vcc_lo, 0, v9, vcc_lo
 ; GFX10-NEXT:    v_sub_co_u32 v2, vcc_lo, v7, v2
 ; GFX10-NEXT:    v_subrev_co_ci_u32_e32 v10, vcc_lo, 0, v9, vcc_lo
-; GFX10-NEXT:    v_cmp_gt_i32_e32 vcc_lo, 0, v5
+; GFX10-NEXT:    v_cmp_ge_i32_e32 vcc_lo, 0, v5
 ; GFX10-NEXT:    v_cndmask_b32_e32 v6, v7, v2, vcc_lo
 ; GFX10-NEXT:    v_cndmask_b32_e32 v5, v9, v10, vcc_lo
 ; GFX10-NEXT:    v_ashrrev_i32_e32 v2, 31, v1
 ; GFX10-NEXT:    v_sub_co_u32 v4, vcc_lo, v6, v4
 ; GFX10-NEXT:    v_subrev_co_ci_u32_e32 v7, vcc_lo, 0, v5, vcc_lo
-; GFX10-NEXT:    v_cmp_gt_i32_e32 vcc_lo, 0, v3
+; GFX10-NEXT:    v_cmp_ge_i32_e32 vcc_lo, 0, v3
 ; GFX10-NEXT:    v_mov_b32_e32 v3, v2
 ; GFX10-NEXT:    v_cndmask_b32_e32 v5, v5, v7, vcc_lo
 ; GFX10-NEXT:    v_cndmask_b32_e32 v4, v6, v4, vcc_lo
@@ -259,7 +259,7 @@ define { i64, i1 } @smulo_i64_v_v(i64 %x, i64 %y) {
 ; GFX11-NEXT:    v_sub_co_u32 v2, vcc_lo, v7, v2
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_3)
 ; GFX11-NEXT:    v_subrev_co_ci_u32_e64 v10, null, 0, v9, vcc_lo
-; GFX11-NEXT:    v_cmp_gt_i32_e32 vcc_lo, 0, v5
+; GFX11-NEXT:    v_cmp_ge_i32_e32 vcc_lo, 0, v5
 ; GFX11-NEXT:    v_cndmask_b32_e32 v6, v7, v2, vcc_lo
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
 ; GFX11-NEXT:    v_cndmask_b32_e32 v5, v9, v10, vcc_lo
@@ -267,7 +267,7 @@ define { i64, i1 } @smulo_i64_v_v(i64 %x, i64 %y) {
 ; GFX11-NEXT:    v_sub_co_u32 v4, vcc_lo, v6, v4
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_4)
 ; GFX11-NEXT:    v_subrev_co_ci_u32_e64 v7, null, 0, v5, vcc_lo
-; GFX11-NEXT:    v_cmp_gt_i32_e32 vcc_lo, 0, v3
+; GFX11-NEXT:    v_cmp_ge_i32_e32 vcc_lo, 0, v3
 ; GFX11-NEXT:    v_mov_b32_e32 v3, v2
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX11-NEXT:    v_dual_cndmask_b32 v4, v6, v4 :: v_dual_cndmask_b32 v5, v5, v7
@@ -307,7 +307,7 @@ define { i64, i1 } @smulo_i64_v_v(i64 %x, i64 %y) {
 ; GFX12-NEXT:    v_sub_co_u32 v2, vcc_lo, v7, v2
 ; GFX12-NEXT:    s_wait_alu 0xfffd
 ; GFX12-NEXT:    v_subrev_co_ci_u32_e64 v10, null, 0, v9, vcc_lo
-; GFX12-NEXT:    v_cmp_gt_i32_e32 vcc_lo, 0, v5
+; GFX12-NEXT:    v_cmp_ge_i32_e32 vcc_lo, 0, v5
 ; GFX12-NEXT:    s_wait_alu 0xfffd
 ; GFX12-NEXT:    v_cndmask_b32_e32 v6, v7, v2, vcc_lo
 ; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
@@ -317,7 +317,7 @@ define { i64, i1 } @smulo_i64_v_v(i64 %x, i64 %y) {
 ; GFX12-NEXT:    s_wait_alu 0xfffd
 ; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_3) | instid1(VALU_DEP_3)
 ; GFX12-NEXT:    v_subrev_co_ci_u32_e64 v7, null, 0, v5, vcc_lo
-; GFX12-NEXT:    v_cmp_gt_i32_e32 vcc_lo, 0, v3
+; GFX12-NEXT:    v_cmp_ge_i32_e32 vcc_lo, 0, v3
 ; GFX12-NEXT:    v_mov_b32_e32 v3, v2
 ; GFX12-NEXT:    s_wait_alu 0xfffd
 ; GFX12-NEXT:    v_dual_cndmask_b32 v4, v6, v4 :: v_dual_cndmask_b32 v5, v5, v7
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.round.f64.ll b/llvm/test/CodeGen/AMDGPU/llvm.round.f64.ll
index af914bd4043cf..5388698ac7dd8 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.round.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.round.f64.ll
@@ -88,10 +88,10 @@ define amdgpu_kernel void @v_round_f64(ptr addrspace(1) %out, ptr addrspace(1) %
 ; SI-NEXT:    v_not_b32_e32 v4, v4
 ; SI-NEXT:    v_and_b32_e32 v5, v3, v5
 ; SI-NEXT:    v_and_b32_e32 v4, v2, v4
-; SI-NEXT:    v_cmp_gt_i32_e32 vcc, 0, v6
+; SI-NEXT:    v_cmp_ge_i32_e32 vcc, 0, v6
 ; SI-NEXT:    v_cndmask_b32_e32 v5, v5, v7, vcc
 ; SI-NEXT:    v_cndmask_b32_e64 v4, v4, 0, vcc
-; SI-NEXT:    v_cmp_lt_i32_e32 vcc, 51, v6
+; SI-NEXT:    v_cmp_le_i32_e32 vcc, 51, v6
 ; SI-NEXT:    v_cndmask_b32_e32 v5, v5, v3, vcc
 ; SI-NEXT:    v_cndmask_b32_e32 v4, v4, v2, vcc
 ; SI-NEXT:    v_add_f64 v[6:7], v[2:3], -v[4:5]
diff --git a/llvm/test/CodeGen/AMDGPU/loop_exit_with_xor.ll b/llvm/test/CodeGen/AMDGPU/loop_exit_with_xor.ll
index 2864e0554a27e..2539d7ad00283 100644
--- a/llvm/test/CodeGen/AMDGPU/loop_exit_with_xor.ll
+++ b/llvm/test/CodeGen/AMDGPU/loop_exit_with_xor.ll
@@ -23,8 +23,8 @@ define void @needs_and(i32 %arg) {
 ; GCN-NEXT:    s_cbranch_execz .LBB0_4
 ; GCN-NEXT:  .LBB0_2: ; %loop
 ; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GCN-NEXT:    v_cmp_gt_u32_e64 s[4:5], s10, v0
-; GCN-NEXT:    v_cmp_le_u32_e32 vcc, s10, v0
+; GCN-NEXT:    v_cmp_ge_u32_e64 s[4:5], s10, v0
+; GCN-NEXT:    v_cmp_lt_u32_e32 vcc, s10, v0
 ; GCN-NEXT:    s_and_saveexec_b64 s[8:9], s[4:5]
 ; GCN-NEXT:    s_cbranch_execz .LBB0_1
 ; GCN-NEXT:  ; %bb.3: ; %then
@@ -70,7 +70,7 @@ define void @doesnt_need_and(i32 %arg) {
 ; GCN-NEXT:  .LBB1_1: ; %loop
 ; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GCN-NEXT:    s_add_i32 s6, s6, 1
-; GCN-NEXT:    v_cmp_le_u32_e32 vcc, s6, v0
+; GCN-NEXT:    v_cmp_lt_u32_e32 vcc, s6, v0
 ; GCN-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
 ; GCN-NEXT:    s_andn2_b64 exec, exec, s[4:5]
 ; GCN-NEXT:    s_cbranch_execnz .LBB1_1
@@ -114,7 +114,7 @@ define void @break_cond_is_arg(i32 %arg, i1 %breakcond) {
 ; GCN-NEXT:    s_cbranch_execz .LBB2_4
 ; GCN-NEXT:  .LBB2_2: ; %loop
 ; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GCN-NEXT:    v_cmp_gt_u32_e64 s[4:5], s10, v0
+; GCN-NEXT:    v_cmp_ge_u32_e64 s[4:5], s10, v0
 ; GCN-NEXT:    s_and_saveexec_b64 s[8:9], s[4:5]
 ; GCN-NEXT:    s_cbranch_execz .LBB2_1
 ; GCN-NEXT:  ; %bb.3: ; %then
diff --git a/llvm/test/CodeGen/AMDGPU/machine-sink-temporal-divergence-swdev407790.ll b/llvm/test/CodeGen/AMDGPU/machine-sink-temporal-divergence-swdev407790.ll
index 553d7e09390fd..c7be51c0636be 100644
--- a/llvm/test/CodeGen/AMDGPU/machine-sink-temporal-divergence-swdev407790.ll
+++ b/llvm/test/CodeGen/AMDGPU/machine-sink-temporal-divergence-swdev407790.ll
@@ -339,7 +339,7 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
 ; CHECK-NEXT:    s_or_b32 exec_lo, exec_lo, s55
 ; CHECK-NEXT:  ; %bb.24: ; in Loop: Header=BB0_5 Depth=1
 ; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc_lo, s54, v45
-; CHECK-NEXT:    v_cmp_lt_u32_e64 s4, 59, v47
+; CHECK-NEXT:    v_cmp_le_u32_e64 s4, 59, v47
 ; CHECK-NEXT:    v_add_nc_u32_e32 v46, 1, v46
 ; CHECK-NEXT:    s_mov_b32 s55, s54
 ; CHECK-NEXT:    s_or_b32 s4, vcc_lo, s4
@@ -461,7 +461,7 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
 ; CHECK-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; CHECK-NEXT:    v_bfe_u32 v0, v0, v74, 4
 ; CHECK-NEXT:    s_mov_b32 s4, exec_lo
-; CHECK-NEXT:    v_cmpx_gt_u32_e32 12, v0
+; CHECK-NEXT:    v_cmpx_ge_u32_e32 12, v0
 ; CHECK-NEXT:    s_xor_b32 s4, exec_lo, s4
 ; CHECK-NEXT:    s_cbranch_execz .LBB0_31
 ; CHECK-NEXT:  ; %bb.30: ; in Loop: Header=BB0_28 Depth=1
@@ -967,7 +967,7 @@ define protected amdgpu_kernel void @kernel_round1_short(ptr addrspace(1) nocapt
 ; CHECK-NEXT:  ; %bb.12: ; %.32
 ; CHECK-NEXT:    ; in Loop: Header=BB1_1 Depth=1
 ; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc_lo, s53, v45
-; CHECK-NEXT:    v_cmp_lt_u32_e64 s4, 59, v43
+; CHECK-NEXT:    v_cmp_le_u32_e64 s4, 59, v43
 ; CHECK-NEXT:    s_or_b32 s4, vcc_lo, s4
 ; CHECK-NEXT:    s_and_b32 s4, exec_lo, s4
 ; CHECK-NEXT:    s_or_b32 s52, s4, s52
diff --git a/llvm/test/CodeGen/AMDGPU/memintrinsic-unroll.ll b/llvm/test/CodeGen/AMDGPU/memintrinsic-unroll.ll
index 9cc42ac448067..3e543a5ad1e0a 100644
--- a/llvm/test/CodeGen/AMDGPU/memintrinsic-unroll.ll
+++ b/llvm/test/CodeGen/AMDGPU/memintrinsic-unroll.ll
@@ -71,7 +71,7 @@ define void @memcpy_p0_p0_sz2048(ptr addrspace(0) align 1 %dst, ptr addrspace(0)
 ; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[84:87]
 ; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(15)
 ; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[96:99] offset:16
-; CHECK-NEXT:    v_cmp_gt_u64_e64 s6, 0x800, s[4:5]
+; CHECK-NEXT:    v_cmp_ge_u64_e64 s6, 0x800, s[4:5]
 ; CHECK-NEXT:    s_and_b32 vcc_lo, exec_lo, s6
 ; CHECK-NEXT:    s_cbranch_vccnz .LBB0_1
 ; CHECK-NEXT:  ; %bb.2: ; %memcpy-split
@@ -147,7 +147,7 @@ define void @memcpy_p0_p0_sz2048(ptr addrspace(0) align 1 %dst, ptr addrspace(0)
 ; ALIGNED-NEXT:    buffer_load_dword v50, off, s[0:3], s32 offset:88
 ; ALIGNED-NEXT:    buffer_load_dword v49, off, s[0:3], s32 offset:84
 ; ALIGNED-NEXT:    buffer_load_dword v36, off, s[0:3], s32 offset:80
-; ALIGNED-NEXT:    v_cmp_gt_u64_e64 s6, 0x800, s[4:5]
+; ALIGNED-NEXT:    v_cmp_ge_u64_e64 s6, 0x800, s[4:5]
 ; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
 ; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v51 offset:238
 ; ALIGNED-NEXT:    flat_store_byte v[16:17], v51 offset:236
@@ -749,7 +749,7 @@ define void @memcpy_p0_p0_sz2048(ptr addrspace(0) align 1 %dst, ptr addrspace(0)
 ; UNROLL3-NEXT:    flat_store_dwordx4 v[16:17], v[8:11] offset:16
 ; UNROLL3-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(2)
 ; UNROLL3-NEXT:    flat_store_dwordx4 v[16:17], v[12:15] offset:32
-; UNROLL3-NEXT:    v_cmp_gt_u64_e64 s6, 0x7e0, s[4:5]
+; UNROLL3-NEXT:    v_cmp_ge_u64_e64 s6, 0x7e0, s[4:5]
 ; UNROLL3-NEXT:    s_and_b32 vcc_lo, exec_lo, s6
 ; UNROLL3-NEXT:    s_cbranch_vccnz .LBB0_1
 ; UNROLL3-NEXT:  ; %bb.2: ; %memcpy-split
@@ -828,7 +828,7 @@ define void @memcpy_p1_p1_sz2048(ptr addrspace(1) align 1 %dst, ptr addrspace(1)
 ; CHECK-NEXT:    global_store_dwordx4 v[100:101], v[84:87], off
 ; CHECK-NEXT:    s_waitcnt vmcnt(0)
 ; CHECK-NEXT:    global_store_dwordx4 v[100:101], v[96:99], off offset:16
-; CHECK-NEXT:    v_cmp_gt_u64_e64 s6, 0x800, s[4:5]
+; CHECK-NEXT:    v_cmp_ge_u64_e64 s6, 0x800, s[4:5]
 ; CHECK-NEXT:    s_and_b32 vcc_lo, exec_lo, s6
 ; CHECK-NEXT:    s_cbranch_vccnz .LBB1_1
 ; CHECK-NEXT:  ; %bb.2: ; %memcpy-split
@@ -902,7 +902,7 @@ define void @memcpy_p1_p1_sz2048(ptr addrspace(1) align 1 %dst, ptr addrspace(1)
 ; ALIGNED-NEXT:    buffer_load_dword v50, off, s[0:3], s32 offset:88
 ; ALIGNED-NEXT:    buffer_load_dword v49, off, s[0:3], s32 offset:84
 ; ALIGNED-NEXT:    buffer_load_dword v36, off, s[0:3], s32 offset:80
-; ALIGNED-NEXT:    v_cmp_gt_u64_e64 s6, 0x800, s[4:5]
+; ALIGNED-NEXT:    v_cmp_ge_u64_e64 s6, 0x800, s[4:5]
 ; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
 ; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v51, off offset:238
 ; ALIGNED-NEXT:    global_store_byte v[16:17], v51, off offset:236
@@ -1503,7 +1503,7 @@ define void @memcpy_p1_p1_sz2048(ptr addrspace(1) align 1 %dst, ptr addrspace(1)
 ; UNROLL3-NEXT:    global_store_dwordx4 v[16:17], v[8:11], off offset:16
 ; UNROLL3-NEXT:    s_waitcnt vmcnt(0)
 ; UNROLL3-NEXT:    global_store_dwordx4 v[16:17], v[12:15], off offset:32
-; UNROLL3-NEXT:    v_cmp_gt_u64_e64 s6, 0x7e0, s[4:5]
+; UNROLL3-NEXT:    v_cmp_ge_u64_e64 s6, 0x7e0, s[4:5]
 ; UNROLL3-NEXT:    s_and_b32 vcc_lo, exec_lo, s6
 ; UNROLL3-NEXT:    s_cbranch_vccnz .LBB1_1
 ; UNROLL3-NEXT:  ; %bb.2: ; %memcpy-split
@@ -1581,7 +1581,7 @@ define void @memcpy_p0_p4_sz2048(ptr addrspace(0) align 1 %dst, ptr addrspace(4)
 ; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[84:87] offset:16
 ; CHECK-NEXT:    s_waitcnt vmcnt(0)
 ; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[96:99]
-; CHECK-NEXT:    v_cmp_gt_u64_e64 s6, 0x800, s[4:5]
+; CHECK-NEXT:    v_cmp_ge_u64_e64 s6, 0x800, s[4:5]
 ; CHECK-NEXT:    s_and_b32 vcc_lo, exec_lo, s6
 ; CHECK-NEXT:    s_cbranch_vccnz .LBB2_1
 ; CHECK-NEXT:  ; %bb.2: ; %memcpy-split
@@ -1815,7 +1815,7 @@ define void @memcpy_p0_p4_sz2048(ptr addrspace(0) align 1 %dst, ptr addrspace(4)
 ; ALIGNED-NEXT:    flat_store_byte v[96:97], v69 offset:173
 ; ALIGNED-NEXT:    s_waitcnt vmcnt(6)
 ; ALIGNED-NEXT:    v_lshrrev_b32_e32 v69, 24, v31
-; ALIGNED-NEXT:    v_cmp_gt_u64_e64 s6, 0x800, s[4:5]
+; ALIGNED-NEXT:    v_cmp_ge_u64_e64 s6, 0x800, s[4:5]
 ; ALIGNED-NEXT:    flat_store_byte v[96:97], v70 offset:163
 ; ALIGNED-NEXT:    v_lshrrev_b32_e32 v70, 8, v31
 ; ALIGNED-NEXT:    flat_store_byte v[96:97], v71 offset:161
@@ -2110,7 +2110,7 @@ define void @memcpy_p0_p4_sz2048(ptr addrspace(0) align 1 %dst, ptr addrspace(4)
 ; UNROLL3-NEXT:    flat_store_dwordx4 v[16:17], v[8:11]
 ; UNROLL3-NEXT:    s_waitcnt vmcnt(0)
 ; UNROLL3-NEXT:    flat_store_dwordx4 v[16:17], v[12:15] offset:32
-; UNROLL3-NEXT:    v_cmp_gt_u64_e64 s6, 0x7e0, s[4:5]
+; UNROLL3-NEXT:    v_cmp_ge_u64_e64 s6, 0x7e0, s[4:5]
 ; UNROLL3-NEXT:    s_and_b32 vcc_lo, exec_lo, s6
 ; UNROLL3-NEXT:    s_cbranch_vccnz .LBB2_1
 ; UNROLL3-NEXT:  ; %bb.2: ; %memcpy-split
@@ -2203,7 +2203,7 @@ define void @memcpy_p5_p5_sz2048(ptr addrspace(5) align 1 %dst, ptr addrspace(5)
 ; CHECK-NEXT:    s_add_u32 s4, s4, 0x100
 ; CHECK-NEXT:    s_addc_u32 s5, s5, 0
 ; CHECK-NEXT:    v_add_nc_u32_e32 v1, 0x100, v1
-; CHECK-NEXT:    v_cmp_gt_u64_e64 s6, 0x800, s[4:5]
+; CHECK-NEXT:    v_cmp_ge_u64_e64 s6, 0x800, s[4:5]
 ; CHECK-NEXT:    s_waitcnt vmcnt(62)
 ; CHECK-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen offset:252
 ; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen offset:248
@@ -2447,7 +2447,7 @@ define void @memcpy_p5_p5_sz2048(ptr addrspace(5) align 1 %dst, ptr addrspace(5)
 ; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:203
 ; ALIGNED-NEXT:    s_add_u32 s4, s4, 0x100
 ; ALIGNED-NEXT:    s_addc_u32 s5, s5, 0
-; ALIGNED-NEXT:    v_cmp_gt_u64_e64 s6, 0x800, s[4:5]
+; ALIGNED-NEXT:    v_cmp_ge_u64_e64 s6, 0x800, s[4:5]
 ; ALIGNED-NEXT:    s_and_b32 vcc_lo, exec_lo, s6
 ; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
 ; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:712 ; 4-byte Folded Spill
@@ -3514,7 +3514,7 @@ define void @memcpy_p5_p5_sz2048(ptr addrspace(5) align 1 %dst, ptr addrspace(5)
 ; UNROLL3-NEXT:    s_add_u32 s4, s4, 48
 ; UNROLL3-NEXT:    s_addc_u32 s5, s5, 0
 ; UNROLL3-NEXT:    v_add_nc_u32_e32 v2, 48, v2
-; UNROLL3-NEXT:    v_cmp_gt_u64_e64 s6, 0x7e0, s[4:5]
+; UNROLL3-NEXT:    v_cmp_ge_u64_e64 s6, 0x7e0, s[4:5]
 ; UNROLL3-NEXT:    s_waitcnt vmcnt(11)
 ; UNROLL3-NEXT:    buffer_store_dword v4, v3, s[0:3], 0 offen offset:44
 ; UNROLL3-NEXT:    s_waitcnt vmcnt(10)
@@ -3652,7 +3652,7 @@ define void @memcpy_p0_p5_sz2048(ptr addrspace(0) align 1 %dst, ptr addrspace(5)
 ; CHECK-NEXT:    v_add_co_ci_u32_e64 v101, null, s5, v1, vcc_lo
 ; CHECK-NEXT:    s_addc_u32 s5, s5, 0
 ; CHECK-NEXT:    v_add_nc_u32_e32 v2, 0x100, v2
-; CHECK-NEXT:    v_cmp_gt_u64_e64 s6, 0x800, s[4:5]
+; CHECK-NEXT:    v_cmp_ge_u64_e64 s6, 0x800, s[4:5]
 ; CHECK-NEXT:    s_waitcnt vmcnt(35)
 ; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[31:34] offset:240
 ; CHECK-NEXT:    s_waitcnt vmcnt(32)
@@ -4608,7 +4608,7 @@ define void @memcpy_p0_p5_sz2048(ptr addrspace(0) align 1 %dst, ptr addrspace(5)
 ; ALIGNED-NEXT:    buffer_store_dword v123, off, s[0:3], s32 offset:244
 ; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1220 ; 4-byte Folded Reload
 ; ALIGNED-NEXT:    s_addc_u32 s5, s5, 0
-; ALIGNED-NEXT:    v_cmp_gt_u64_e64 s6, 0x800, s[4:5]
+; ALIGNED-NEXT:    v_cmp_ge_u64_e64 s6, 0x800, s[4:5]
 ; ALIGNED-NEXT:    s_and_b32 vcc_lo, exec_lo, s6
 ; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
 ; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:240
@@ -5375,7 +5375,7 @@ define void @memcpy_p0_p5_sz2048(ptr addrspace(0) align 1 %dst, ptr addrspace(5)
 ; UNROLL3-NEXT:    v_add_co_ci_u32_e64 v17, null, s5, v1, vcc_lo
 ; UNROLL3-NEXT:    s_addc_u32 s5, s5, 0
 ; UNROLL3-NEXT:    v_add_nc_u32_e32 v3, 48, v3
-; UNROLL3-NEXT:    v_cmp_gt_u64_e64 s6, 0x7e0, s[4:5]
+; UNROLL3-NEXT:    v_cmp_ge_u64_e64 s6, 0x7e0, s[4:5]
 ; UNROLL3-NEXT:    s_waitcnt vmcnt(4)
 ; UNROLL3-NEXT:    flat_store_dwordx4 v[16:17], v[8:11] offset:16
 ; UNROLL3-NEXT:    flat_store_dwordx4 v[16:17], v[4:7]
diff --git a/llvm/test/CodeGen/AMDGPU/nested-loop-conditions.ll b/llvm/test/CodeGen/AMDGPU/nested-loop-conditions.ll
index 9a2d969f94e3e..de87d4a06f26b 100644
--- a/llvm/test/CodeGen/AMDGPU/nested-loop-conditions.ll
+++ b/llvm/test/CodeGen/AMDGPU/nested-loop-conditions.ll
@@ -93,7 +93,6 @@ define amdgpu_kernel void @reduced_nested_loop_conditions(ptr addrspace(3) captu
 ; IR:       [[BB23]]:
 ; IR-NEXT:    call void @llvm.amdgcn.end.cf.i64(i64 [[TMP6]])
 ; IR-NEXT:    ret void
-;
 bb:
   %my.tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #1
   %my.tmp1 = getelementptr inbounds i64, ptr addrspace(3) %arg, i32 %my.tmp
@@ -151,7 +150,7 @@ define amdgpu_kernel void @nested_loop_conditions(ptr addrspace(1) captures(none
 ; GCN-NEXT:    s_mov_b32 s2, -1
 ; GCN-NEXT:    buffer_load_dword v0, off, s[0:3], 0 glc
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    v_cmp_lt_i32_e32 vcc, 8, v0
+; GCN-NEXT:    v_cmp_le_i32_e32 vcc, 8, v0
 ; GCN-NEXT:    s_cbranch_vccnz .LBB1_6
 ; GCN-NEXT:  ; %bb.1: ; %bb14.lr.ph
 ; GCN-NEXT:    s_load_dword s4, s[0:1], 0x0
@@ -175,14 +174,14 @@ define amdgpu_kernel void @nested_loop_conditions(ptr addrspace(1) captures(none
 ; GCN-NEXT:    ; => This Inner Loop Header: Depth=2
 ; GCN-NEXT:    buffer_load_dword v0, off, s[0:3], 0 glc
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    v_cmp_lt_i32_e32 vcc, 8, v0
+; GCN-NEXT:    v_cmp_le_i32_e32 vcc, 8, v0
 ; GCN-NEXT:    s_cbranch_vccnz .LBB1_4
 ; GCN-NEXT:  ; %bb.5: ; %bb21
 ; GCN-NEXT:    ; in Loop: Header=BB1_3 Depth=1
 ; GCN-NEXT:    s_load_dword s4, s[0:1], 0x0
 ; GCN-NEXT:    buffer_load_dword v0, off, s[0:3], 0 glc
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    v_cmp_lt_i32_e64 s[0:1], 8, v0
+; GCN-NEXT:    v_cmp_le_i32_e64 s[0:1], 8, v0
 ; GCN-NEXT:    s_branch .LBB1_2
 ; GCN-NEXT:  .LBB1_6: ; %bb31
 ; GCN-NEXT:    v_mov_b32_e32 v0, 0
@@ -278,7 +277,6 @@ define amdgpu_kernel void @nested_loop_conditions(ptr addrspace(1) captures(none
 ; IR-NEXT:    call void @llvm.amdgcn.end.cf.i64(i64 [[TMP7]])
 ; IR-NEXT:    store volatile i32 0, ptr addrspace(1) poison, align 4
 ; IR-NEXT:    ret void
-;
 bb:
   %my.tmp1134 = load volatile i32, ptr addrspace(1) poison
   %my.tmp1235 = icmp slt i32 %my.tmp1134, 9
diff --git a/llvm/test/CodeGen/AMDGPU/private-memory-atomics.ll b/llvm/test/CodeGen/AMDGPU/private-memory-atomics.ll
index 24a4d8fbde200..3ed858996f8e8 100644
--- a/llvm/test/CodeGen/AMDGPU/private-memory-atomics.ll
+++ b/llvm/test/CodeGen/AMDGPU/private-memory-atomics.ll
@@ -593,7 +593,7 @@ define i32 @atomicrmw_inc_private_i32(ptr addrspace(5) %ptr) {
 ; GCN-NEXT:    buffer_load_dword v1, v0, s[0:3], 0 offen
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-NEXT:    v_add_i32_e32 v2, vcc, 1, v1
-; GCN-NEXT:    v_cmp_gt_u32_e32 vcc, 4, v1
+; GCN-NEXT:    v_cmp_ge_u32_e32 vcc, 4, v1
 ; GCN-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
 ; GCN-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen
 ; GCN-NEXT:    v_mov_b32_e32 v0, v1
@@ -622,7 +622,7 @@ define i32 @atomicrmw_dec_private_i32(ptr addrspace(5) %ptr) {
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-NEXT:    v_add_i32_e32 v2, vcc, -1, v1
 ; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
-; GCN-NEXT:    v_cmp_lt_u32_e64 s[4:5], 4, v1
+; GCN-NEXT:    v_cmp_le_u32_e64 s[4:5], 4, v1
 ; GCN-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
 ; GCN-NEXT:    v_cndmask_b32_e64 v2, v2, 4, s[4:5]
 ; GCN-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen
diff --git a/llvm/test/CodeGen/AMDGPU/rem_i128.ll b/llvm/test/CodeGen/AMDGPU/rem_i128.ll
index d29a7a2dc5656..d009f18a1cebd 100644
--- a/llvm/test/CodeGen/AMDGPU/rem_i128.ll
+++ b/llvm/test/CodeGen/AMDGPU/rem_i128.ll
@@ -14,7 +14,7 @@ define i128 @v_srem_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-NEXT:    v_subb_co_u32_e32 v9, vcc, 0, v1, vcc
 ; GFX9-NEXT:    v_subb_co_u32_e32 v10, vcc, 0, v2, vcc
 ; GFX9-NEXT:    v_subb_co_u32_e32 v11, vcc, 0, v3, vcc
-; GFX9-NEXT:    v_cmp_gt_i64_e32 vcc, 0, v[2:3]
+; GFX9-NEXT:    v_cmp_ge_i64_e32 vcc, 0, v[2:3]
 ; GFX9-NEXT:    v_ashrrev_i32_e32 v20, 31, v3
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc
@@ -24,7 +24,7 @@ define i128 @v_srem_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-NEXT:    v_subb_co_u32_e32 v9, vcc, 0, v5, vcc
 ; GFX9-NEXT:    v_subb_co_u32_e32 v10, vcc, 0, v6, vcc
 ; GFX9-NEXT:    v_subb_co_u32_e32 v11, vcc, 0, v7, vcc
-; GFX9-NEXT:    v_cmp_gt_i64_e32 vcc, 0, v[6:7]
+; GFX9-NEXT:    v_cmp_ge_i64_e32 vcc, 0, v[6:7]
 ; GFX9-NEXT:    v_mov_b32_e32 v21, v20
 ; GFX9-NEXT:    v_cndmask_b32_e32 v22, v5, v9, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v23, v4, v8, vcc
@@ -69,7 +69,7 @@ define i128 @v_srem_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-NEXT:    v_subb_co_u32_e32 v8, vcc, 0, v9, vcc
 ; GFX9-NEXT:    v_subb_co_u32_e32 v9, vcc, 0, v9, vcc
 ; GFX9-NEXT:    s_mov_b64 s[6:7], 0x7f
-; GFX9-NEXT:    v_cmp_lt_u64_e32 vcc, s[6:7], v[6:7]
+; GFX9-NEXT:    v_cmp_le_u64_e32 vcc, s[6:7], v[6:7]
 ; GFX9-NEXT:    v_or_b32_e32 v12, v7, v9
 ; GFX9-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
 ; GFX9-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[8:9]
@@ -106,7 +106,7 @@ define i128 @v_srem_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-NEXT:    v_lshlrev_b64 v[6:7], v6, v[0:1]
 ; GFX9-NEXT:    v_or_b32_e32 v8, v10, v12
 ; GFX9-NEXT:    v_or_b32_e32 v9, v9, v11
-; GFX9-NEXT:    v_cmp_gt_u32_e64 s[4:5], 64, v13
+; GFX9-NEXT:    v_cmp_ge_u32_e64 s[4:5], 64, v13
 ; GFX9-NEXT:    v_lshlrev_b64 v[10:11], v13, v[0:1]
 ; GFX9-NEXT:    v_cndmask_b32_e64 v7, v7, v8, s[4:5]
 ; GFX9-NEXT:    v_cmp_eq_u32_e64 s[6:7], 0, v13
@@ -126,7 +126,7 @@ define i128 @v_srem_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-NEXT:    v_sub_u32_e32 v12, 64, v24
 ; GFX9-NEXT:    v_lshrrev_b64 v[8:9], v24, v[0:1]
 ; GFX9-NEXT:    v_lshlrev_b64 v[12:13], v12, v[2:3]
-; GFX9-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v24
+; GFX9-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v24
 ; GFX9-NEXT:    v_or_b32_e32 v12, v8, v12
 ; GFX9-NEXT:    v_subrev_u32_e32 v8, 64, v24
 ; GFX9-NEXT:    v_or_b32_e32 v13, v9, v13
@@ -1541,7 +1541,7 @@ define i128 @v_urem_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-NEXT:    v_mov_b32_e32 v11, 0
 ; GFX9-NEXT:    v_subb_co_u32_e32 v10, vcc, 0, v11, vcc
 ; GFX9-NEXT:    v_subb_co_u32_e32 v11, vcc, 0, v11, vcc
-; GFX9-NEXT:    v_cmp_lt_u64_e32 vcc, s[6:7], v[8:9]
+; GFX9-NEXT:    v_cmp_le_u64_e32 vcc, s[6:7], v[8:9]
 ; GFX9-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
 ; GFX9-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[10:11]
 ; GFX9-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
@@ -1578,7 +1578,7 @@ define i128 @v_urem_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-NEXT:    v_lshlrev_b64 v[8:9], v8, v[0:1]
 ; GFX9-NEXT:    v_or_b32_e32 v10, v12, v14
 ; GFX9-NEXT:    v_or_b32_e32 v11, v11, v13
-; GFX9-NEXT:    v_cmp_gt_u32_e64 s[4:5], 64, v15
+; GFX9-NEXT:    v_cmp_ge_u32_e64 s[4:5], 64, v15
 ; GFX9-NEXT:    v_cndmask_b32_e64 v9, v9, v10, s[4:5]
 ; GFX9-NEXT:    v_cndmask_b32_e64 v8, v8, v11, s[4:5]
 ; GFX9-NEXT:    v_lshlrev_b64 v[10:11], v15, v[0:1]
@@ -1598,7 +1598,7 @@ define i128 @v_urem_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-NEXT:    v_sub_u32_e32 v14, 64, v22
 ; GFX9-NEXT:    v_lshrrev_b64 v[12:13], v22, v[0:1]
 ; GFX9-NEXT:    v_lshlrev_b64 v[14:15], v14, v[2:3]
-; GFX9-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v22
+; GFX9-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v22
 ; GFX9-NEXT:    v_or_b32_e32 v14, v12, v14
 ; GFX9-NEXT:    v_subrev_u32_e32 v12, 64, v22
 ; GFX9-NEXT:    v_or_b32_e32 v15, v13, v15
diff --git a/llvm/test/CodeGen/AMDGPU/saddo.ll b/llvm/test/CodeGen/AMDGPU/saddo.ll
index 4177179b31c06..6945b24f42b52 100644
--- a/llvm/test/CodeGen/AMDGPU/saddo.ll
+++ b/llvm/test/CodeGen/AMDGPU/saddo.ll
@@ -250,7 +250,7 @@ define amdgpu_kernel void @v_saddo_i32(ptr addrspace(1) %out, ptr addrspace(1) %
 ; SI-NEXT:    s_mov_b32 s5, s3
 ; SI-NEXT:    s_waitcnt vmcnt(0)
 ; SI-NEXT:    v_add_i32_e32 v2, vcc, v0, v1
-; SI-NEXT:    v_cmp_gt_i32_e32 vcc, 0, v1
+; SI-NEXT:    v_cmp_ge_i32_e32 vcc, 0, v1
 ; SI-NEXT:    v_cmp_lt_i32_e64 s[0:1], v2, v0
 ; SI-NEXT:    s_xor_b64 s[0:1], vcc, s[0:1]
 ; SI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[0:1]
@@ -274,7 +274,7 @@ define amdgpu_kernel void @v_saddo_i32(ptr addrspace(1) %out, ptr addrspace(1) %
 ; VI-NEXT:    v_mov_b32_e32 v3, s3
 ; VI-NEXT:    s_waitcnt vmcnt(0)
 ; VI-NEXT:    v_add_u32_e32 v6, vcc, v4, v5
-; VI-NEXT:    v_cmp_gt_i32_e32 vcc, 0, v5
+; VI-NEXT:    v_cmp_ge_i32_e32 vcc, 0, v5
 ; VI-NEXT:    v_cmp_lt_i32_e64 s[0:1], v6, v4
 ; VI-NEXT:    s_xor_b64 s[0:1], vcc, s[0:1]
 ; VI-NEXT:    flat_store_dword v[0:1], v6
@@ -478,7 +478,7 @@ define amdgpu_kernel void @v_saddo_i64(ptr addrspace(1) %out, ptr addrspace(1) %
 ; SI-NEXT:    s_waitcnt vmcnt(0)
 ; SI-NEXT:    v_add_i32_e32 v4, vcc, v0, v2
 ; SI-NEXT:    v_addc_u32_e32 v5, vcc, v1, v3, vcc
-; SI-NEXT:    v_cmp_gt_i64_e32 vcc, 0, v[2:3]
+; SI-NEXT:    v_cmp_ge_i64_e32 vcc, 0, v[2:3]
 ; SI-NEXT:    v_cmp_lt_i64_e64 s[0:1], v[4:5], v[0:1]
 ; SI-NEXT:    buffer_store_dwordx2 v[4:5], off, s[8:11], 0
 ; SI-NEXT:    s_xor_b64 s[0:1], vcc, s[0:1]
@@ -503,7 +503,7 @@ define amdgpu_kernel void @v_saddo_i64(ptr addrspace(1) %out, ptr addrspace(1) %
 ; VI-NEXT:    s_waitcnt vmcnt(0)
 ; VI-NEXT:    v_add_u32_e32 v8, vcc, v0, v2
 ; VI-NEXT:    v_addc_u32_e32 v9, vcc, v1, v3, vcc
-; VI-NEXT:    v_cmp_gt_i64_e32 vcc, 0, v[2:3]
+; VI-NEXT:    v_cmp_ge_i64_e32 vcc, 0, v[2:3]
 ; VI-NEXT:    v_cmp_lt_i64_e64 s[0:1], v[8:9], v[0:1]
 ; VI-NEXT:    flat_store_dwordx2 v[4:5], v[8:9]
 ; VI-NEXT:    s_xor_b64 s[0:1], vcc, s[0:1]
@@ -521,7 +521,7 @@ define amdgpu_kernel void @v_saddo_i64(ptr addrspace(1) %out, ptr addrspace(1) %
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-NEXT:    v_add_co_u32_e32 v4, vcc, v0, v2
 ; GFX9-NEXT:    v_addc_co_u32_e32 v5, vcc, v1, v3, vcc
-; GFX9-NEXT:    v_cmp_gt_i64_e32 vcc, 0, v[2:3]
+; GFX9-NEXT:    v_cmp_ge_i64_e32 vcc, 0, v[2:3]
 ; GFX9-NEXT:    v_cmp_lt_i64_e64 s[0:1], v[4:5], v[0:1]
 ; GFX9-NEXT:    global_store_dwordx2 v6, v[4:5], s[8:9]
 ; GFX9-NEXT:    s_xor_b64 s[0:1], vcc, s[0:1]
@@ -540,7 +540,7 @@ define amdgpu_kernel void @v_saddo_i64(ptr addrspace(1) %out, ptr addrspace(1) %
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    v_add_co_u32 v4, vcc_lo, v0, v2
 ; GFX10-NEXT:    v_add_co_ci_u32_e32 v5, vcc_lo, v1, v3, vcc_lo
-; GFX10-NEXT:    v_cmp_gt_i64_e32 vcc_lo, 0, v[2:3]
+; GFX10-NEXT:    v_cmp_ge_i64_e32 vcc_lo, 0, v[2:3]
 ; GFX10-NEXT:    v_cmp_lt_i64_e64 s0, v[4:5], v[0:1]
 ; GFX10-NEXT:    s_xor_b32 s0, vcc_lo, s0
 ; GFX10-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
@@ -560,7 +560,7 @@ define amdgpu_kernel void @v_saddo_i64(ptr addrspace(1) %out, ptr addrspace(1) %
 ; GFX11-NEXT:    v_add_co_u32 v4, vcc_lo, v0, v2
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
 ; GFX11-NEXT:    v_add_co_ci_u32_e64 v5, null, v1, v3, vcc_lo
-; GFX11-NEXT:    v_cmp_gt_i64_e32 vcc_lo, 0, v[2:3]
+; GFX11-NEXT:    v_cmp_ge_i64_e32 vcc_lo, 0, v[2:3]
 ; GFX11-NEXT:    v_cmp_lt_i64_e64 s0, v[4:5], v[0:1]
 ; GFX11-NEXT:    s_xor_b32 s0, vcc_lo, s0
 ; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
@@ -603,9 +603,9 @@ define amdgpu_kernel void @v_saddo_v2i32(ptr addrspace(1) %out, ptr addrspace(1)
 ; SI-NEXT:    s_waitcnt vmcnt(0)
 ; SI-NEXT:    v_add_i32_e32 v5, vcc, v1, v3
 ; SI-NEXT:    v_add_i32_e32 v4, vcc, v0, v2
-; SI-NEXT:    v_cmp_gt_i32_e64 s[0:1], 0, v3
+; SI-NEXT:    v_cmp_ge_i32_e64 s[0:1], 0, v3
 ; SI-NEXT:    v_cmp_lt_i32_e64 s[4:5], v5, v1
-; SI-NEXT:    v_cmp_gt_i32_e32 vcc, 0, v2
+; SI-NEXT:    v_cmp_ge_i32_e32 vcc, 0, v2
 ; SI-NEXT:    v_cmp_lt_i32_e64 s[2:3], v4, v0
 ; SI-NEXT:    s_xor_b64 s[0:1], s[0:1], s[4:5]
 ; SI-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s[0:1]
@@ -632,9 +632,9 @@ define amdgpu_kernel void @v_saddo_v2i32(ptr addrspace(1) %out, ptr addrspace(1)
 ; VI-NEXT:    s_waitcnt vmcnt(0)
 ; VI-NEXT:    v_add_u32_e32 v9, vcc, v1, v3
 ; VI-NEXT:    v_add_u32_e32 v8, vcc, v0, v2
-; VI-NEXT:    v_cmp_gt_i32_e64 s[0:1], 0, v3
+; VI-NEXT:    v_cmp_ge_i32_e64 s[0:1], 0, v3
 ; VI-NEXT:    v_cmp_lt_i32_e64 s[4:5], v9, v1
-; VI-NEXT:    v_cmp_gt_i32_e32 vcc, 0, v2
+; VI-NEXT:    v_cmp_ge_i32_e32 vcc, 0, v2
 ; VI-NEXT:    v_cmp_lt_i32_e64 s[2:3], v8, v0
 ; VI-NEXT:    s_xor_b64 s[0:1], s[0:1], s[4:5]
 ; VI-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s[0:1]
diff --git a/llvm/test/CodeGen/AMDGPU/saddsat.ll b/llvm/test/CodeGen/AMDGPU/saddsat.ll
index 4e27cf20d3c98..2950e14da2d92 100644
--- a/llvm/test/CodeGen/AMDGPU/saddsat.ll
+++ b/llvm/test/CodeGen/AMDGPU/saddsat.ll
@@ -120,7 +120,7 @@ define i32 @v_saddsat_i32(i32 %lhs, i32 %rhs) {
 ; GFX6-LABEL: v_saddsat_i32:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-NEXT:    v_cmp_gt_i32_e32 vcc, 0, v1
+; GFX6-NEXT:    v_cmp_ge_i32_e32 vcc, 0, v1
 ; GFX6-NEXT:    v_add_i32_e64 v1, s[4:5], v0, v1
 ; GFX6-NEXT:    v_cmp_lt_i32_e64 s[4:5], v1, v0
 ; GFX6-NEXT:    v_ashrrev_i32_e32 v0, 31, v1
@@ -132,7 +132,7 @@ define i32 @v_saddsat_i32(i32 %lhs, i32 %rhs) {
 ; GFX8-LABEL: v_saddsat_i32:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_cmp_gt_i32_e32 vcc, 0, v1
+; GFX8-NEXT:    v_cmp_ge_i32_e32 vcc, 0, v1
 ; GFX8-NEXT:    v_add_u32_e64 v1, s[4:5], v0, v1
 ; GFX8-NEXT:    v_cmp_lt_i32_e64 s[4:5], v1, v0
 ; GFX8-NEXT:    v_ashrrev_i32_e32 v0, 31, v1
@@ -379,7 +379,7 @@ define <2 x i32> @v_saddsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
 ; GFX6-LABEL: v_saddsat_v2i32:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-NEXT:    v_cmp_gt_i32_e32 vcc, 0, v2
+; GFX6-NEXT:    v_cmp_ge_i32_e32 vcc, 0, v2
 ; GFX6-NEXT:    v_add_i32_e64 v2, s[4:5], v0, v2
 ; GFX6-NEXT:    v_cmp_lt_i32_e64 s[4:5], v2, v0
 ; GFX6-NEXT:    v_ashrrev_i32_e32 v0, 31, v2
@@ -387,7 +387,7 @@ define <2 x i32> @v_saddsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
 ; GFX6-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
 ; GFX6-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
 ; GFX6-NEXT:    v_add_i32_e64 v2, s[4:5], v1, v3
-; GFX6-NEXT:    v_cmp_gt_i32_e32 vcc, 0, v3
+; GFX6-NEXT:    v_cmp_ge_i32_e32 vcc, 0, v3
 ; GFX6-NEXT:    v_cmp_lt_i32_e64 s[4:5], v2, v1
 ; GFX6-NEXT:    v_ashrrev_i32_e32 v1, 31, v2
 ; GFX6-NEXT:    v_xor_b32_e32 v1, 0x80000000, v1
@@ -398,7 +398,7 @@ define <2 x i32> @v_saddsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
 ; GFX8-LABEL: v_saddsat_v2i32:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_cmp_gt_i32_e32 vcc, 0, v2
+; GFX8-NEXT:    v_cmp_ge_i32_e32 vcc, 0, v2
 ; GFX8-NEXT:    v_add_u32_e64 v2, s[4:5], v0, v2
 ; GFX8-NEXT:    v_cmp_lt_i32_e64 s[4:5], v2, v0
 ; GFX8-NEXT:    v_ashrrev_i32_e32 v0, 31, v2
@@ -406,7 +406,7 @@ define <2 x i32> @v_saddsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
 ; GFX8-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
 ; GFX8-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
 ; GFX8-NEXT:    v_add_u32_e64 v2, s[4:5], v1, v3
-; GFX8-NEXT:    v_cmp_gt_i32_e32 vcc, 0, v3
+; GFX8-NEXT:    v_cmp_ge_i32_e32 vcc, 0, v3
 ; GFX8-NEXT:    v_cmp_lt_i32_e64 s[4:5], v2, v1
 ; GFX8-NEXT:    v_ashrrev_i32_e32 v1, 31, v2
 ; GFX8-NEXT:    v_xor_b32_e32 v1, 0x80000000, v1
@@ -438,7 +438,7 @@ define i64 @v_saddsat_i64(i64 %lhs, i64 %rhs) {
 ; GFX6-NEXT:    v_add_i32_e32 v4, vcc, v0, v2
 ; GFX6-NEXT:    v_addc_u32_e32 v5, vcc, v1, v3, vcc
 ; GFX6-NEXT:    v_cmp_lt_i64_e32 vcc, v[4:5], v[0:1]
-; GFX6-NEXT:    v_cmp_gt_i64_e64 s[4:5], 0, v[2:3]
+; GFX6-NEXT:    v_cmp_ge_i64_e64 s[4:5], 0, v[2:3]
 ; GFX6-NEXT:    v_ashrrev_i32_e32 v1, 31, v5
 ; GFX6-NEXT:    s_xor_b64 vcc, s[4:5], vcc
 ; GFX6-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
@@ -452,7 +452,7 @@ define i64 @v_saddsat_i64(i64 %lhs, i64 %rhs) {
 ; GFX8-NEXT:    v_add_u32_e32 v4, vcc, v0, v2
 ; GFX8-NEXT:    v_addc_u32_e32 v5, vcc, v1, v3, vcc
 ; GFX8-NEXT:    v_cmp_lt_i64_e32 vcc, v[4:5], v[0:1]
-; GFX8-NEXT:    v_cmp_gt_i64_e64 s[4:5], 0, v[2:3]
+; GFX8-NEXT:    v_cmp_ge_i64_e64 s[4:5], 0, v[2:3]
 ; GFX8-NEXT:    v_ashrrev_i32_e32 v1, 31, v5
 ; GFX8-NEXT:    s_xor_b64 vcc, s[4:5], vcc
 ; GFX8-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
@@ -466,7 +466,7 @@ define i64 @v_saddsat_i64(i64 %lhs, i64 %rhs) {
 ; GFX9-NEXT:    v_add_co_u32_e32 v4, vcc, v0, v2
 ; GFX9-NEXT:    v_addc_co_u32_e32 v5, vcc, v1, v3, vcc
 ; GFX9-NEXT:    v_cmp_lt_i64_e32 vcc, v[4:5], v[0:1]
-; GFX9-NEXT:    v_cmp_gt_i64_e64 s[4:5], 0, v[2:3]
+; GFX9-NEXT:    v_cmp_ge_i64_e64 s[4:5], 0, v[2:3]
 ; GFX9-NEXT:    v_ashrrev_i32_e32 v1, 31, v5
 ; GFX9-NEXT:    s_xor_b64 vcc, s[4:5], vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
@@ -479,7 +479,7 @@ define i64 @v_saddsat_i64(i64 %lhs, i64 %rhs) {
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    v_add_co_u32 v4, vcc_lo, v0, v2
 ; GFX10-NEXT:    v_add_co_ci_u32_e32 v5, vcc_lo, v1, v3, vcc_lo
-; GFX10-NEXT:    v_cmp_gt_i64_e64 s4, 0, v[2:3]
+; GFX10-NEXT:    v_cmp_ge_i64_e64 s4, 0, v[2:3]
 ; GFX10-NEXT:    v_ashrrev_i32_e32 v6, 31, v5
 ; GFX10-NEXT:    v_cmp_lt_i64_e32 vcc_lo, v[4:5], v[0:1]
 ; GFX10-NEXT:    v_xor_b32_e32 v1, 0x80000000, v6
@@ -493,7 +493,7 @@ define i64 @v_saddsat_i64(i64 %lhs, i64 %rhs) {
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NEXT:    v_add_co_u32 v4, vcc_lo, v0, v2
 ; GFX11-NEXT:    v_add_co_ci_u32_e64 v5, null, v1, v3, vcc_lo
-; GFX11-NEXT:    v_cmp_gt_i64_e64 s0, 0, v[2:3]
+; GFX11-NEXT:    v_cmp_ge_i64_e64 s0, 0, v[2:3]
 ; GFX11-NEXT:    v_ashrrev_i32_e32 v6, 31, v5
 ; GFX11-NEXT:    v_cmp_lt_i64_e32 vcc_lo, v[4:5], v[0:1]
 ; GFX11-NEXT:    v_xor_b32_e32 v1, 0x80000000, v6
diff --git a/llvm/test/CodeGen/AMDGPU/sdiv64.ll b/llvm/test/CodeGen/AMDGPU/sdiv64.ll
index 0981584598abd..20cb33a8eb01f 100644
--- a/llvm/test/CodeGen/AMDGPU/sdiv64.ll
+++ b/llvm/test/CodeGen/AMDGPU/sdiv64.ll
@@ -105,9 +105,9 @@ define amdgpu_kernel void @s_test_sdiv(ptr addrspace(1) %out, i64 %x, i64 %y) {
 ; GCN-NEXT:    v_subb_u32_e64 v4, s[0:1], v4, v5, vcc
 ; GCN-NEXT:    v_subrev_i32_e64 v5, s[0:1], s10, v3
 ; GCN-NEXT:    v_subbrev_u32_e64 v4, s[0:1], 0, v4, s[0:1]
-; GCN-NEXT:    v_cmp_le_u32_e64 s[0:1], s11, v4
+; GCN-NEXT:    v_cmp_lt_u32_e64 s[0:1], s11, v4
 ; GCN-NEXT:    v_cndmask_b32_e64 v6, 0, -1, s[0:1]
-; GCN-NEXT:    v_cmp_le_u32_e64 s[0:1], s10, v5
+; GCN-NEXT:    v_cmp_lt_u32_e64 s[0:1], s10, v5
 ; GCN-NEXT:    v_cndmask_b32_e64 v5, 0, -1, s[0:1]
 ; GCN-NEXT:    v_cmp_eq_u32_e64 s[0:1], s11, v4
 ; GCN-NEXT:    v_cndmask_b32_e64 v4, v6, v5, s[0:1]
@@ -120,9 +120,9 @@ define amdgpu_kernel void @s_test_sdiv(ptr addrspace(1) %out, i64 %x, i64 %y) {
 ; GCN-NEXT:    v_cndmask_b32_e64 v5, v6, v8, s[0:1]
 ; GCN-NEXT:    v_mov_b32_e32 v6, s3
 ; GCN-NEXT:    v_subb_u32_e32 v2, vcc, v6, v2, vcc
-; GCN-NEXT:    v_cmp_le_u32_e32 vcc, s11, v2
+; GCN-NEXT:    v_cmp_lt_u32_e32 vcc, s11, v2
 ; GCN-NEXT:    v_cndmask_b32_e64 v6, 0, -1, vcc
-; GCN-NEXT:    v_cmp_le_u32_e32 vcc, s10, v3
+; GCN-NEXT:    v_cmp_lt_u32_e32 vcc, s10, v3
 ; GCN-NEXT:    v_cndmask_b32_e64 v3, 0, -1, vcc
 ; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, s11, v2
 ; GCN-NEXT:    v_cndmask_b32_e32 v2, v6, v3, vcc
@@ -378,7 +378,7 @@ define i64 @v_test_sdiv(i64 %x, i64 %y) {
 ; GCN-IR-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[0:1]
 ; GCN-IR-NEXT:    v_cmp_eq_u64_e64 s[4:5], 0, v[6:7]
 ; GCN-IR-NEXT:    v_subb_u32_e64 v3, s[6:7], 0, 0, s[6:7]
-; GCN-IR-NEXT:    v_cmp_lt_u64_e64 s[6:7], 63, v[2:3]
+; GCN-IR-NEXT:    v_cmp_le_u64_e64 s[6:7], 63, v[2:3]
 ; GCN-IR-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
 ; GCN-IR-NEXT:    s_or_b64 s[4:5], s[4:5], s[6:7]
 ; GCN-IR-NEXT:    v_cmp_ne_u64_e32 vcc, 63, v[2:3]
@@ -1192,9 +1192,9 @@ define amdgpu_kernel void @s_test_sdiv_k_num_i64(ptr addrspace(1) %out, i64 %x)
 ; GCN-NEXT:    v_subb_u32_e64 v3, s[0:1], v3, v4, vcc
 ; GCN-NEXT:    v_subrev_i32_e64 v4, s[0:1], s2, v2
 ; GCN-NEXT:    v_subbrev_u32_e64 v3, s[0:1], 0, v3, s[0:1]
-; GCN-NEXT:    v_cmp_le_u32_e64 s[0:1], s3, v3
+; GCN-NEXT:    v_cmp_lt_u32_e64 s[0:1], s3, v3
 ; GCN-NEXT:    v_cndmask_b32_e64 v5, 0, -1, s[0:1]
-; GCN-NEXT:    v_cmp_le_u32_e64 s[0:1], s2, v4
+; GCN-NEXT:    v_cmp_lt_u32_e64 s[0:1], s2, v4
 ; GCN-NEXT:    v_cndmask_b32_e64 v4, 0, -1, s[0:1]
 ; GCN-NEXT:    v_cmp_eq_u32_e64 s[0:1], s3, v3
 ; GCN-NEXT:    v_cndmask_b32_e64 v3, v5, v4, s[0:1]
@@ -1204,11 +1204,11 @@ define amdgpu_kernel void @s_test_sdiv_k_num_i64(ptr addrspace(1) %out, i64 %x)
 ; GCN-NEXT:    v_addc_u32_e64 v7, s[0:1], 0, 0, s[0:1]
 ; GCN-NEXT:    v_subb_u32_e32 v1, vcc, 0, v1, vcc
 ; GCN-NEXT:    v_cmp_ne_u32_e64 s[0:1], 0, v3
-; GCN-NEXT:    v_cmp_le_u32_e32 vcc, s3, v1
+; GCN-NEXT:    v_cmp_lt_u32_e32 vcc, s3, v1
 ; GCN-NEXT:    v_cndmask_b32_e64 v3, v4, v6, s[0:1]
 ; GCN-NEXT:    v_cndmask_b32_e64 v4, v5, v7, s[0:1]
 ; GCN-NEXT:    v_cndmask_b32_e64 v5, 0, -1, vcc
-; GCN-NEXT:    v_cmp_le_u32_e32 vcc, s2, v2
+; GCN-NEXT:    v_cmp_lt_u32_e32 vcc, s2, v2
 ; GCN-NEXT:    v_cndmask_b32_e64 v2, 0, -1, vcc
 ; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, s3, v1
 ; GCN-NEXT:    v_cndmask_b32_e32 v1, v5, v2, vcc
@@ -1425,7 +1425,7 @@ define i64 @v_test_sdiv_k_num_i64(i64 %x) {
 ; GCN-IR-NEXT:    v_add_i32_e32 v2, vcc, s6, v8
 ; GCN-IR-NEXT:    v_addc_u32_e64 v3, s[6:7], 0, -1, vcc
 ; GCN-IR-NEXT:    v_cmp_eq_u64_e64 s[4:5], 0, v[0:1]
-; GCN-IR-NEXT:    v_cmp_lt_u64_e32 vcc, 63, v[2:3]
+; GCN-IR-NEXT:    v_cmp_le_u64_e32 vcc, 63, v[2:3]
 ; GCN-IR-NEXT:    v_cmp_ne_u64_e64 s[6:7], 63, v[2:3]
 ; GCN-IR-NEXT:    s_or_b64 s[4:5], s[4:5], vcc
 ; GCN-IR-NEXT:    v_cndmask_b32_e64 v4, 24, 0, s[4:5]
@@ -1618,7 +1618,7 @@ define i64 @v_test_sdiv_pow2_k_num_i64(i64 %x) {
 ; GCN-IR-NEXT:    v_add_i32_e32 v2, vcc, s6, v8
 ; GCN-IR-NEXT:    v_addc_u32_e64 v3, s[6:7], 0, -1, vcc
 ; GCN-IR-NEXT:    v_cmp_eq_u64_e64 s[4:5], 0, v[0:1]
-; GCN-IR-NEXT:    v_cmp_lt_u64_e32 vcc, 63, v[2:3]
+; GCN-IR-NEXT:    v_cmp_le_u64_e32 vcc, 63, v[2:3]
 ; GCN-IR-NEXT:    v_cmp_ne_u64_e64 s[6:7], 63, v[2:3]
 ; GCN-IR-NEXT:    v_mov_b32_e32 v4, 0x8000
 ; GCN-IR-NEXT:    s_or_b64 s[4:5], s[4:5], vcc
@@ -1719,7 +1719,7 @@ define i64 @v_test_sdiv_pow2_k_den_i64(i64 %x) {
 ; GCN-IR-NEXT:    v_sub_i32_e64 v0, s[4:5], 48, v12
 ; GCN-IR-NEXT:    v_subb_u32_e64 v1, s[4:5], 0, 0, s[4:5]
 ; GCN-IR-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[4:5]
-; GCN-IR-NEXT:    v_cmp_lt_u64_e64 s[4:5], 63, v[0:1]
+; GCN-IR-NEXT:    v_cmp_le_u64_e64 s[4:5], 63, v[0:1]
 ; GCN-IR-NEXT:    v_mov_b32_e32 v11, v10
 ; GCN-IR-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
 ; GCN-IR-NEXT:    v_cmp_ne_u64_e32 vcc, 63, v[0:1]
diff --git a/llvm/test/CodeGen/AMDGPU/select-constant-xor.ll b/llvm/test/CodeGen/AMDGPU/select-constant-xor.ll
index c3e63d81e67a6..dc5b9782550a6 100644
--- a/llvm/test/CodeGen/AMDGPU/select-constant-xor.ll
+++ b/llvm/test/CodeGen/AMDGPU/select-constant-xor.ll
@@ -108,7 +108,7 @@ define i32 @icmpasreq(i32 %input, i32 %a, i32 %b) {
 ; CHECK-LABEL: icmpasreq:
 ; CHECK:       ; %bb.0:
 ; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT:    v_cmp_gt_i32_e32 vcc_lo, 0, v0
+; CHECK-NEXT:    v_cmp_ge_i32_e32 vcc_lo, 0, v0
 ; CHECK-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc_lo
 ; CHECK-NEXT:    s_setpc_b64 s[30:31]
   %sh = ashr i32 %input, 31
@@ -121,7 +121,7 @@ define i32 @icmpasrne(i32 %input, i32 %a, i32 %b) {
 ; CHECK-LABEL: icmpasrne:
 ; CHECK:       ; %bb.0:
 ; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT:    v_cmp_lt_i32_e32 vcc_lo, -1, v0
+; CHECK-NEXT:    v_cmp_le_i32_e32 vcc_lo, -1, v0
 ; CHECK-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc_lo
 ; CHECK-NEXT:    s_setpc_b64 s[30:31]
   %sh = ashr i32 %input, 31
@@ -134,7 +134,7 @@ define i32 @oneusecmp(i32 %a, i32 %b, i32 %d) {
 ; CHECK-LABEL: oneusecmp:
 ; CHECK:       ; %bb.0:
 ; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT:    v_cmp_gt_i32_e32 vcc_lo, 0, v0
+; CHECK-NEXT:    v_cmp_ge_i32_e32 vcc_lo, 0, v0
 ; CHECK-NEXT:    v_ashrrev_i32_e32 v3, 31, v0
 ; CHECK-NEXT:    v_cndmask_b32_e32 v0, v1, v2, vcc_lo
 ; CHECK-NEXT:    v_xad_u32 v0, 0x7f, v3, v0
diff --git a/llvm/test/CodeGen/AMDGPU/set-inactive-wwm-overwrite.ll b/llvm/test/CodeGen/AMDGPU/set-inactive-wwm-overwrite.ll
index 6f841c88a6d8b..3cf0a9ea1f14f 100644
--- a/llvm/test/CodeGen/AMDGPU/set-inactive-wwm-overwrite.ll
+++ b/llvm/test/CodeGen/AMDGPU/set-inactive-wwm-overwrite.ll
@@ -11,7 +11,7 @@ define amdgpu_cs void @if_then(ptr addrspace(8) inreg %input, ptr addrspace(8) i
 ; GCN-NEXT:    v_mov_b32_e32 v3, 1
 ; GCN-NEXT:  ; %bb.2: ; %.merge
 ; GCN-NEXT:    s_or_b32 exec_lo, exec_lo, s0
-; GCN-NEXT:    v_cmp_lt_u32_e32 vcc_lo, 3, v0
+; GCN-NEXT:    v_cmp_le_u32_e32 vcc_lo, 3, v0
 ; GCN-NEXT:    s_and_saveexec_b32 s0, vcc_lo
 ; GCN-NEXT:    s_cbranch_execz .LBB0_4
 ; GCN-NEXT:  ; %bb.3: ; %.then
@@ -67,7 +67,7 @@ define amdgpu_cs void @if_else_vgpr_opt(ptr addrspace(8) inreg %input, ptr addrs
 ; GCN-NEXT:    v_mov_b32_e32 v3, 1
 ; GCN-NEXT:  ; %bb.2: ; %.merge
 ; GCN-NEXT:    s_or_b32 exec_lo, exec_lo, s0
-; GCN-NEXT:    v_cmp_lt_u32_e32 vcc_lo, 3, v0
+; GCN-NEXT:    v_cmp_le_u32_e32 vcc_lo, 3, v0
 ; GCN-NEXT:    s_and_saveexec_b32 s0, vcc_lo
 ; GCN-NEXT:    s_xor_b32 s0, exec_lo, s0
 ; GCN-NEXT:    s_cbranch_execnz .LBB1_5
diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-control-flow.ll b/llvm/test/CodeGen/AMDGPU/sgpr-control-flow.ll
index 220e8705af252..24953967c189d 100644
--- a/llvm/test/CodeGen/AMDGPU/sgpr-control-flow.ll
+++ b/llvm/test/CodeGen/AMDGPU/sgpr-control-flow.ll
@@ -172,7 +172,7 @@ define amdgpu_kernel void @sgpr_if_else_valu_cmp_phi_br(ptr addrspace(1) %out, p
 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
 ; SI-NEXT:    buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
 ; SI-NEXT:    s_waitcnt vmcnt(0)
-; SI-NEXT:    v_cmp_gt_i32_e64 s[8:9], 0, v0
+; SI-NEXT:    v_cmp_ge_i32_e64 s[8:9], 0, v0
 ; SI-NEXT:    ; implicit-def: $vgpr0
 ; SI-NEXT:  .LBB3_2: ; %Flow
 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
diff --git a/llvm/test/CodeGen/AMDGPU/shift-i128.ll b/llvm/test/CodeGen/AMDGPU/shift-i128.ll
index 5734c81c043fd..40858c4853485 100644
--- a/llvm/test/CodeGen/AMDGPU/shift-i128.ll
+++ b/llvm/test/CodeGen/AMDGPU/shift-i128.ll
@@ -13,7 +13,7 @@ define i128 @v_shl_i128_vv(i128 %lhs, i128 %rhs) {
 ; GCN-NEXT:    v_subrev_i32_e32 v5, vcc, 64, v4
 ; GCN-NEXT:    v_or_b32_e32 v8, v6, v8
 ; GCN-NEXT:    v_lshl_b64 v[5:6], v[0:1], v5
-; GCN-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v4
+; GCN-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v4
 ; GCN-NEXT:    v_cndmask_b32_e32 v5, v5, v7, vcc
 ; GCN-NEXT:    v_lshl_b64 v[0:1], v[0:1], v4
 ; GCN-NEXT:    v_cndmask_b32_e64 v2, v5, v2, s[4:5]
@@ -38,7 +38,7 @@ define i128 @v_lshr_i128_vv(i128 %lhs, i128 %rhs) {
 ; GCN-NEXT:    v_subrev_i32_e32 v5, vcc, 64, v4
 ; GCN-NEXT:    v_or_b32_e32 v8, v6, v8
 ; GCN-NEXT:    v_lshr_b64 v[5:6], v[2:3], v5
-; GCN-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v4
+; GCN-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v4
 ; GCN-NEXT:    v_cndmask_b32_e32 v5, v5, v7, vcc
 ; GCN-NEXT:    v_lshr_b64 v[2:3], v[2:3], v4
 ; GCN-NEXT:    v_cndmask_b32_e64 v0, v5, v0, s[4:5]
@@ -64,7 +64,7 @@ define i128 @v_ashr_i128_vv(i128 %lhs, i128 %rhs) {
 ; GCN-NEXT:    v_subrev_i32_e32 v5, vcc, 64, v4
 ; GCN-NEXT:    v_or_b32_e32 v8, v6, v8
 ; GCN-NEXT:    v_ashr_i64 v[5:6], v[2:3], v5
-; GCN-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v4
+; GCN-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v4
 ; GCN-NEXT:    v_cndmask_b32_e32 v5, v5, v7, vcc
 ; GCN-NEXT:    v_cndmask_b32_e64 v0, v5, v0, s[4:5]
 ; GCN-NEXT:    v_cndmask_b32_e32 v5, v6, v8, vcc
@@ -129,7 +129,7 @@ define i128 @v_shl_i128_kv(i128 %rhs) {
 ; GCN-NEXT:    v_lshr_b64 v[2:3], 17, v1
 ; GCN-NEXT:    v_subrev_i32_e32 v1, vcc, 64, v0
 ; GCN-NEXT:    v_lshl_b64 v[4:5], 17, v1
-; GCN-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v0
+; GCN-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v0
 ; GCN-NEXT:    v_cndmask_b32_e32 v1, v4, v2, vcc
 ; GCN-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v0
 ; GCN-NEXT:    v_cndmask_b32_e64 v2, 0, v1, s[4:5]
@@ -149,7 +149,7 @@ define i128 @v_lshr_i128_kv(i128 %rhs) {
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GCN-NEXT:    s_mov_b64 s[4:5], 0x41
 ; GCN-NEXT:    v_lshr_b64 v[1:2], s[4:5], v0
-; GCN-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v0
+; GCN-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v0
 ; GCN-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; GCN-NEXT:    v_mov_b32_e32 v2, 0x41
 ; GCN-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
@@ -167,7 +167,7 @@ define i128 @v_ashr_i128_kv(i128 %rhs) {
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GCN-NEXT:    v_lshr_b64 v[1:2], 33, v0
-; GCN-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v0
+; GCN-NEXT:    v_cmp_ge_u32_e32 vcc, 64, v0
 ; GCN-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; GCN-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
 ; GCN-NEXT:    v_cndmask_b32_e32 v0, 33, v1, vcc
@@ -295,7 +295,7 @@ define <2 x i128> @v_shl_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GCN-NEXT:    v_sub_i32_e32 v16, vcc, 64, v8
 ; GCN-NEXT:    v_lshr_b64 v[16:17], v[0:1], v16
 ; GCN-NEXT:    v_lshl_b64 v[18:19], v[2:3], v8
-; GCN-NEXT:    v_cmp_gt_u64_e32 vcc, 64, v[8:9]
+; GCN-NEXT:    v_cmp_ge_u64_e32 vcc, 64, v[8:9]
 ; GCN-NEXT:    v_cmp_eq_u64_e64 s[4:5], 0, v[10:11]
 ; GCN-NEXT:    v_or_b32_e32 v11, v9, v11
 ; GCN-NEXT:    v_subrev_i32_e64 v9, s[6:7], 64, v8
@@ -313,7 +313,7 @@ define <2 x i128> @v_shl_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GCN-NEXT:    v_lshl_b64 v[16:17], v[6:7], v12
 ; GCN-NEXT:    v_cndmask_b32_e64 v3, v11, v3, s[4:5]
 ; GCN-NEXT:    v_or_b32_e32 v16, v16, v9
-; GCN-NEXT:    v_cmp_gt_u64_e64 s[4:5], 64, v[12:13]
+; GCN-NEXT:    v_cmp_ge_u64_e64 s[4:5], 64, v[12:13]
 ; GCN-NEXT:    v_cmp_eq_u64_e64 s[6:7], 0, v[14:15]
 ; GCN-NEXT:    v_subrev_i32_e64 v9, s[8:9], 64, v12
 ; GCN-NEXT:    v_or_b32_e32 v11, v17, v10
@@ -344,7 +344,7 @@ define <2 x i128> @v_lshr_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GCN-NEXT:    v_sub_i32_e32 v16, vcc, 64, v8
 ; GCN-NEXT:    v_lshl_b64 v[16:17], v[2:3], v16
 ; GCN-NEXT:    v_lshr_b64 v[18:19], v[0:1], v8
-; GCN-NEXT:    v_cmp_gt_u64_e32 vcc, 64, v[8:9]
+; GCN-NEXT:    v_cmp_ge_u64_e32 vcc, 64, v[8:9]
 ; GCN-NEXT:    v_cmp_eq_u64_e64 s[4:5], 0, v[10:11]
 ; GCN-NEXT:    v_or_b32_e32 v11, v9, v11
 ; GCN-NEXT:    v_subrev_i32_e64 v9, s[6:7], 64, v8
@@ -362,7 +362,7 @@ define <2 x i128> @v_lshr_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GCN-NEXT:    v_lshr_b64 v[16:17], v[4:5], v12
 ; GCN-NEXT:    v_cndmask_b32_e64 v1, v11, v1, s[4:5]
 ; GCN-NEXT:    v_or_b32_e32 v16, v16, v9
-; GCN-NEXT:    v_cmp_gt_u64_e64 s[4:5], 64, v[12:13]
+; GCN-NEXT:    v_cmp_ge_u64_e64 s[4:5], 64, v[12:13]
 ; GCN-NEXT:    v_cmp_eq_u64_e64 s[6:7], 0, v[14:15]
 ; GCN-NEXT:    v_subrev_i32_e64 v9, s[8:9], 64, v12
 ; GCN-NEXT:    v_or_b32_e32 v11, v17, v10
@@ -393,7 +393,7 @@ define <2 x i128> @v_ashr_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GCN-NEXT:    v_sub_i32_e32 v16, vcc, 64, v8
 ; GCN-NEXT:    v_lshl_b64 v[16:17], v[2:3], v16
 ; GCN-NEXT:    v_lshr_b64 v[18:19], v[0:1], v8
-; GCN-NEXT:    v_cmp_gt_u64_e32 vcc, 64, v[8:9]
+; GCN-NEXT:    v_cmp_ge_u64_e32 vcc, 64, v[8:9]
 ; GCN-NEXT:    v_cmp_eq_u64_e64 s[4:5], 0, v[10:11]
 ; GCN-NEXT:    v_or_b32_e32 v11, v9, v11
 ; GCN-NEXT:    v_subrev_i32_e64 v9, s[6:7], 64, v8
@@ -411,7 +411,7 @@ define <2 x i128> @v_ashr_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GCN-NEXT:    v_lshr_b64 v[16:17], v[4:5], v12
 ; GCN-NEXT:    v_cndmask_b32_e64 v1, v11, v1, s[4:5]
 ; GCN-NEXT:    v_or_b32_e32 v16, v16, v9
-; GCN-NEXT:    v_cmp_gt_u64_e64 s[4:5], 64, v[12:13]
+; GCN-NEXT:    v_cmp_ge_u64_e64 s[4:5], 64, v[12:13]
 ; GCN-NEXT:    v_cmp_eq_u64_e64 s[6:7], 0, v[14:15]
 ; GCN-NEXT:    v_subrev_i32_e64 v9, s[8:9], 64, v12
 ; GCN-NEXT:    v_or_b32_e32 v11, v17, v10
diff --git a/llvm/test/CodeGen/AMDGPU/shl64_reduce.ll b/llvm/test/CodeGen/AMDGPU/shl64_reduce.ll
index 21b7ed4d6b779..98ae6f0df54fd 100644
--- a/llvm/test/CodeGen/AMDGPU/shl64_reduce.ll
+++ b/llvm/test/CodeGen/AMDGPU/shl64_reduce.ll
@@ -430,10 +430,10 @@ define i64 @shl_maxmin(i64 %arg0, i64 noundef %arg1) {
 ; CHECK-LABEL: shl_maxmin:
 ; CHECK:       ; %bb.0:
 ; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT:    v_cmp_lt_u64_e32 vcc, 32, v[2:3]
+; CHECK-NEXT:    v_cmp_le_u64_e32 vcc, 32, v[2:3]
 ; CHECK-NEXT:    v_cndmask_b32_e32 v3, 0, v3, vcc
 ; CHECK-NEXT:    v_cndmask_b32_e32 v2, 32, v2, vcc
-; CHECK-NEXT:    v_cmp_gt_u64_e32 vcc, 63, v[2:3]
+; CHECK-NEXT:    v_cmp_ge_u64_e32 vcc, 63, v[2:3]
 ; CHECK-NEXT:    v_cndmask_b32_e32 v2, 63, v2, vcc
 ; CHECK-NEXT:    v_lshlrev_b64 v[0:1], v2, v[0:1]
 ; CHECK-NEXT:    s_setpc_b64 s[30:31]
@@ -447,15 +447,15 @@ define <2 x i64> @shl_v2_maxmin(<2 x i64> %arg0, <2 x i64> noundef %arg1) {
 ; CHECK-LABEL: shl_v2_maxmin:
 ; CHECK:       ; %bb.0:
 ; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT:    v_cmp_lt_u64_e32 vcc, 32, v[4:5]
+; CHECK-NEXT:    v_cmp_le_u64_e32 vcc, 32, v[4:5]
 ; CHECK-NEXT:    v_cndmask_b32_e32 v5, 0, v5, vcc
 ; CHECK-NEXT:    v_cndmask_b32_e32 v4, 32, v4, vcc
-; CHECK-NEXT:    v_cmp_lt_u64_e32 vcc, 32, v[6:7]
+; CHECK-NEXT:    v_cmp_le_u64_e32 vcc, 32, v[6:7]
 ; CHECK-NEXT:    v_cndmask_b32_e32 v7, 0, v7, vcc
 ; CHECK-NEXT:    v_cndmask_b32_e32 v6, 32, v6, vcc
-; CHECK-NEXT:    v_cmp_gt_u64_e32 vcc, 63, v[6:7]
+; CHECK-NEXT:    v_cmp_ge_u64_e32 vcc, 63, v[6:7]
 ; CHECK-NEXT:    v_cndmask_b32_e32 v6, 63, v6, vcc
-; CHECK-NEXT:    v_cmp_gt_u64_e32 vcc, 63, v[4:5]
+; CHECK-NEXT:    v_cmp_ge_u64_e32 vcc, 63, v[4:5]
 ; CHECK-NEXT:    v_lshlrev_b64 v[2:3], v6, v[2:3]
 ; CHECK-NEXT:    v_cndmask_b32_e32 v4, 63, v4, vcc
 ; CHECK-NEXT:    v_lshlrev_b64 v[0:1], v4, v[0:1]
@@ -470,21 +470,21 @@ define <3 x i64> @shl_v3_maxmin(<3 x i64> %arg0, <3 x i64> noundef %arg1) {
 ; CHECK-LABEL: shl_v3_maxmin:
 ; CHECK:       ; %bb.0:
 ; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT:    v_cmp_lt_u64_e32 vcc, 32, v[6:7]
+; CHECK-NEXT:    v_cmp_le_u64_e32 vcc, 32, v[6:7]
 ; CHECK-NEXT:    v_cndmask_b32_e32 v7, 0, v7, vcc
 ; CHECK-NEXT:    v_cndmask_b32_e32 v6, 32, v6, vcc
-; CHECK-NEXT:    v_cmp_lt_u64_e32 vcc, 32, v[8:9]
+; CHECK-NEXT:    v_cmp_le_u64_e32 vcc, 32, v[8:9]
 ; CHECK-NEXT:    v_cndmask_b32_e32 v9, 0, v9, vcc
 ; CHECK-NEXT:    v_cndmask_b32_e32 v8, 32, v8, vcc
-; CHECK-NEXT:    v_cmp_lt_u64_e32 vcc, 32, v[10:11]
+; CHECK-NEXT:    v_cmp_le_u64_e32 vcc, 32, v[10:11]
 ; CHECK-NEXT:    v_cndmask_b32_e32 v11, 0, v11, vcc
 ; CHECK-NEXT:    v_cndmask_b32_e32 v10, 32, v10, vcc
-; CHECK-NEXT:    v_cmp_gt_u64_e32 vcc, 63, v[10:11]
+; CHECK-NEXT:    v_cmp_ge_u64_e32 vcc, 63, v[10:11]
 ; CHECK-NEXT:    v_cndmask_b32_e32 v10, 63, v10, vcc
-; CHECK-NEXT:    v_cmp_gt_u64_e32 vcc, 63, v[8:9]
+; CHECK-NEXT:    v_cmp_ge_u64_e32 vcc, 63, v[8:9]
 ; CHECK-NEXT:    v_lshlrev_b64 v[4:5], v10, v[4:5]
 ; CHECK-NEXT:    v_cndmask_b32_e32 v8, 63, v8, vcc
-; CHECK-NEXT:    v_cmp_gt_u64_e32 vcc, 63, v[6:7]
+; CHECK-NEXT:    v_cmp_ge_u64_e32 vcc, 63, v[6:7]
 ; CHECK-NEXT:    v_lshlrev_b64 v[2:3], v8, v[2:3]
 ; CHECK-NEXT:    v_cndmask_b32_e32 v6, 63, v6, vcc
 ; CHECK-NEXT:    v_lshlrev_b64 v[0:1], v6, v[0:1]
@@ -499,27 +499,27 @@ define <4 x i64> @shl_v4_maxmin(<4 x i64> %arg0, <4 x i64> noundef %arg1) {
 ; CHECK-LABEL: shl_v4_maxmin:
 ; CHECK:       ; %bb.0:
 ; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT:    v_cmp_lt_u64_e32 vcc, 32, v[8:9]
+; CHECK-NEXT:    v_cmp_le_u64_e32 vcc, 32, v[8:9]
 ; CHECK-NEXT:    v_cndmask_b32_e32 v9, 0, v9, vcc
 ; CHECK-NEXT:    v_cndmask_b32_e32 v8, 32, v8, vcc
-; CHECK-NEXT:    v_cmp_lt_u64_e32 vcc, 32, v[10:11]
+; CHECK-NEXT:    v_cmp_le_u64_e32 vcc, 32, v[10:11]
 ; CHECK-NEXT:    v_cndmask_b32_e32 v11, 0, v11, vcc
 ; CHECK-NEXT:    v_cndmask_b32_e32 v10, 32, v10, vcc
-; CHECK-NEXT:    v_cmp_lt_u64_e32 vcc, 32, v[12:13]
+; CHECK-NEXT:    v_cmp_le_u64_e32 vcc, 32, v[12:13]
 ; CHECK-NEXT:    v_cndmask_b32_e32 v13, 0, v13, vcc
 ; CHECK-NEXT:    v_cndmask_b32_e32 v12, 32, v12, vcc
-; CHECK-NEXT:    v_cmp_lt_u64_e32 vcc, 32, v[14:15]
+; CHECK-NEXT:    v_cmp_le_u64_e32 vcc, 32, v[14:15]
 ; CHECK-NEXT:    v_cndmask_b32_e32 v15, 0, v15, vcc
 ; CHECK-NEXT:    v_cndmask_b32_e32 v14, 32, v14, vcc
-; CHECK-NEXT:    v_cmp_gt_u64_e32 vcc, 63, v[14:15]
+; CHECK-NEXT:    v_cmp_ge_u64_e32 vcc, 63, v[14:15]
 ; CHECK-NEXT:    v_cndmask_b32_e32 v14, 63, v14, vcc
-; CHECK-NEXT:    v_cmp_gt_u64_e32 vcc, 63, v[12:13]
+; CHECK-NEXT:    v_cmp_ge_u64_e32 vcc, 63, v[12:13]
 ; CHECK-NEXT:    v_lshlrev_b64 v[6:7], v14, v[6:7]
 ; CHECK-NEXT:    v_cndmask_b32_e32 v12, 63, v12, vcc
-; CHECK-NEXT:    v_cmp_gt_u64_e32 vcc, 63, v[10:11]
+; CHECK-NEXT:    v_cmp_ge_u64_e32 vcc, 63, v[10:11]
 ; CHECK-NEXT:    v_lshlrev_b64 v[4:5], v12, v[4:5]
 ; CHECK-NEXT:    v_cndmask_b32_e32 v10, 63, v10, vcc
-; CHECK-NEXT:    v_cmp_gt_u64_e32 vcc, 63, v[8:9]
+; CHECK-NEXT:    v_cmp_ge_u64_e32 vcc, 63, v[8:9]
 ; CHECK-NEXT:    v_lshlrev_b64 v[2:3], v10, v[2:3]
 ; CHECK-NEXT:    v_cndmask_b32_e32 v8, 63, v8, vcc
 ; CHECK-NEXT:    v_lshlrev_b64 v[0:1], v8, v[0:1]
diff --git a/llvm/test/CodeGen/AMDGPU/should-not-hoist-set-inactive.ll b/llvm/test/CodeGen/AMDGPU/should-not-hoist-set-inactive.ll
index 3519befabd3bc..922a1034bbbd1 100644
--- a/llvm/test/CodeGen/AMDGPU/should-not-hoist-set-inactive.ll
+++ b/llvm/test/CodeGen/AMDGPU/should-not-hoist-set-inactive.ll
@@ -7,7 +7,7 @@ define amdgpu_cs void @should_not_hoist_set_inactive(<4 x i32> inreg %i14, i32 i
 ; GCN-NEXT:    v_cmp_eq_u32_e64 s5, 0, v0
 ; GCN-NEXT:    v_cmp_ne_u32_e64 s6, 0, v2
 ; GCN-NEXT:    s_mov_b32 s7, 0
-; GCN-NEXT:    v_cmp_gt_i32_e32 vcc_lo, 3, v1
+; GCN-NEXT:    v_cmp_ge_i32_e32 vcc_lo, 3, v1
 ; GCN-NEXT:    s_branch .LBB0_2
 ; GCN-NEXT:  .LBB0_1: ; %bb4
 ; GCN-NEXT:    ; in Loop: Header=BB0_2 Depth=1
diff --git a/llvm/test/CodeGen/AMDGPU/si-unify-exit-multiple-unreachables.ll b/llvm/test/CodeGen/AMDGPU/si-unify-exit-multiple-unreachables.ll
index 2dfb72a08cffc..fbbc18ff2954e 100644
--- a/llvm/test/CodeGen/AMDGPU/si-unify-exit-multiple-unreachables.ll
+++ b/llvm/test/CodeGen/AMDGPU/si-unify-exit-multiple-unreachables.ll
@@ -16,7 +16,7 @@ define amdgpu_kernel void @kernel(i32 %a, ptr addrspace(1) %x, i32 noundef %n) {
 ; CHECK-NEXT:    s_cmpk_lg_i32 s0, 0x100
 ; CHECK-NEXT:    s_cbranch_scc0 .LBB0_6
 ; CHECK-NEXT:  ; %bb.1: ; %if.else
-; CHECK-NEXT:    v_cmp_gt_u32_e32 vcc, 10, v0
+; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, 10, v0
 ; CHECK-NEXT:    s_mov_b64 s[4:5], 0
 ; CHECK-NEXT:    s_mov_b64 s[2:3], 0
 ; CHECK-NEXT:    s_mov_b64 s[0:1], 0
diff --git a/llvm/test/CodeGen/AMDGPU/si-unify-exit-return-unreachable.ll b/llvm/test/CodeGen/AMDGPU/si-unify-exit-return-unreachable.ll
index f232275c998d2..fc400c5f3c68e 100644
--- a/llvm/test/CodeGen/AMDGPU/si-unify-exit-return-unreachable.ll
+++ b/llvm/test/CodeGen/AMDGPU/si-unify-exit-return-unreachable.ll
@@ -1,5 +1,4 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s
 ; RUN: opt -mtriple=amdgcn-amd-amdhsa -lowerswitch -amdgpu-unify-divergent-exit-nodes -verify -structurizecfg -verify -si-annotate-control-flow -verify -S %s -o - | FileCheck -check-prefix=IR %s
 
diff --git a/llvm/test/CodeGen/AMDGPU/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/AMDGPU/srem-seteq-illegal-types.ll
index 2efe27df2d10d..ff71a4fc905c9 100644
--- a/llvm/test/CodeGen/AMDGPU/srem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/AMDGPU/srem-seteq-illegal-types.ll
@@ -10,7 +10,7 @@ define i1 @test_srem_odd(i29 %X) nounwind {
 ; CHECK-NEXT:    v_mul_lo_u32 v0, v0, s4
 ; CHECK-NEXT:    v_add_i32_e32 v0, vcc, 0x295fad, v0
 ; CHECK-NEXT:    v_and_b32_e32 v0, 0x1fffffff, v0
-; CHECK-NEXT:    v_cmp_gt_u32_e32 vcc, s5, v0
+; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, s5, v0
 ; CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
 ; CHECK-NEXT:    s_setpc_b64 s[30:31]
   %srem = srem i29 %X, 99
diff --git a/llvm/test/CodeGen/AMDGPU/srem.ll b/llvm/test/CodeGen/AMDGPU/srem.ll
index 6da7d1b7ee868..cbbc34467eeab 100644
--- a/llvm/test/CodeGen/AMDGPU/srem.ll
+++ b/llvm/test/CodeGen/AMDGPU/srem.ll
@@ -1607,7 +1607,7 @@ define amdgpu_kernel void @srem_i64(ptr addrspace(1) %out, ptr addrspace(1) %in)
 ; GCN-NEXT:    s_subb_u32 s15, s7, 0
 ; GCN-NEXT:    s_cmp_ge_u32 s15, s13
 ; GCN-NEXT:    s_cselect_b32 s16, -1, 0
-; GCN-NEXT:    v_cmp_le_u32_e64 s[2:3], s12, v1
+; GCN-NEXT:    v_cmp_lt_u32_e64 s[2:3], s12, v1
 ; GCN-NEXT:    s_cmp_eq_u32 s15, s13
 ; GCN-NEXT:    v_cndmask_b32_e64 v2, 0, -1, s[2:3]
 ; GCN-NEXT:    v_mov_b32_e32 v3, s16
@@ -1627,7 +1627,7 @@ define amdgpu_kernel void @srem_i64(ptr addrspace(1) %out, ptr addrspace(1) %in)
 ; GCN-NEXT:    s_subb_u32 s0, s17, s5
 ; GCN-NEXT:    s_cmp_ge_u32 s0, s13
 ; GCN-NEXT:    s_cselect_b32 s1, -1, 0
-; GCN-NEXT:    v_cmp_le_u32_e32 vcc, s12, v0
+; GCN-NEXT:    v_cmp_lt_u32_e32 vcc, s12, v0
 ; GCN-NEXT:    s_cmp_eq_u32 s0, s13
 ; GCN-NEXT:    v_cndmask_b32_e64 v3, 0, -1, vcc
 ; GCN-NEXT:    v_mov_b32_e32 v4, s1
@@ -2806,7 +2806,7 @@ define amdgpu_kernel void @srem_v2i64(ptr addrspace(1) %out, ptr addrspace(1) %i
 ; GCN-NEXT:    s_subb_u32 s19, s13, 0
 ; GCN-NEXT:    s_cmp_ge_u32 s19, s17
 ; GCN-NEXT:    s_cselect_b32 s20, -1, 0
-; GCN-NEXT:    v_cmp_le_u32_e64 s[2:3], s16, v1
+; GCN-NEXT:    v_cmp_lt_u32_e64 s[2:3], s16, v1
 ; GCN-NEXT:    s_cmp_eq_u32 s19, s17
 ; GCN-NEXT:    v_cndmask_b32_e64 v2, 0, -1, s[2:3]
 ; GCN-NEXT:    v_mov_b32_e32 v3, s20
@@ -2826,7 +2826,7 @@ define amdgpu_kernel void @srem_v2i64(ptr addrspace(1) %out, ptr addrspace(1) %i
 ; GCN-NEXT:    s_subb_u32 s0, s21, s11
 ; GCN-NEXT:    s_cmp_ge_u32 s0, s17
 ; GCN-NEXT:    s_cselect_b32 s1, -1, 0
-; GCN-NEXT:    v_cmp_le_u32_e32 vcc, s16, v0
+; GCN-NEXT:    v_cmp_lt_u32_e32 vcc, s16, v0
 ; GCN-NEXT:    s_cmp_eq_u32 s0, s17
 ; GCN-NEXT:    v_cndmask_b32_e64 v3, 0, -1, vcc
 ; GCN-NEXT:    v_mov_b32_e32 v4, s1
@@ -2972,7 +2972,7 @@ define amdgpu_kernel void @srem_v2i64(ptr addrspace(1) %out, ptr addrspace(1) %i
 ; GCN-NEXT:    s_subb_u32 s15, s7, 0
 ; GCN-NEXT:    s_cmp_ge_u32 s15, s13
 ; GCN-NEXT:    s_cselect_b32 s16, -1, 0
-; GCN-NEXT:    v_cmp_le_u32_e64 s[2:3], s12, v3
+; GCN-NEXT:    v_cmp_lt_u32_e64 s[2:3], s12, v3
 ; GCN-NEXT:    s_cmp_eq_u32 s15, s13
 ; GCN-NEXT:    v_cndmask_b32_e64 v4, 0, -1, s[2:3]
 ; GCN-NEXT:    v_mov_b32_e32 v5, s16
@@ -2992,7 +2992,7 @@ define amdgpu_kernel void @srem_v2i64(ptr addrspace(1) %out, ptr addrspace(1) %i
 ; GCN-NEXT:    s_subb_u32 s0, s17, s5
 ; GCN-NEXT:    s_cmp_ge_u32 s0, s13
 ; GCN-NEXT:    s_cselect_b32 s1, -1, 0
-; GCN-NEXT:    v_cmp_le_u32_e32 vcc, s12, v2
+; GCN-NEXT:    v_cmp_lt_u32_e32 vcc, s12, v2
 ; GCN-NEXT:    s_cmp_eq_u32 s0, s13
 ; GCN-NEXT:    v_cndmask_b32_e64 v5, 0, -1, vcc
 ; GCN-NEXT:    v_mov_b32_e32 v6, s1
@@ -3021,10 +3021,10 @@ define amdgpu_kernel void @srem_v2i64(ptr addrspace(1) %out, ptr addrspace(1) %i
 ; GCN-NEXT:    v_mul_lo_u32 v2, v2, s4
 ; GCN-NEXT:    v_sub_u32_e32 v2, s6, v2
 ; GCN-NEXT:    v_subrev_u32_e32 v3, s4, v2
-; GCN-NEXT:    v_cmp_le_u32_e32 vcc, s4, v2
+; GCN-NEXT:    v_cmp_lt_u32_e32 vcc, s4, v2
 ; GCN-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
 ; GCN-NEXT:    v_subrev_u32_e32 v3, s4, v2
-; GCN-NEXT:    v_cmp_le_u32_e32 vcc, s4, v2
+; GCN-NEXT:    v_cmp_lt_u32_e32 vcc, s4, v2
 ; GCN-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
 ; GCN-NEXT:    v_mov_b32_e32 v3, 0
 ; GCN-NEXT:  .LBB10_6:
@@ -4980,7 +4980,7 @@ define amdgpu_kernel void @srem_v4i64(ptr addrspace(1) %out, ptr addrspace(1) %i
 ; GCN-NEXT:    s_subb_u32 s15, s7, 0
 ; GCN-NEXT:    s_cmp_ge_u32 s15, s13
 ; GCN-NEXT:    s_cselect_b32 s16, -1, 0
-; GCN-NEXT:    v_cmp_le_u32_e64 s[2:3], s12, v9
+; GCN-NEXT:    v_cmp_lt_u32_e64 s[2:3], s12, v9
 ; GCN-NEXT:    s_cmp_eq_u32 s15, s13
 ; GCN-NEXT:    v_cndmask_b32_e64 v10, 0, -1, s[2:3]
 ; GCN-NEXT:    v_mov_b32_e32 v11, s16
@@ -5000,7 +5000,7 @@ define amdgpu_kernel void @srem_v4i64(ptr addrspace(1) %out, ptr addrspace(1) %i
 ; GCN-NEXT:    s_subb_u32 s0, s17, s5
 ; GCN-NEXT:    s_cmp_ge_u32 s0, s13
 ; GCN-NEXT:    s_cselect_b32 s1, -1, 0
-; GCN-NEXT:    v_cmp_le_u32_e32 vcc, s12, v8
+; GCN-NEXT:    v_cmp_lt_u32_e32 vcc, s12, v8
 ; GCN-NEXT:    s_cmp_eq_u32 s0, s13
 ; GCN-NEXT:    v_cndmask_b32_e64 v11, 0, -1, vcc
 ; GCN-NEXT:    v_mov_b32_e32 v14, s1
diff --git a/llvm/test/CodeGen/AMDGPU/srem64.ll b/llvm/test/CodeGen/AMDGPU/srem64.ll
index a5cb9a507bd89..fe8c43273f15a 100644
--- a/llvm/test/CodeGen/AMDGPU/srem64.ll
+++ b/llvm/test/CodeGen/AMDGPU/srem64.ll
@@ -94,9 +94,9 @@ define amdgpu_kernel void @s_test_srem(ptr addrspace(1) %out, i64 %x, i64 %y) {
 ; GCN-NEXT:    v_subb_u32_e64 v2, s[0:1], v2, v3, vcc
 ; GCN-NEXT:    v_subrev_i32_e64 v4, s[0:1], s12, v0
 ; GCN-NEXT:    v_subbrev_u32_e64 v5, s[2:3], 0, v2, s[0:1]
-; GCN-NEXT:    v_cmp_le_u32_e64 s[2:3], s13, v5
+; GCN-NEXT:    v_cmp_lt_u32_e64 s[2:3], s13, v5
 ; GCN-NEXT:    v_cndmask_b32_e64 v6, 0, -1, s[2:3]
-; GCN-NEXT:    v_cmp_le_u32_e64 s[2:3], s12, v4
+; GCN-NEXT:    v_cmp_lt_u32_e64 s[2:3], s12, v4
 ; GCN-NEXT:    v_subb_u32_e64 v2, s[0:1], v2, v3, s[0:1]
 ; GCN-NEXT:    v_cndmask_b32_e64 v7, 0, -1, s[2:3]
 ; GCN-NEXT:    v_cmp_eq_u32_e64 s[2:3], s13, v5
@@ -107,9 +107,9 @@ define amdgpu_kernel void @s_test_srem(ptr addrspace(1) %out, i64 %x, i64 %y) {
 ; GCN-NEXT:    v_cndmask_b32_e64 v3, v4, v3, s[0:1]
 ; GCN-NEXT:    v_mov_b32_e32 v4, s11
 ; GCN-NEXT:    v_subb_u32_e32 v1, vcc, v4, v1, vcc
-; GCN-NEXT:    v_cmp_le_u32_e32 vcc, s13, v1
+; GCN-NEXT:    v_cmp_lt_u32_e32 vcc, s13, v1
 ; GCN-NEXT:    v_cndmask_b32_e64 v4, 0, -1, vcc
-; GCN-NEXT:    v_cmp_le_u32_e32 vcc, s12, v0
+; GCN-NEXT:    v_cmp_lt_u32_e32 vcc, s12, v0
 ; GCN-NEXT:    v_cndmask_b32_e64 v2, v5, v2, s[0:1]
 ; GCN-NEXT:    v_cndmask_b32_e64 v5, 0, -1, vcc
 ; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, s13, v1
@@ -355,7 +355,7 @@ define i64 @v_test_srem(i64 %x, i64 %y) {
 ; GCN-IR-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[2:3]
 ; GCN-IR-NEXT:    v_cmp_eq_u64_e64 s[4:5], 0, v[0:1]
 ; GCN-IR-NEXT:    v_subb_u32_e64 v5, s[6:7], 0, 0, s[6:7]
-; GCN-IR-NEXT:    v_cmp_lt_u64_e64 s[6:7], 63, v[4:5]
+; GCN-IR-NEXT:    v_cmp_le_u64_e64 s[6:7], 63, v[4:5]
 ; GCN-IR-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
 ; GCN-IR-NEXT:    s_or_b64 s[4:5], s[4:5], s[6:7]
 ; GCN-IR-NEXT:    v_cmp_ne_u64_e32 vcc, 63, v[4:5]
@@ -1019,9 +1019,9 @@ define amdgpu_kernel void @s_test_srem33_64(ptr addrspace(1) %out, i64 %x, i64 %
 ; GCN-NEXT:    v_subb_u32_e64 v2, s[0:1], v2, v3, vcc
 ; GCN-NEXT:    v_subrev_i32_e64 v4, s[0:1], s8, v0
 ; GCN-NEXT:    v_subbrev_u32_e64 v5, s[2:3], 0, v2, s[0:1]
-; GCN-NEXT:    v_cmp_le_u32_e64 s[2:3], s9, v5
+; GCN-NEXT:    v_cmp_lt_u32_e64 s[2:3], s9, v5
 ; GCN-NEXT:    v_cndmask_b32_e64 v6, 0, -1, s[2:3]
-; GCN-NEXT:    v_cmp_le_u32_e64 s[2:3], s8, v4
+; GCN-NEXT:    v_cmp_lt_u32_e64 s[2:3], s8, v4
 ; GCN-NEXT:    v_subb_u32_e64 v2, s[0:1], v2, v3, s[0:1]
 ; GCN-NEXT:    v_cndmask_b32_e64 v7, 0, -1, s[2:3]
 ; GCN-NEXT:    v_cmp_eq_u32_e64 s[2:3], s9, v5
@@ -1032,9 +1032,9 @@ define amdgpu_kernel void @s_test_srem33_64(ptr addrspace(1) %out, i64 %x, i64 %
 ; GCN-NEXT:    v_cndmask_b32_e64 v3, v4, v3, s[0:1]
 ; GCN-NEXT:    v_mov_b32_e32 v4, s11
 ; GCN-NEXT:    v_subb_u32_e32 v1, vcc, v4, v1, vcc
-; GCN-NEXT:    v_cmp_le_u32_e32 vcc, s9, v1
+; GCN-NEXT:    v_cmp_lt_u32_e32 vcc, s9, v1
 ; GCN-NEXT:    v_cndmask_b32_e64 v4, 0, -1, vcc
-; GCN-NEXT:    v_cmp_le_u32_e32 vcc, s8, v0
+; GCN-NEXT:    v_cmp_lt_u32_e32 vcc, s8, v0
 ; GCN-NEXT:    v_cndmask_b32_e64 v2, v5, v2, s[0:1]
 ; GCN-NEXT:    v_cndmask_b32_e64 v5, 0, -1, vcc
 ; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, s9, v1
@@ -1317,9 +1317,9 @@ define amdgpu_kernel void @s_test_srem_k_num_i64(ptr addrspace(1) %out, i64 %x)
 ; GCN-NEXT:    v_subb_u32_e64 v2, s[0:1], v2, v3, vcc
 ; GCN-NEXT:    v_subrev_i32_e64 v4, s[0:1], s8, v0
 ; GCN-NEXT:    v_subbrev_u32_e64 v5, s[2:3], 0, v2, s[0:1]
-; GCN-NEXT:    v_cmp_le_u32_e64 s[2:3], s9, v5
+; GCN-NEXT:    v_cmp_lt_u32_e64 s[2:3], s9, v5
 ; GCN-NEXT:    v_cndmask_b32_e64 v6, 0, -1, s[2:3]
-; GCN-NEXT:    v_cmp_le_u32_e64 s[2:3], s8, v4
+; GCN-NEXT:    v_cmp_lt_u32_e64 s[2:3], s8, v4
 ; GCN-NEXT:    v_subb_u32_e64 v2, s[0:1], v2, v3, s[0:1]
 ; GCN-NEXT:    v_cndmask_b32_e64 v7, 0, -1, s[2:3]
 ; GCN-NEXT:    v_cmp_eq_u32_e64 s[2:3], s9, v5
@@ -1328,10 +1328,10 @@ define amdgpu_kernel void @s_test_srem_k_num_i64(ptr addrspace(1) %out, i64 %x)
 ; GCN-NEXT:    v_subbrev_u32_e64 v2, s[0:1], 0, v2, s[0:1]
 ; GCN-NEXT:    v_subb_u32_e32 v1, vcc, 0, v1, vcc
 ; GCN-NEXT:    v_cmp_ne_u32_e64 s[0:1], 0, v6
-; GCN-NEXT:    v_cmp_le_u32_e32 vcc, s9, v1
+; GCN-NEXT:    v_cmp_lt_u32_e32 vcc, s9, v1
 ; GCN-NEXT:    v_cndmask_b32_e64 v3, v4, v3, s[0:1]
 ; GCN-NEXT:    v_cndmask_b32_e64 v4, 0, -1, vcc
-; GCN-NEXT:    v_cmp_le_u32_e32 vcc, s8, v0
+; GCN-NEXT:    v_cmp_lt_u32_e32 vcc, s8, v0
 ; GCN-NEXT:    v_cndmask_b32_e64 v2, v5, v2, s[0:1]
 ; GCN-NEXT:    v_cndmask_b32_e64 v5, 0, -1, vcc
 ; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, s9, v1
@@ -1543,7 +1543,7 @@ define i64 @v_test_srem_k_num_i64(i64 %x) {
 ; GCN-IR-NEXT:    v_add_i32_e32 v2, vcc, s6, v8
 ; GCN-IR-NEXT:    v_addc_u32_e64 v3, s[6:7], 0, -1, vcc
 ; GCN-IR-NEXT:    v_cmp_eq_u64_e64 s[4:5], 0, v[0:1]
-; GCN-IR-NEXT:    v_cmp_lt_u64_e32 vcc, 63, v[2:3]
+; GCN-IR-NEXT:    v_cmp_le_u64_e32 vcc, 63, v[2:3]
 ; GCN-IR-NEXT:    v_cmp_ne_u64_e64 s[6:7], 63, v[2:3]
 ; GCN-IR-NEXT:    s_or_b64 s[4:5], s[4:5], vcc
 ; GCN-IR-NEXT:    v_cndmask_b32_e64 v4, 24, 0, s[4:5]
@@ -1734,7 +1734,7 @@ define i64 @v_test_srem_pow2_k_num_i64(i64 %x) {
 ; GCN-IR-NEXT:    v_add_i32_e32 v2, vcc, s6, v8
 ; GCN-IR-NEXT:    v_addc_u32_e64 v3, s[6:7], 0, -1, vcc
 ; GCN-IR-NEXT:    v_cmp_eq_u64_e64 s[4:5], 0, v[0:1]
-; GCN-IR-NEXT:    v_cmp_lt_u64_e32 vcc, 63, v[2:3]
+; GCN-IR-NEXT:    v_cmp_le_u64_e32 vcc, 63, v[2:3]
 ; GCN-IR-NEXT:    v_cmp_ne_u64_e64 s[6:7], 63, v[2:3]
 ; GCN-IR-NEXT:    v_mov_b32_e32 v4, 0x8000
 ; GCN-IR-NEXT:    s_or_b64 s[4:5], s[4:5], vcc
@@ -1840,7 +1840,7 @@ define i64 @v_test_srem_pow2_k_den_i64(i64 %x) {
 ; GCN-IR-NEXT:    v_sub_i32_e64 v2, s[4:5], 48, v8
 ; GCN-IR-NEXT:    v_subb_u32_e64 v3, s[4:5], 0, 0, s[4:5]
 ; GCN-IR-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[0:1]
-; GCN-IR-NEXT:    v_cmp_lt_u64_e64 s[4:5], 63, v[2:3]
+; GCN-IR-NEXT:    v_cmp_le_u64_e64 s[4:5], 63, v[2:3]
 ; GCN-IR-NEXT:    v_mov_b32_e32 v13, v12
 ; GCN-IR-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
 ; GCN-IR-NEXT:    v_cmp_ne_u64_e32 vcc, 63, v[2:3]
diff --git a/llvm/test/CodeGen/AMDGPU/srl64_reduce.ll b/llvm/test/CodeGen/AMDGPU/srl64_reduce.ll
index 3567bafe5b1ca..163752f7a0545 100644
--- a/llvm/test/CodeGen/AMDGPU/srl64_reduce.ll
+++ b/llvm/test/CodeGen/AMDGPU/srl64_reduce.ll
@@ -497,10 +497,10 @@ define i64 @srl_maxmin(i64 %arg0, i64 noundef %arg1) {
 ; CHECK-LABEL: srl_maxmin:
 ; CHECK:       ; %bb.0:
 ; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT:    v_cmp_lt_u64_e32 vcc, 32, v[2:3]
+; CHECK-NEXT:    v_cmp_le_u64_e32 vcc, 32, v[2:3]
 ; CHECK-NEXT:    v_cndmask_b32_e32 v3, 0, v3, vcc
 ; CHECK-NEXT:    v_cndmask_b32_e32 v2, 32, v2, vcc
-; CHECK-NEXT:    v_cmp_gt_u64_e32 vcc, 63, v[2:3]
+; CHECK-NEXT:    v_cmp_ge_u64_e32 vcc, 63, v[2:3]
 ; CHECK-NEXT:    v_cndmask_b32_e32 v2, 63, v2, vcc
 ; CHECK-NEXT:    v_lshrrev_b64 v[0:1], v2, v[0:1]
 ; CHECK-NEXT:    s_setpc_b64 s[30:31]
@@ -514,15 +514,15 @@ define <2 x i64> @srl_v2_maxmin(<2 x i64> %arg0, <2 x i64> noundef %arg1) {
 ; CHECK-LABEL: srl_v2_maxmin:
 ; CHECK:       ; %bb.0:
 ; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT:    v_cmp_lt_u64_e32 vcc, 32, v[4:5]
+; CHECK-NEXT:    v_cmp_le_u64_e32 vcc, 32, v[4:5]
 ; CHECK-NEXT:    v_cndmask_b32_e32 v5, 0, v5, vcc
 ; CHECK-NEXT:    v_cndmask_b32_e32 v4, 32, v4, vcc
-; CHECK-NEXT:    v_cmp_lt_u64_e32 vcc, 32, v[6:7]
+; CHECK-NEXT:    v_cmp_le_u64_e32 vcc, 32, v[6:7]
 ; CHECK-NEXT:    v_cndmask_b32_e32 v7, 0, v7, vcc
 ; CHECK-NEXT:    v_cndmask_b32_e32 v6, 32, v6, vcc
-; CHECK-NEXT:    v_cmp_gt_u64_e32 vcc, 63, v[6:7]
+; CHECK-NEXT:    v_cmp_ge_u64_e32 vcc, 63, v[6:7]
 ; CHECK-NEXT:    v_cndmask_b32_e32 v6, 63, v6, vcc
-; CHECK-NEXT:    v_cmp_gt_u64_e32 vcc, 63, v[4:5]
+; CHECK-NEXT:    v_cmp_ge_u64_e32 vcc, 63, v[4:5]
 ; CHECK-NEXT:    v_lshrrev_b64 v[2:3], v6, v[2:3]
 ; CHECK-NEXT:    v_cndmask_b32_e32 v4, 63, v4, vcc
 ; CHECK-NEXT:    v_lshrrev_b64 v[0:1], v4, v[0:1]
@@ -537,21 +537,21 @@ define <3 x i64> @srl_v3_maxmin(<3 x i64> %arg0, <3 x i64> noundef %arg1) {
 ; CHECK-LABEL: srl_v3_maxmin:
 ; CHECK:       ; %bb.0:
 ; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT:    v_cmp_lt_u64_e32 vcc, 32, v[6:7]
+; CHECK-NEXT:    v_cmp_le_u64_e32 vcc, 32, v[6:7]
 ; CHECK-NEXT:    v_cndmask_b32_e32 v7, 0, v7, vcc
 ; CHECK-NEXT:    v_cndmask_b32_e32 v6, 32, v6, vcc
-; CHECK-NEXT:    v_cmp_lt_u64_e32 vcc, 32, v[8:9]
+; CHECK-NEXT:    v_cmp_le_u64_e32 vcc, 32, v[8:9]
 ; CHECK-NEXT:    v_cndmask_b32_e32 v9, 0, v9, vcc
 ; CHECK-NEXT:    v_cndmask_b32_e32 v8, 32, v8, vcc
-; CHECK-NEXT:    v_cmp_lt_u64_e32 vcc, 32, v[10:11]
+; CHECK-NEXT:    v_cmp_le_u64_e32 vcc, 32, v[10:11]
 ; CHECK-NEXT:    v_cndmask_b32_e32 v11, 0, v11, vcc
 ; CHECK-NEXT:    v_cndmask_b32_e32 v10, 32, v10, vcc
-; CHECK-NEXT:    v_cmp_gt_u64_e32 vcc, 63, v[10:11]
+; CHECK-NEXT:    v_cmp_ge_u64_e32 vcc, 63, v[10:11]
 ; CHECK-NEXT:    v_cndmask_b32_e32 v10, 63, v10, vcc
-; CHECK-NEXT:    v_cmp_gt_u64_e32 vcc, 63, v[8:9]
+; CHECK-NEXT:    v_cmp_ge_u64_e32 vcc, 63, v[8:9]
 ; CHECK-NEXT:    v_lshrrev_b64 v[4:5], v10, v[4:5]
 ; CHECK-NEXT:    v_cndmask_b32_e32 v8, 63, v8, vcc
-; CHECK-NEXT:    v_cmp_gt_u64_e32 vcc, 63, v[6:7]
+; CHECK-NEXT:    v_cmp_ge_u64_e32 vcc, 63, v[6:7]
 ; CHECK-NEXT:    v_lshrrev_b64 v[2:3], v8, v[2:3]
 ; CHECK-NEXT:    v_cndmask_b32_e32 v6, 63, v6, vcc
 ; CHECK-NEXT:    v_lshrrev_b64 v[0:1], v6, v[0:1]
@@ -566,27 +566,27 @@ define <4 x i64> @srl_v4_maxmin(<4 x i64> %arg0, <4 x i64> noundef %arg1) {
 ; CHECK-LABEL: srl_v4_maxmin:
 ; CHECK:       ; %bb.0:
 ; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT:    v_cmp_lt_u64_e32 vcc, 32, v[8:9]
+; CHECK-NEXT:    v_cmp_le_u64_e32 vcc, 32, v[8:9]
 ; CHECK-NEXT:    v_cndmask_b32_e32 v9, 0, v9, vcc
 ; CHECK-NEXT:    v_cndmask_b32_e32 v8, 32, v8, vcc
-; CHECK-NEXT:    v_cmp_lt_u64_e32 vcc, 32, v[10:11]
+; CHECK-NEXT:    v_cmp_le_u64_e32 vcc, 32, v[10:11]
 ; CHECK-NEXT:    v_cndmask_b32_e32 v11, 0, v11, vcc
 ; CHECK-NEXT:    v_cndmask_b32_e32 v10, 32, v10, vcc
-; CHECK-NEXT:    v_cmp_lt_u64_e32 vcc, 32, v[12:13]
+; CHECK-NEXT:    v_cmp_le_u64_e32 vcc, 32, v[12:13]
 ; CHECK-NEXT:    v_cndmask_b32_e32 v13, 0, v13, vcc
 ; CHECK-NEXT:    v_cndmask_b32_e32 v12, 32, v12, vcc
-; CHECK-NEXT:    v_cmp_lt_u64_e32 vcc, 32, v[14:15]
+; CHECK-NEXT:    v_cmp_le_u64_e32 vcc, 32, v[14:15]
 ; CHECK-NEXT:    v_cndmask_b32_e32 v15, 0, v15, vcc
 ; CHECK-NEXT:    v_cndmask_b32_e32 v14, 32, v14, vcc
-; CHECK-NEXT:    v_cmp_gt_u64_e32 vcc, 63, v[14:15]
+; CHECK-NEXT:    v_cmp_ge_u64_e32 vcc, 63, v[14:15]
 ; CHECK-NEXT:    v_cndmask_b32_e32 v14, 63, v14, vcc
-; CHECK-NEXT:    v_cmp_gt_u64_e32 vcc, 63, v[12:13]
+; CHECK-NEXT:    v_cmp_ge_u64_e32 vcc, 63, v[12:13]
 ; CHECK-NEXT:    v_lshrrev_b64 v[6:7], v14, v[6:7]
 ; CHECK-NEXT:    v_cndmask_b32_e32 v12, 63, v12, vcc
-; CHECK-NEXT:    v_cmp_gt_u64_e32 vcc, 63, v[10:11]
+; CHECK-NEXT:    v_cmp_ge_u64_e32 vcc, 63, v[10:11]
 ; CHECK-NEXT:    v_lshrrev_b64 v[4:5], v12, v[4:5]
 ; CHECK-NEXT:    v_cndmask_b32_e32 v10, 63, v10, vcc
-; CHECK-NEXT:    v_cmp_gt_u64_e32 vcc, 63, v[8:9]
+; CHECK-NEXT:    v_cmp_ge_u64_e32 vcc, 63, v[8:9]
 ; CHECK-NEXT:    v_lshrrev_b64 v[2:3], v10, v[2:3]
 ; CHECK-NEXT:    v_cndmask_b32_e32 v8, 63, v8, vcc
 ; CHECK-NEXT:    v_lshrrev_b64 v[0:1], v8, v[0:1]
diff --git a/llvm/test/CodeGen/AMDGPU/ssubsat.ll b/llvm/test/CodeGen/AMDGPU/ssubsat.ll
index 40d80f5e83e36..c1eb0a925160c 100644
--- a/llvm/test/CodeGen/AMDGPU/ssubsat.ll
+++ b/llvm/test/CodeGen/AMDGPU/ssubsat.ll
@@ -120,7 +120,7 @@ define i32 @v_ssubsat_i32(i32 %lhs, i32 %rhs) {
 ; GFX6-LABEL: v_ssubsat_i32:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v1
+; GFX6-NEXT:    v_cmp_le_i32_e32 vcc, 0, v1
 ; GFX6-NEXT:    v_sub_i32_e64 v1, s[4:5], v0, v1
 ; GFX6-NEXT:    v_cmp_lt_i32_e64 s[4:5], v1, v0
 ; GFX6-NEXT:    v_ashrrev_i32_e32 v0, 31, v1
@@ -132,7 +132,7 @@ define i32 @v_ssubsat_i32(i32 %lhs, i32 %rhs) {
 ; GFX8-LABEL: v_ssubsat_i32:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v1
+; GFX8-NEXT:    v_cmp_le_i32_e32 vcc, 0, v1
 ; GFX8-NEXT:    v_sub_u32_e64 v1, s[4:5], v0, v1
 ; GFX8-NEXT:    v_cmp_lt_i32_e64 s[4:5], v1, v0
 ; GFX8-NEXT:    v_ashrrev_i32_e32 v0, 31, v1
@@ -379,7 +379,7 @@ define <2 x i32> @v_ssubsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
 ; GFX6-LABEL: v_ssubsat_v2i32:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v2
+; GFX6-NEXT:    v_cmp_le_i32_e32 vcc, 0, v2
 ; GFX6-NEXT:    v_sub_i32_e64 v2, s[4:5], v0, v2
 ; GFX6-NEXT:    v_cmp_lt_i32_e64 s[4:5], v2, v0
 ; GFX6-NEXT:    v_ashrrev_i32_e32 v0, 31, v2
@@ -387,7 +387,7 @@ define <2 x i32> @v_ssubsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
 ; GFX6-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
 ; GFX6-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
 ; GFX6-NEXT:    v_sub_i32_e64 v2, s[4:5], v1, v3
-; GFX6-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v3
+; GFX6-NEXT:    v_cmp_le_i32_e32 vcc, 0, v3
 ; GFX6-NEXT:    v_cmp_lt_i32_e64 s[4:5], v2, v1
 ; GFX6-NEXT:    v_ashrrev_i32_e32 v1, 31, v2
 ; GFX6-NEXT:    v_xor_b32_e32 v1, 0x80000000, v1
@@ -398,7 +398,7 @@ define <2 x i32> @v_ssubsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
 ; GFX8-LABEL: v_ssubsat_v2i32:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v2
+; GFX8-NEXT:    v_cmp_le_i32_e32 vcc, 0, v2
 ; GFX8-NEXT:    v_sub_u32_e64 v2, s[4:5], v0, v2
 ; GFX8-NEXT:    v_cmp_lt_i32_e64 s[4:5], v2, v0
 ; GFX8-NEXT:    v_ashrrev_i32_e32 v0, 31, v2
@@ -406,7 +406,7 @@ define <2 x i32> @v_ssubsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
 ; GFX8-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
 ; GFX8-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
 ; GFX8-NEXT:    v_sub_u32_e64 v2, s[4:5], v1, v3
-; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v3
+; GFX8-NEXT:    v_cmp_le_i32_e32 vcc, 0, v3
 ; GFX8-NEXT:    v_cmp_lt_i32_e64 s[4:5], v2, v1
 ; GFX8-NEXT:    v_ashrrev_i32_e32 v1, 31, v2
 ; GFX8-NEXT:    v_xor_b32_e32 v1, 0x80000000, v1
@@ -435,7 +435,7 @@ define <3 x i32> @v_ssubsat_v3i32(<3 x i32> %lhs, <3 x i32> %rhs) {
 ; GFX6-LABEL: v_ssubsat_v3i32:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v3
+; GFX6-NEXT:    v_cmp_le_i32_e32 vcc, 0, v3
 ; GFX6-NEXT:    v_sub_i32_e64 v3, s[4:5], v0, v3
 ; GFX6-NEXT:    v_cmp_lt_i32_e64 s[4:5], v3, v0
 ; GFX6-NEXT:    v_ashrrev_i32_e32 v0, 31, v3
@@ -443,14 +443,14 @@ define <3 x i32> @v_ssubsat_v3i32(<3 x i32> %lhs, <3 x i32> %rhs) {
 ; GFX6-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
 ; GFX6-NEXT:    v_cndmask_b32_e32 v0, v3, v0, vcc
 ; GFX6-NEXT:    v_sub_i32_e64 v3, s[4:5], v1, v4
-; GFX6-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v4
+; GFX6-NEXT:    v_cmp_le_i32_e32 vcc, 0, v4
 ; GFX6-NEXT:    v_cmp_lt_i32_e64 s[4:5], v3, v1
 ; GFX6-NEXT:    v_ashrrev_i32_e32 v1, 31, v3
 ; GFX6-NEXT:    v_xor_b32_e32 v1, 0x80000000, v1
 ; GFX6-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
 ; GFX6-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
 ; GFX6-NEXT:    v_sub_i32_e64 v3, s[4:5], v2, v5
-; GFX6-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v5
+; GFX6-NEXT:    v_cmp_le_i32_e32 vcc, 0, v5
 ; GFX6-NEXT:    v_cmp_lt_i32_e64 s[4:5], v3, v2
 ; GFX6-NEXT:    v_ashrrev_i32_e32 v2, 31, v3
 ; GFX6-NEXT:    v_xor_b32_e32 v2, 0x80000000, v2
@@ -461,7 +461,7 @@ define <3 x i32> @v_ssubsat_v3i32(<3 x i32> %lhs, <3 x i32> %rhs) {
 ; GFX8-LABEL: v_ssubsat_v3i32:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v3
+; GFX8-NEXT:    v_cmp_le_i32_e32 vcc, 0, v3
 ; GFX8-NEXT:    v_sub_u32_e64 v3, s[4:5], v0, v3
 ; GFX8-NEXT:    v_cmp_lt_i32_e64 s[4:5], v3, v0
 ; GFX8-NEXT:    v_ashrrev_i32_e32 v0, 31, v3
@@ -469,14 +469,14 @@ define <3 x i32> @v_ssubsat_v3i32(<3 x i32> %lhs, <3 x i32> %rhs) {
 ; GFX8-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
 ; GFX8-NEXT:    v_cndmask_b32_e32 v0, v3, v0, vcc
 ; GFX8-NEXT:    v_sub_u32_e64 v3, s[4:5], v1, v4
-; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v4
+; GFX8-NEXT:    v_cmp_le_i32_e32 vcc, 0, v4
 ; GFX8-NEXT:    v_cmp_lt_i32_e64 s[4:5], v3, v1
 ; GFX8-NEXT:    v_ashrrev_i32_e32 v1, 31, v3
 ; GFX8-NEXT:    v_xor_b32_e32 v1, 0x80000000, v1
 ; GFX8-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
 ; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
 ; GFX8-NEXT:    v_sub_u32_e64 v3, s[4:5], v2, v5
-; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v5
+; GFX8-NEXT:    v_cmp_le_i32_e32 vcc, 0, v5
 ; GFX8-NEXT:    v_cmp_lt_i32_e64 s[4:5], v3, v2
 ; GFX8-NEXT:    v_ashrrev_i32_e32 v2, 31, v3
 ; GFX8-NEXT:    v_xor_b32_e32 v2, 0x80000000, v2
@@ -507,7 +507,7 @@ define <4 x i32> @v_ssubsat_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
 ; GFX6-LABEL: v_ssubsat_v4i32:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v4
+; GFX6-NEXT:    v_cmp_le_i32_e32 vcc, 0, v4
 ; GFX6-NEXT:    v_sub_i32_e64 v4, s[4:5], v0, v4
 ; GFX6-NEXT:    v_cmp_lt_i32_e64 s[4:5], v4, v0
 ; GFX6-NEXT:    v_ashrrev_i32_e32 v0, 31, v4
@@ -515,21 +515,21 @@ define <4 x i32> @v_ssubsat_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
 ; GFX6-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
 ; GFX6-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
 ; GFX6-NEXT:    v_sub_i32_e64 v4, s[4:5], v1, v5
-; GFX6-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v5
+; GFX6-NEXT:    v_cmp_le_i32_e32 vcc, 0, v5
 ; GFX6-NEXT:    v_cmp_lt_i32_e64 s[4:5], v4, v1
 ; GFX6-NEXT:    v_ashrrev_i32_e32 v1, 31, v4
 ; GFX6-NEXT:    v_xor_b32_e32 v1, 0x80000000, v1
 ; GFX6-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
 ; GFX6-NEXT:    v_cndmask_b32_e32 v1, v4, v1, vcc
 ; GFX6-NEXT:    v_sub_i32_e64 v4, s[4:5], v2, v6
-; GFX6-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v6
+; GFX6-NEXT:    v_cmp_le_i32_e32 vcc, 0, v6
 ; GFX6-NEXT:    v_cmp_lt_i32_e64 s[4:5], v4, v2
 ; GFX6-NEXT:    v_ashrrev_i32_e32 v2, 31, v4
 ; GFX6-NEXT:    v_xor_b32_e32 v2, 0x80000000, v2
 ; GFX6-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
 ; GFX6-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
 ; GFX6-NEXT:    v_sub_i32_e64 v4, s[4:5], v3, v7
-; GFX6-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v7
+; GFX6-NEXT:    v_cmp_le_i32_e32 vcc, 0, v7
 ; GFX6-NEXT:    v_cmp_lt_i32_e64 s[4:5], v4, v3
 ; GFX6-NEXT:    v_ashrrev_i32_e32 v3, 31, v4
 ; GFX6-NEXT:    v_xor_b32_e32 v3, 0x80000000, v3
@@ -540,7 +540,7 @@ define <4 x i32> @v_ssubsat_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
 ; GFX8-LABEL: v_ssubsat_v4i32:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v4
+; GFX8-NEXT:    v_cmp_le_i32_e32 vcc, 0, v4
 ; GFX8-NEXT:    v_sub_u32_e64 v4, s[4:5], v0, v4
 ; GFX8-NEXT:    v_cmp_lt_i32_e64 s[4:5], v4, v0
 ; GFX8-NEXT:    v_ashrrev_i32_e32 v0, 31, v4
@@ -548,21 +548,21 @@ define <4 x i32> @v_ssubsat_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
 ; GFX8-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
 ; GFX8-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
 ; GFX8-NEXT:    v_sub_u32_e64 v4, s[4:5], v1, v5
-; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v5
+; GFX8-NEXT:    v_cmp_le_i32_e32 vcc, 0, v5
 ; GFX8-NEXT:    v_cmp_lt_i32_e64 s[4:5], v4, v1
 ; GFX8-NEXT:    v_ashrrev_i32_e32 v1, 31, v4
 ; GFX8-NEXT:    v_xor_b32_e32 v1, 0x80000000, v1
 ; GFX8-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
 ; GFX8-NEXT:    v_cndmask_b32_e32 v1, v4, v1, vcc
 ; GFX8-NEXT:    v_sub_u32_e64 v4, s[4:5], v2, v6
-; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v6
+; GFX8-NEXT:    v_cmp_le_i32_e32 vcc, 0, v6
 ; GFX8-NEXT:    v_cmp_lt_i32_e64 s[4:5], v4, v2
 ; GFX8-NEXT:    v_ashrrev_i32_e32 v2, 31, v4
 ; GFX8-NEXT:    v_xor_b32_e32 v2, 0x80000000, v2
 ; GFX8-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
 ; GFX8-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
 ; GFX8-NEXT:    v_sub_u32_e64 v4, s[4:5], v3, v7
-; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v7
+; GFX8-NEXT:    v_cmp_le_i32_e32 vcc, 0, v7
 ; GFX8-NEXT:    v_cmp_lt_i32_e64 s[4:5], v4, v3
 ; GFX8-NEXT:    v_ashrrev_i32_e32 v3, 31, v4
 ; GFX8-NEXT:    v_xor_b32_e32 v3, 0x80000000, v3
@@ -595,7 +595,7 @@ define <8 x i32> @v_ssubsat_v8i32(<8 x i32> %lhs, <8 x i32> %rhs) {
 ; GFX6-LABEL: v_ssubsat_v8i32:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v8
+; GFX6-NEXT:    v_cmp_le_i32_e32 vcc, 0, v8
 ; GFX6-NEXT:    v_sub_i32_e64 v8, s[4:5], v0, v8
 ; GFX6-NEXT:    v_cmp_lt_i32_e64 s[4:5], v8, v0
 ; GFX6-NEXT:    v_ashrrev_i32_e32 v0, 31, v8
@@ -603,49 +603,49 @@ define <8 x i32> @v_ssubsat_v8i32(<8 x i32> %lhs, <8 x i32> %rhs) {
 ; GFX6-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
 ; GFX6-NEXT:    v_cndmask_b32_e32 v0, v8, v0, vcc
 ; GFX6-NEXT:    v_sub_i32_e64 v8, s[4:5], v1, v9
-; GFX6-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v9
+; GFX6-NEXT:    v_cmp_le_i32_e32 vcc, 0, v9
 ; GFX6-NEXT:    v_cmp_lt_i32_e64 s[4:5], v8, v1
 ; GFX6-NEXT:    v_ashrrev_i32_e32 v1, 31, v8
 ; GFX6-NEXT:    v_xor_b32_e32 v1, 0x80000000, v1
 ; GFX6-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
 ; GFX6-NEXT:    v_cndmask_b32_e32 v1, v8, v1, vcc
 ; GFX6-NEXT:    v_sub_i32_e64 v8, s[4:5], v2, v10
-; GFX6-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v10
+; GFX6-NEXT:    v_cmp_le_i32_e32 vcc, 0, v10
 ; GFX6-NEXT:    v_cmp_lt_i32_e64 s[4:5], v8, v2
 ; GFX6-NEXT:    v_ashrrev_i32_e32 v2, 31, v8
 ; GFX6-NEXT:    v_xor_b32_e32 v2, 0x80000000, v2
 ; GFX6-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
 ; GFX6-NEXT:    v_cndmask_b32_e32 v2, v8, v2, vcc
 ; GFX6-NEXT:    v_sub_i32_e64 v8, s[4:5], v3, v11
-; GFX6-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v11
+; GFX6-NEXT:    v_cmp_le_i32_e32 vcc, 0, v11
 ; GFX6-NEXT:    v_cmp_lt_i32_e64 s[4:5], v8, v3
 ; GFX6-NEXT:    v_ashrrev_i32_e32 v3, 31, v8
 ; GFX6-NEXT:    v_xor_b32_e32 v3, 0x80000000, v3
 ; GFX6-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
 ; GFX6-NEXT:    v_cndmask_b32_e32 v3, v8, v3, vcc
 ; GFX6-NEXT:    v_sub_i32_e64 v8, s[4:5], v4, v12
-; GFX6-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v12
+; GFX6-NEXT:    v_cmp_le_i32_e32 vcc, 0, v12
 ; GFX6-NEXT:    v_cmp_lt_i32_e64 s[4:5], v8, v4
 ; GFX6-NEXT:    v_ashrrev_i32_e32 v4, 31, v8
 ; GFX6-NEXT:    v_xor_b32_e32 v4, 0x80000000, v4
 ; GFX6-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
 ; GFX6-NEXT:    v_cndmask_b32_e32 v4, v8, v4, vcc
 ; GFX6-NEXT:    v_sub_i32_e64 v8, s[4:5], v5, v13
-; GFX6-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v13
+; GFX6-NEXT:    v_cmp_le_i32_e32 vcc, 0, v13
 ; GFX6-NEXT:    v_cmp_lt_i32_e64 s[4:5], v8, v5
 ; GFX6-NEXT:    v_ashrrev_i32_e32 v5, 31, v8
 ; GFX6-NEXT:    v_xor_b32_e32 v5, 0x80000000, v5
 ; GFX6-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
 ; GFX6-NEXT:    v_cndmask_b32_e32 v5, v8, v5, vcc
 ; GFX6-NEXT:    v_sub_i32_e64 v8, s[4:5], v6, v14
-; GFX6-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v14
+; GFX6-NEXT:    v_cmp_le_i32_e32 vcc, 0, v14
 ; GFX6-NEXT:    v_cmp_lt_i32_e64 s[4:5], v8, v6
 ; GFX6-NEXT:    v_ashrrev_i32_e32 v6, 31, v8
 ; GFX6-NEXT:    v_xor_b32_e32 v6, 0x80000000, v6
 ; GFX6-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
 ; GFX6-NEXT:    v_cndmask_b32_e32 v6, v8, v6, vcc
 ; GFX6-NEXT:    v_sub_i32_e64 v8, s[4:5], v7, v15
-; GFX6-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v15
+; GFX6-NEXT:    v_cmp_le_i32_e32 vcc, 0, v15
 ; GFX6-NEXT:    v_cmp_lt_i32_e64 s[4:5], v8, v7
 ; GFX6-NEXT:    v_ashrrev_i32_e32 v7, 31, v8
 ; GFX6-NEXT:    v_xor_b32_e32 v7, 0x80000000, v7
@@ -656,7 +656,7 @@ define <8 x i32> @v_ssubsat_v8i32(<8 x i32> %lhs, <8 x i32> %rhs) {
 ; GFX8-LABEL: v_ssubsat_v8i32:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v8
+; GFX8-NEXT:    v_cmp_le_i32_e32 vcc, 0, v8
 ; GFX8-NEXT:    v_sub_u32_e64 v8, s[4:5], v0, v8
 ; GFX8-NEXT:    v_cmp_lt_i32_e64 s[4:5], v8, v0
 ; GFX8-NEXT:    v_ashrrev_i32_e32 v0, 31, v8
@@ -664,49 +664,49 @@ define <8 x i32> @v_ssubsat_v8i32(<8 x i32> %lhs, <8 x i32> %rhs) {
 ; GFX8-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
 ; GFX8-NEXT:    v_cndmask_b32_e32 v0, v8, v0, vcc
 ; GFX8-NEXT:    v_sub_u32_e64 v8, s[4:5], v1, v9
-; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v9
+; GFX8-NEXT:    v_cmp_le_i32_e32 vcc, 0, v9
 ; GFX8-NEXT:    v_cmp_lt_i32_e64 s[4:5], v8, v1
 ; GFX8-NEXT:    v_ashrrev_i32_e32 v1, 31, v8
 ; GFX8-NEXT:    v_xor_b32_e32 v1, 0x80000000, v1
 ; GFX8-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
 ; GFX8-NEXT:    v_cndmask_b32_e32 v1, v8, v1, vcc
 ; GFX8-NEXT:    v_sub_u32_e64 v8, s[4:5], v2, v10
-; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v10
+; GFX8-NEXT:    v_cmp_le_i32_e32 vcc, 0, v10
 ; GFX8-NEXT:    v_cmp_lt_i32_e64 s[4:5], v8, v2
 ; GFX8-NEXT:    v_ashrrev_i32_e32 v2, 31, v8
 ; GFX8-NEXT:    v_xor_b32_e32 v2, 0x80000000, v2
 ; GFX8-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
 ; GFX8-NEXT:    v_cndmask_b32_e32 v2, v8, v2, vcc
 ; GFX8-NEXT:    v_sub_u32_e64 v8, s[4:5], v3, v11
-; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v11
+; GFX8-NEXT:    v_cmp_le_i32_e32 vcc, 0, v11
 ; GFX8-NEXT:    v_cmp_lt_i32_e64 s[4:5], v8, v3
 ; GFX8-NEXT:    v_ashrrev_i32_e32 v3, 31, v8
 ; GFX8-NEXT:    v_xor_b32_e32 v3, 0x80000000, v3
 ; GFX8-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
 ; GFX8-NEXT:    v_cndmask_b32_e32 v3, v8, v3, vcc
 ; GFX8-NEXT:    v_sub_u32_e64 v8, s[4:5], v4, v12
-; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v12
+; GFX8-NEXT:    v_cmp_le_i32_e32 vcc, 0, v12
 ; GFX8-NEXT:    v_cmp_lt_i32_e64 s[4:5], v8, v4
 ; GFX8-NEXT:    v_ashrrev_i32_e32 v4, 31, v8
 ; GFX8-NEXT:    v_xor_b32_e32 v4, 0x80000000, v4
 ; GFX8-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
 ; GFX8-NEXT:    v_cndmask_b32_e32 v4, v8, v4, vcc
 ; GFX8-NEXT:    v_sub_u32_e64 v8, s[4:5], v5, v13
-; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v13
+; GFX8-NEXT:    v_cmp_le_i32_e32 vcc, 0, v13
 ; GFX8-NEXT:    v_cmp_lt_i32_e64 s[4:5], v8, v5
 ; GFX8-NEXT:    v_ashrrev_i32_e32 v5, 31, v8
 ; GFX8-NEXT:    v_xor_b32_e32 v5, 0x80000000, v5
 ; GFX8-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
 ; GFX8-NEXT:    v_cndmask_b32_e32 v5, v8, v5, vcc
 ; GFX8-NEXT:    v_sub_u32_e64 v8, s[4:5], v6, v14
-; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v14
+; GFX8-NEXT:    v_cmp_le_i32_e32 vcc, 0, v14
 ; GFX8-NEXT:    v_cmp_lt_i32_e64 s[4:5], v8, v6
 ; GFX8-NEXT:    v_ashrrev_i32_e32 v6, 31, v8
 ; GFX8-NEXT:    v_xor_b32_e32 v6, 0x80000000, v6
 ; GFX8-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
 ; GFX8-NEXT:    v_cndmask_b32_e32 v6, v8, v6, vcc
 ; GFX8-NEXT:    v_sub_u32_e64 v8, s[4:5], v7, v15
-; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v15
+; GFX8-NEXT:    v_cmp_le_i32_e32 vcc, 0, v15
 ; GFX8-NEXT:    v_cmp_lt_i32_e64 s[4:5], v8, v7
 ; GFX8-NEXT:    v_ashrrev_i32_e32 v7, 31, v8
 ; GFX8-NEXT:    v_xor_b32_e32 v7, 0x80000000, v7
@@ -747,7 +747,7 @@ define <16 x i32> @v_ssubsat_v16i32(<16 x i32> %lhs, <16 x i32> %rhs) {
 ; GFX6-LABEL: v_ssubsat_v16i32:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v16
+; GFX6-NEXT:    v_cmp_le_i32_e32 vcc, 0, v16
 ; GFX6-NEXT:    v_sub_i32_e64 v16, s[4:5], v0, v16
 ; GFX6-NEXT:    v_cmp_lt_i32_e64 s[4:5], v16, v0
 ; GFX6-NEXT:    v_ashrrev_i32_e32 v0, 31, v16
@@ -755,28 +755,28 @@ define <16 x i32> @v_ssubsat_v16i32(<16 x i32> %lhs, <16 x i32> %rhs) {
 ; GFX6-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
 ; GFX6-NEXT:    v_cndmask_b32_e32 v0, v16, v0, vcc
 ; GFX6-NEXT:    v_sub_i32_e64 v16, s[4:5], v1, v17
-; GFX6-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v17
+; GFX6-NEXT:    v_cmp_le_i32_e32 vcc, 0, v17
 ; GFX6-NEXT:    v_cmp_lt_i32_e64 s[4:5], v16, v1
 ; GFX6-NEXT:    v_ashrrev_i32_e32 v1, 31, v16
 ; GFX6-NEXT:    v_xor_b32_e32 v1, 0x80000000, v1
 ; GFX6-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
 ; GFX6-NEXT:    v_cndmask_b32_e32 v1, v16, v1, vcc
 ; GFX6-NEXT:    v_sub_i32_e64 v16, s[4:5], v2, v18
-; GFX6-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v18
+; GFX6-NEXT:    v_cmp_le_i32_e32 vcc, 0, v18
 ; GFX6-NEXT:    v_cmp_lt_i32_e64 s[4:5], v16, v2
 ; GFX6-NEXT:    v_ashrrev_i32_e32 v2, 31, v16
 ; GFX6-NEXT:    v_xor_b32_e32 v2, 0x80000000, v2
 ; GFX6-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
 ; GFX6-NEXT:    v_cndmask_b32_e32 v2, v16, v2, vcc
 ; GFX6-NEXT:    v_sub_i32_e64 v16, s[4:5], v3, v19
-; GFX6-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v19
+; GFX6-NEXT:    v_cmp_le_i32_e32 vcc, 0, v19
 ; GFX6-NEXT:    v_cmp_lt_i32_e64 s[4:5], v16, v3
 ; GFX6-NEXT:    v_ashrrev_i32_e32 v3, 31, v16
 ; GFX6-NEXT:    v_xor_b32_e32 v3, 0x80000000, v3
 ; GFX6-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
 ; GFX6-NEXT:    v_cndmask_b32_e32 v3, v16, v3, vcc
 ; GFX6-NEXT:    v_sub_i32_e64 v16, s[4:5], v4, v20
-; GFX6-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v20
+; GFX6-NEXT:    v_cmp_le_i32_e32 vcc, 0, v20
 ; GFX6-NEXT:    v_cmp_lt_i32_e64 s[4:5], v16, v4
 ; GFX6-NEXT:    v_ashrrev_i32_e32 v4, 31, v16
 ; GFX6-NEXT:    v_xor_b32_e32 v4, 0x80000000, v4
@@ -784,77 +784,77 @@ define <16 x i32> @v_ssubsat_v16i32(<16 x i32> %lhs, <16 x i32> %rhs) {
 ; GFX6-NEXT:    v_cndmask_b32_e32 v4, v16, v4, vcc
 ; GFX6-NEXT:    buffer_load_dword v16, off, s[0:3], s32
 ; GFX6-NEXT:    v_sub_i32_e64 v17, s[4:5], v5, v21
-; GFX6-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v21
+; GFX6-NEXT:    v_cmp_le_i32_e32 vcc, 0, v21
 ; GFX6-NEXT:    v_cmp_lt_i32_e64 s[4:5], v17, v5
 ; GFX6-NEXT:    v_ashrrev_i32_e32 v5, 31, v17
 ; GFX6-NEXT:    v_xor_b32_e32 v5, 0x80000000, v5
 ; GFX6-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
 ; GFX6-NEXT:    v_cndmask_b32_e32 v5, v17, v5, vcc
 ; GFX6-NEXT:    v_sub_i32_e64 v17, s[4:5], v6, v22
-; GFX6-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v22
+; GFX6-NEXT:    v_cmp_le_i32_e32 vcc, 0, v22
 ; GFX6-NEXT:    v_cmp_lt_i32_e64 s[4:5], v17, v6
 ; GFX6-NEXT:    v_ashrrev_i32_e32 v6, 31, v17
 ; GFX6-NEXT:    v_xor_b32_e32 v6, 0x80000000, v6
 ; GFX6-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
 ; GFX6-NEXT:    v_cndmask_b32_e32 v6, v17, v6, vcc
 ; GFX6-NEXT:    v_sub_i32_e64 v17, s[4:5], v7, v23
-; GFX6-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v23
+; GFX6-NEXT:    v_cmp_le_i32_e32 vcc, 0, v23
 ; GFX6-NEXT:    v_cmp_lt_i32_e64 s[4:5], v17, v7
 ; GFX6-NEXT:    v_ashrrev_i32_e32 v7, 31, v17
 ; GFX6-NEXT:    v_xor_b32_e32 v7, 0x80000000, v7
 ; GFX6-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
 ; GFX6-NEXT:    v_cndmask_b32_e32 v7, v17, v7, vcc
 ; GFX6-NEXT:    v_sub_i32_e64 v17, s[4:5], v8, v24
-; GFX6-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v24
+; GFX6-NEXT:    v_cmp_le_i32_e32 vcc, 0, v24
 ; GFX6-NEXT:    v_cmp_lt_i32_e64 s[4:5], v17, v8
 ; GFX6-NEXT:    v_ashrrev_i32_e32 v8, 31, v17
 ; GFX6-NEXT:    v_xor_b32_e32 v8, 0x80000000, v8
 ; GFX6-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
 ; GFX6-NEXT:    v_cndmask_b32_e32 v8, v17, v8, vcc
 ; GFX6-NEXT:    v_sub_i32_e64 v17, s[4:5], v9, v25
-; GFX6-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v25
+; GFX6-NEXT:    v_cmp_le_i32_e32 vcc, 0, v25
 ; GFX6-NEXT:    v_cmp_lt_i32_e64 s[4:5], v17, v9
 ; GFX6-NEXT:    v_ashrrev_i32_e32 v9, 31, v17
 ; GFX6-NEXT:    v_xor_b32_e32 v9, 0x80000000, v9
 ; GFX6-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
 ; GFX6-NEXT:    v_cndmask_b32_e32 v9, v17, v9, vcc
 ; GFX6-NEXT:    v_sub_i32_e64 v17, s[4:5], v10, v26
-; GFX6-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v26
+; GFX6-NEXT:    v_cmp_le_i32_e32 vcc, 0, v26
 ; GFX6-NEXT:    v_cmp_lt_i32_e64 s[4:5], v17, v10
 ; GFX6-NEXT:    v_ashrrev_i32_e32 v10, 31, v17
 ; GFX6-NEXT:    v_xor_b32_e32 v10, 0x80000000, v10
 ; GFX6-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
 ; GFX6-NEXT:    v_cndmask_b32_e32 v10, v17, v10, vcc
 ; GFX6-NEXT:    v_sub_i32_e64 v17, s[4:5], v11, v27
-; GFX6-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v27
+; GFX6-NEXT:    v_cmp_le_i32_e32 vcc, 0, v27
 ; GFX6-NEXT:    v_cmp_lt_i32_e64 s[4:5], v17, v11
 ; GFX6-NEXT:    v_ashrrev_i32_e32 v11, 31, v17
 ; GFX6-NEXT:    v_xor_b32_e32 v11, 0x80000000, v11
 ; GFX6-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
 ; GFX6-NEXT:    v_cndmask_b32_e32 v11, v17, v11, vcc
 ; GFX6-NEXT:    v_sub_i32_e64 v17, s[4:5], v12, v28
-; GFX6-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v28
+; GFX6-NEXT:    v_cmp_le_i32_e32 vcc, 0, v28
 ; GFX6-NEXT:    v_cmp_lt_i32_e64 s[4:5], v17, v12
 ; GFX6-NEXT:    v_ashrrev_i32_e32 v12, 31, v17
 ; GFX6-NEXT:    v_xor_b32_e32 v12, 0x80000000, v12
 ; GFX6-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
 ; GFX6-NEXT:    v_cndmask_b32_e32 v12, v17, v12, vcc
 ; GFX6-NEXT:    v_sub_i32_e64 v17, s[4:5], v13, v29
-; GFX6-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v29
+; GFX6-NEXT:    v_cmp_le_i32_e32 vcc, 0, v29
 ; GFX6-NEXT:    v_cmp_lt_i32_e64 s[4:5], v17, v13
 ; GFX6-NEXT:    v_ashrrev_i32_e32 v13, 31, v17
 ; GFX6-NEXT:    v_xor_b32_e32 v13, 0x80000000, v13
 ; GFX6-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
 ; GFX6-NEXT:    v_cndmask_b32_e32 v13, v17, v13, vcc
 ; GFX6-NEXT:    v_sub_i32_e64 v17, s[4:5], v14, v30
-; GFX6-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v30
+; GFX6-NEXT:    v_cmp_le_i32_e32 vcc, 0, v30
 ; GFX6-NEXT:    v_cmp_lt_i32_e64 s[4:5], v17, v14
 ; GFX6-NEXT:    v_ashrrev_i32_e32 v14, 31, v17
 ; GFX6-NEXT:    v_xor_b32_e32 v14, 0x80000000, v14
 ; GFX6-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
 ; GFX6-NEXT:    v_cndmask_b32_e32 v14, v17, v14, vcc
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v16
+; GFX6-NEXT:    v_cmp_le_i32_e32 vcc, 0, v16
 ; GFX6-NEXT:    v_sub_i32_e64 v16, s[4:5], v15, v16
 ; GFX6-NEXT:    v_cmp_lt_i32_e64 s[4:5], v16, v15
 ; GFX6-NEXT:    v_ashrrev_i32_e32 v15, 31, v16
@@ -866,7 +866,7 @@ define <16 x i32> @v_ssubsat_v16i32(<16 x i32> %lhs, <16 x i32> %rhs) {
 ; GFX8-LABEL: v_ssubsat_v16i32:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v16
+; GFX8-NEXT:    v_cmp_le_i32_e32 vcc, 0, v16
 ; GFX8-NEXT:    v_sub_u32_e64 v16, s[4:5], v0, v16
 ; GFX8-NEXT:    v_cmp_lt_i32_e64 s[4:5], v16, v0
 ; GFX8-NEXT:    v_ashrrev_i32_e32 v0, 31, v16
@@ -874,28 +874,28 @@ define <16 x i32> @v_ssubsat_v16i32(<16 x i32> %lhs, <16 x i32> %rhs) {
 ; GFX8-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
 ; GFX8-NEXT:    v_cndmask_b32_e32 v0, v16, v0, vcc
 ; GFX8-NEXT:    v_sub_u32_e64 v16, s[4:5], v1, v17
-; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v17
+; GFX8-NEXT:    v_cmp_le_i32_e32 vcc, 0, v17
 ; GFX8-NEXT:    v_cmp_lt_i32_e64 s[4:5], v16, v1
 ; GFX8-NEXT:    v_ashrrev_i32_e32 v1, 31, v16
 ; GFX8-NEXT:    v_xor_b32_e32 v1, 0x80000000, v1
 ; GFX8-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
 ; GFX8-NEXT:    v_cndmask_b32_e32 v1, v16, v1, vcc
 ; GFX8-NEXT:    v_sub_u32_e64 v16, s[4:5], v2, v18
-; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v18
+; GFX8-NEXT:    v_cmp_le_i32_e32 vcc, 0, v18
 ; GFX8-NEXT:    v_cmp_lt_i32_e64 s[4:5], v16, v2
 ; GFX8-NEXT:    v_ashrrev_i32_e32 v2, 31, v16
 ; GFX8-NEXT:    v_xor_b32_e32 v2, 0x80000000, v2
 ; GFX8-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
 ; GFX8-NEXT:    v_cndmask_b32_e32 v2, v16, v2, vcc
 ; GFX8-NEXT:    v_sub_u32_e64 v16, s[4:5], v3, v19
-; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v19
+; GFX8-NEXT:    v_cmp_le_i32_e32 vcc, 0, v19
 ; GFX8-NEXT:    v_cmp_lt_i32_e64 s[4:5], v16, v3
 ; GFX8-NEXT:    v_ashrrev_i32_e32 v3, 31, v16
 ; GFX8-NEXT:    v_xor_b32_e32 v3, 0x80000000, v3
 ; GFX8-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
 ; GFX8-NEXT:    v_cndmask_b32_e32 v3, v16, v3, vcc
 ; GFX8-NEXT:    v_sub_u32_e64 v16, s[4:5], v4, v20
-; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v20
+; GFX8-NEXT:    v_cmp_le_i32_e32 vcc, 0, v20
 ; GFX8-NEXT:    v_cmp_lt_i32_e64 s[4:5], v16, v4
 ; GFX8-NEXT:    v_ashrrev_i32_e32 v4, 31, v16
 ; GFX8-NEXT:    v_xor_b32_e32 v4, 0x80000000, v4
@@ -903,77 +903,77 @@ define <16 x i32> @v_ssubsat_v16i32(<16 x i32> %lhs, <16 x i32> %rhs) {
 ; GFX8-NEXT:    v_cndmask_b32_e32 v4, v16, v4, vcc
 ; GFX8-NEXT:    buffer_load_dword v16, off, s[0:3], s32
 ; GFX8-NEXT:    v_sub_u32_e64 v17, s[4:5], v5, v21
-; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v21
+; GFX8-NEXT:    v_cmp_le_i32_e32 vcc, 0, v21
 ; GFX8-NEXT:    v_cmp_lt_i32_e64 s[4:5], v17, v5
 ; GFX8-NEXT:    v_ashrrev_i32_e32 v5, 31, v17
 ; GFX8-NEXT:    v_xor_b32_e32 v5, 0x80000000, v5
 ; GFX8-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
 ; GFX8-NEXT:    v_cndmask_b32_e32 v5, v17, v5, vcc
 ; GFX8-NEXT:    v_sub_u32_e64 v17, s[4:5], v6, v22
-; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v22
+; GFX8-NEXT:    v_cmp_le_i32_e32 vcc, 0, v22
 ; GFX8-NEXT:    v_cmp_lt_i32_e64 s[4:5], v17, v6
 ; GFX8-NEXT:    v_ashrrev_i32_e32 v6, 31, v17
 ; GFX8-NEXT:    v_xor_b32_e32 v6, 0x80000000, v6
 ; GFX8-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
 ; GFX8-NEXT:    v_cndmask_b32_e32 v6, v17, v6, vcc
 ; GFX8-NEXT:    v_sub_u32_e64 v17, s[4:5], v7, v23
-; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v23
+; GFX8-NEXT:    v_cmp_le_i32_e32 vcc, 0, v23
 ; GFX8-NEXT:    v_cmp_lt_i32_e64 s[4:5], v17, v7
 ; GFX8-NEXT:    v_ashrrev_i32_e32 v7, 31, v17
 ; GFX8-NEXT:    v_xor_b32_e32 v7, 0x80000000, v7
 ; GFX8-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
 ; GFX8-NEXT:    v_cndmask_b32_e32 v7, v17, v7, vcc
 ; GFX8-NEXT:    v_sub_u32_e64 v17, s[4:5], v8, v24
-; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v24
+; GFX8-NEXT:    v_cmp_le_i32_e32 vcc, 0, v24
 ; GFX8-NEXT:    v_cmp_lt_i32_e64 s[4:5], v17, v8
 ; GFX8-NEXT:    v_ashrrev_i32_e32 v8, 31, v17
 ; GFX8-NEXT:    v_xor_b32_e32 v8, 0x80000000, v8
 ; GFX8-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
 ; GFX8-NEXT:    v_cndmask_b32_e32 v8, v17, v8, vcc
 ; GFX8-NEXT:    v_sub_u32_e64 v17, s[4:5], v9, v25
-; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v25
+; GFX8-NEXT:    v_cmp_le_i32_e32 vcc, 0, v25
 ; GFX8-NEXT:    v_cmp_lt_i32_e64 s[4:5], v17, v9
 ; GFX8-NEXT:    v_ashrrev_i32_e32 v9, 31, v17
 ; GFX8-NEXT:    v_xor_b32_e32 v9, 0x80000000, v9
 ; GFX8-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
 ; GFX8-NEXT:    v_cndmask_b32_e32 v9, v17, v9, vcc
 ; GFX8-NEXT:    v_sub_u32_e64 v17, s[4:5], v10, v26
-; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v26
+; GFX8-NEXT:    v_cmp_le_i32_e32 vcc, 0, v26
 ; GFX8-NEXT:    v_cmp_lt_i32_e64 s[4:5], v17, v10
 ; GFX8-NEXT:    v_ashrrev_i32_e32 v10, 31, v17
 ; GFX8-NEXT:    v_xor_b32_e32 v10, 0x80000000, v10
 ; GFX8-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
 ; GFX8-NEXT:    v_cndmask_b32_e32 v10, v17, v10, vcc
 ; GFX8-NEXT:    v_sub_u32_e64 v17, s[4:5], v11, v27
-; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v27
+; GFX8-NEXT:    v_cmp_le_i32_e32 vcc, 0, v27
 ; GFX8-NEXT:    v_cmp_lt_i32_e64 s[4:5], v17, v11
 ; GFX8-NEXT:    v_ashrrev_i32_e32 v11, 31, v17
 ; GFX8-NEXT:    v_xor_b32_e32 v11, 0x80000000, v11
 ; GFX8-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
 ; GFX8-NEXT:    v_cndmask_b32_e32 v11, v17, v11, vcc
 ; GFX8-NEXT:    v_sub_u32_e64 v17, s[4:5], v12, v28
-; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v28
+; GFX8-NEXT:    v_cmp_le_i32_e32 vcc, 0, v28
 ; GFX8-NEXT:    v_cmp_lt_i32_e64 s[4:5], v17, v12
 ; GFX8-NEXT:    v_ashrrev_i32_e32 v12, 31, v17
 ; GFX8-NEXT:    v_xor_b32_e32 v12, 0x80000000, v12
 ; GFX8-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
 ; GFX8-NEXT:    v_cndmask_b32_e32 v12, v17, v12, vcc
 ; GFX8-NEXT:    v_sub_u32_e64 v17, s[4:5], v13, v29
-; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v29
+; GFX8-NEXT:    v_cmp_le_i32_e32 vcc, 0, v29
 ; GFX8-NEXT:    v_cmp_lt_i32_e64 s[4:5], v17, v13
 ; GFX8-NEXT:    v_ashrrev_i32_e32 v13, 31, v17
 ; GFX8-NEXT:    v_xor_b32_e32 v13, 0x80000000, v13
 ; GFX8-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
 ; GFX8-NEXT:    v_cndmask_b32_e32 v13, v17, v13, vcc
 ; GFX8-NEXT:    v_sub_u32_e64 v17, s[4:5], v14, v30
-; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v30
+; GFX8-NEXT:    v_cmp_le_i32_e32 vcc, 0, v30
 ; GFX8-NEXT:    v_cmp_lt_i32_e64 s[4:5], v17, v14
 ; GFX8-NEXT:    v_ashrrev_i32_e32 v14, 31, v17
 ; GFX8-NEXT:    v_xor_b32_e32 v14, 0x80000000, v14
 ; GFX8-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
 ; GFX8-NEXT:    v_cndmask_b32_e32 v14, v17, v14, vcc
 ; GFX8-NEXT:    s_waitcnt vmcnt(0)
-; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v16
+; GFX8-NEXT:    v_cmp_le_i32_e32 vcc, 0, v16
 ; GFX8-NEXT:    v_sub_u32_e64 v16, s[4:5], v15, v16
 ; GFX8-NEXT:    v_cmp_lt_i32_e64 s[4:5], v16, v15
 ; GFX8-NEXT:    v_ashrrev_i32_e32 v15, 31, v16
@@ -1062,7 +1062,7 @@ define i64 @v_ssubsat_i64(i64 %lhs, i64 %rhs) {
 ; GFX6-NEXT:    v_sub_i32_e32 v4, vcc, v0, v2
 ; GFX6-NEXT:    v_subb_u32_e32 v5, vcc, v1, v3, vcc
 ; GFX6-NEXT:    v_cmp_lt_i64_e32 vcc, v[4:5], v[0:1]
-; GFX6-NEXT:    v_cmp_lt_i64_e64 s[4:5], 0, v[2:3]
+; GFX6-NEXT:    v_cmp_le_i64_e64 s[4:5], 0, v[2:3]
 ; GFX6-NEXT:    v_ashrrev_i32_e32 v1, 31, v5
 ; GFX6-NEXT:    s_xor_b64 vcc, s[4:5], vcc
 ; GFX6-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
@@ -1076,7 +1076,7 @@ define i64 @v_ssubsat_i64(i64 %lhs, i64 %rhs) {
 ; GFX8-NEXT:    v_sub_u32_e32 v4, vcc, v0, v2
 ; GFX8-NEXT:    v_subb_u32_e32 v5, vcc, v1, v3, vcc
 ; GFX8-NEXT:    v_cmp_lt_i64_e32 vcc, v[4:5], v[0:1]
-; GFX8-NEXT:    v_cmp_lt_i64_e64 s[4:5], 0, v[2:3]
+; GFX8-NEXT:    v_cmp_le_i64_e64 s[4:5], 0, v[2:3]
 ; GFX8-NEXT:    v_ashrrev_i32_e32 v1, 31, v5
 ; GFX8-NEXT:    s_xor_b64 vcc, s[4:5], vcc
 ; GFX8-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
@@ -1090,7 +1090,7 @@ define i64 @v_ssubsat_i64(i64 %lhs, i64 %rhs) {
 ; GFX9-NEXT:    v_sub_co_u32_e32 v4, vcc, v0, v2
 ; GFX9-NEXT:    v_subb_co_u32_e32 v5, vcc, v1, v3, vcc
 ; GFX9-NEXT:    v_cmp_lt_i64_e32 vcc, v[4:5], v[0:1]
-; GFX9-NEXT:    v_cmp_lt_i64_e64 s[4:5], 0, v[2:3]
+; GFX9-NEXT:    v_cmp_le_i64_e64 s[4:5], 0, v[2:3]
 ; GFX9-NEXT:    v_ashrrev_i32_e32 v1, 31, v5
 ; GFX9-NEXT:    s_xor_b64 vcc, s[4:5], vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
@@ -1103,7 +1103,7 @@ define i64 @v_ssubsat_i64(i64 %lhs, i64 %rhs) {
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    v_sub_co_u32 v4, vcc_lo, v0, v2
 ; GFX10-NEXT:    v_sub_co_ci_u32_e32 v5, vcc_lo, v1, v3, vcc_lo
-; GFX10-NEXT:    v_cmp_lt_i64_e64 s4, 0, v[2:3]
+; GFX10-NEXT:    v_cmp_le_i64_e64 s4, 0, v[2:3]
 ; GFX10-NEXT:    v_ashrrev_i32_e32 v6, 31, v5
 ; GFX10-NEXT:    v_cmp_lt_i64_e32 vcc_lo, v[4:5], v[0:1]
 ; GFX10-NEXT:    v_xor_b32_e32 v1, 0x80000000, v6
@@ -1117,7 +1117,7 @@ define i64 @v_ssubsat_i64(i64 %lhs, i64 %rhs) {
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NEXT:    v_sub_co_u32 v4, vcc_lo, v0, v2
 ; GFX11-NEXT:    v_sub_co_ci_u32_e64 v5, null, v1, v3, vcc_lo
-; GFX11-NEXT:    v_cmp_lt_i64_e64 s0, 0, v[2:3]
+; GFX11-NEXT:    v_cmp_le_i64_e64 s0, 0, v[2:3]
 ; GFX11-NEXT:    v_ashrrev_i32_e32 v6, 31, v5
 ; GFX11-NEXT:    v_cmp_lt_i64_e32 vcc_lo, v[4:5], v[0:1]
 ; GFX11-NEXT:    v_xor_b32_e32 v1, 0x80000000, v6
diff --git a/llvm/test/CodeGen/AMDGPU/tuple-allocation-failure.ll b/llvm/test/CodeGen/AMDGPU/tuple-allocation-failure.ll
index d23e314b9465f..644c63f7ef785 100644
--- a/llvm/test/CodeGen/AMDGPU/tuple-allocation-failure.ll
+++ b/llvm/test/CodeGen/AMDGPU/tuple-allocation-failure.ll
@@ -86,9 +86,9 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
 ; GLOBALNESS1-NEXT:    s_mov_b32 s32, 0
 ; GLOBALNESS1-NEXT:    ; implicit-def: $vgpr56_vgpr57
 ; GLOBALNESS1-NEXT:    s_waitcnt vmcnt(0)
-; GLOBALNESS1-NEXT:    v_cmp_gt_i32_e32 vcc, 0, v2
+; GLOBALNESS1-NEXT:    v_cmp_ge_i32_e32 vcc, 0, v2
 ; GLOBALNESS1-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; GLOBALNESS1-NEXT:    v_cmp_gt_i32_e32 vcc, 1, v2
+; GLOBALNESS1-NEXT:    v_cmp_ge_i32_e32 vcc, 1, v2
 ; GLOBALNESS1-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
 ; GLOBALNESS1-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v2
 ; GLOBALNESS1-NEXT:    v_cmp_ne_u32_e64 s[4:5], 1, v0
@@ -178,7 +178,7 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
 ; GLOBALNESS1-NEXT:    ; in Loop: Header=BB1_4 Depth=1
 ; GLOBALNESS1-NEXT:    flat_load_dword v0, v[44:45]
 ; GLOBALNESS1-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GLOBALNESS1-NEXT:    v_cmp_gt_i32_e64 s[86:87], 0, v0
+; GLOBALNESS1-NEXT:    v_cmp_ge_i32_e64 s[86:87], 0, v0
 ; GLOBALNESS1-NEXT:    v_mov_b32_e32 v0, 0
 ; GLOBALNESS1-NEXT:    v_mov_b32_e32 v1, 0x3ff00000
 ; GLOBALNESS1-NEXT:    s_and_saveexec_b64 s[52:53], s[86:87]
@@ -400,9 +400,9 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
 ; GLOBALNESS0-NEXT:    s_mov_b32 s32, 0
 ; GLOBALNESS0-NEXT:    ; implicit-def: $vgpr56_vgpr57
 ; GLOBALNESS0-NEXT:    s_waitcnt vmcnt(0)
-; GLOBALNESS0-NEXT:    v_cmp_gt_i32_e32 vcc, 0, v2
+; GLOBALNESS0-NEXT:    v_cmp_ge_i32_e32 vcc, 0, v2
 ; GLOBALNESS0-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; GLOBALNESS0-NEXT:    v_cmp_gt_i32_e32 vcc, 1, v2
+; GLOBALNESS0-NEXT:    v_cmp_ge_i32_e32 vcc, 1, v2
 ; GLOBALNESS0-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
 ; GLOBALNESS0-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v2
 ; GLOBALNESS0-NEXT:    v_cmp_ne_u32_e64 s[4:5], 1, v0
@@ -492,7 +492,7 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
 ; GLOBALNESS0-NEXT:    ; in Loop: Header=BB1_4 Depth=1
 ; GLOBALNESS0-NEXT:    flat_load_dword v0, v[44:45]
 ; GLOBALNESS0-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GLOBALNESS0-NEXT:    v_cmp_gt_i32_e64 s[86:87], 0, v0
+; GLOBALNESS0-NEXT:    v_cmp_ge_i32_e64 s[86:87], 0, v0
 ; GLOBALNESS0-NEXT:    v_mov_b32_e32 v0, 0
 ; GLOBALNESS0-NEXT:    v_mov_b32_e32 v1, 0x3ff00000
 ; GLOBALNESS0-NEXT:    s_and_saveexec_b64 s[52:53], s[86:87]
diff --git a/llvm/test/CodeGen/AMDGPU/uaddo.ll b/llvm/test/CodeGen/AMDGPU/uaddo.ll
index 6606b1d050421..c4cb8cb492051 100644
--- a/llvm/test/CodeGen/AMDGPU/uaddo.ll
+++ b/llvm/test/CodeGen/AMDGPU/uaddo.ll
@@ -823,10 +823,10 @@ define amdgpu_cs void @sv_uaddo_i128(ptr addrspace(1) %out, i128 inreg %a, i128
 ; SI-NEXT:    s_mov_b32 s5, s6
 ; SI-NEXT:    v_addc_u32_e32 v3, vcc, v6, v3, vcc
 ; SI-NEXT:    v_addc_u32_e32 v4, vcc, v7, v4, vcc
-; SI-NEXT:    v_cmp_gt_u64_e64 s[0:1], s[0:1], v[2:3]
+; SI-NEXT:    v_cmp_ge_u64_e64 s[0:1], s[0:1], v[2:3]
 ; SI-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s[0:1]
 ; SI-NEXT:    v_addc_u32_e32 v5, vcc, v8, v5, vcc
-; SI-NEXT:    v_cmp_gt_u64_e32 vcc, s[2:3], v[4:5]
+; SI-NEXT:    v_cmp_ge_u64_e32 vcc, s[2:3], v[4:5]
 ; SI-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
 ; SI-NEXT:    v_cmp_eq_u64_e32 vcc, s[2:3], v[4:5]
 ; SI-NEXT:    v_cndmask_b32_e32 v2, v3, v2, vcc
@@ -843,9 +843,9 @@ define amdgpu_cs void @sv_uaddo_i128(ptr addrspace(1) %out, i128 inreg %a, i128
 ; VI-NEXT:    v_addc_u32_e32 v4, vcc, v6, v4, vcc
 ; VI-NEXT:    v_mov_b32_e32 v6, s3
 ; VI-NEXT:    v_addc_u32_e32 v5, vcc, v6, v5, vcc
-; VI-NEXT:    v_cmp_gt_u64_e32 vcc, s[0:1], v[2:3]
+; VI-NEXT:    v_cmp_ge_u64_e32 vcc, s[0:1], v[2:3]
 ; VI-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; VI-NEXT:    v_cmp_gt_u64_e32 vcc, s[2:3], v[4:5]
+; VI-NEXT:    v_cmp_ge_u64_e32 vcc, s[2:3], v[4:5]
 ; VI-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
 ; VI-NEXT:    v_cmp_eq_u64_e32 vcc, s[2:3], v[4:5]
 ; VI-NEXT:    v_cndmask_b32_e32 v2, v3, v2, vcc
@@ -862,9 +862,9 @@ define amdgpu_cs void @sv_uaddo_i128(ptr addrspace(1) %out, i128 inreg %a, i128
 ; GFX9-NEXT:    v_addc_co_u32_e32 v4, vcc, v6, v4, vcc
 ; GFX9-NEXT:    v_mov_b32_e32 v6, s3
 ; GFX9-NEXT:    v_addc_co_u32_e32 v5, vcc, v6, v5, vcc
-; GFX9-NEXT:    v_cmp_gt_u64_e32 vcc, s[0:1], v[2:3]
+; GFX9-NEXT:    v_cmp_ge_u64_e32 vcc, s[0:1], v[2:3]
 ; GFX9-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX9-NEXT:    v_cmp_gt_u64_e32 vcc, s[2:3], v[4:5]
+; GFX9-NEXT:    v_cmp_ge_u64_e32 vcc, s[2:3], v[4:5]
 ; GFX9-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
 ; GFX9-NEXT:    v_cmp_eq_u64_e32 vcc, s[2:3], v[4:5]
 ; GFX9-NEXT:    v_cndmask_b32_e32 v2, v3, v2, vcc
diff --git a/llvm/test/CodeGen/AMDGPU/udiv64.ll b/llvm/test/CodeGen/AMDGPU/udiv64.ll
index f1d7c84836ca7..fffdf025d246e 100644
--- a/llvm/test/CodeGen/AMDGPU/udiv64.ll
+++ b/llvm/test/CodeGen/AMDGPU/udiv64.ll
@@ -94,9 +94,9 @@ define amdgpu_kernel void @s_test_udiv_i64(ptr addrspace(1) %out, i64 %x, i64 %y
 ; GCN-NEXT:    v_subb_u32_e64 v4, s[0:1], v4, v5, vcc
 ; GCN-NEXT:    v_subrev_i32_e64 v5, s[0:1], s8, v3
 ; GCN-NEXT:    v_subbrev_u32_e64 v4, s[0:1], 0, v4, s[0:1]
-; GCN-NEXT:    v_cmp_le_u32_e64 s[0:1], s9, v4
+; GCN-NEXT:    v_cmp_lt_u32_e64 s[0:1], s9, v4
 ; GCN-NEXT:    v_cndmask_b32_e64 v6, 0, -1, s[0:1]
-; GCN-NEXT:    v_cmp_le_u32_e64 s[0:1], s8, v5
+; GCN-NEXT:    v_cmp_lt_u32_e64 s[0:1], s8, v5
 ; GCN-NEXT:    v_cndmask_b32_e64 v5, 0, -1, s[0:1]
 ; GCN-NEXT:    v_cmp_eq_u32_e64 s[0:1], s9, v4
 ; GCN-NEXT:    v_cndmask_b32_e64 v4, v6, v5, s[0:1]
@@ -109,9 +109,9 @@ define amdgpu_kernel void @s_test_udiv_i64(ptr addrspace(1) %out, i64 %x, i64 %y
 ; GCN-NEXT:    v_cndmask_b32_e64 v5, v6, v8, s[0:1]
 ; GCN-NEXT:    v_mov_b32_e32 v6, s3
 ; GCN-NEXT:    v_subb_u32_e32 v2, vcc, v6, v2, vcc
-; GCN-NEXT:    v_cmp_le_u32_e32 vcc, s9, v2
+; GCN-NEXT:    v_cmp_lt_u32_e32 vcc, s9, v2
 ; GCN-NEXT:    v_cndmask_b32_e64 v6, 0, -1, vcc
-; GCN-NEXT:    v_cmp_le_u32_e32 vcc, s8, v3
+; GCN-NEXT:    v_cmp_lt_u32_e32 vcc, s8, v3
 ; GCN-NEXT:    v_cndmask_b32_e64 v3, 0, -1, vcc
 ; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, s9, v2
 ; GCN-NEXT:    v_cndmask_b32_e32 v2, v6, v3, vcc
@@ -322,7 +322,7 @@ define i64 @v_test_udiv_i64(i64 %x, i64 %y) {
 ; GCN-IR-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[2:3]
 ; GCN-IR-NEXT:    v_cmp_eq_u64_e64 s[4:5], 0, v[0:1]
 ; GCN-IR-NEXT:    v_subb_u32_e64 v9, s[6:7], 0, 0, s[6:7]
-; GCN-IR-NEXT:    v_cmp_lt_u64_e64 s[6:7], 63, v[8:9]
+; GCN-IR-NEXT:    v_cmp_le_u64_e64 s[6:7], 63, v[8:9]
 ; GCN-IR-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
 ; GCN-IR-NEXT:    s_or_b64 s[4:5], s[4:5], s[6:7]
 ; GCN-IR-NEXT:    v_cmp_ne_u64_e32 vcc, 63, v[8:9]
@@ -863,9 +863,9 @@ define amdgpu_kernel void @s_test_udiv_k_num_i64(ptr addrspace(1) %out, i64 %x)
 ; GCN-NEXT:    v_subb_u32_e64 v3, s[0:1], v3, v4, vcc
 ; GCN-NEXT:    v_subrev_i32_e64 v4, s[0:1], s2, v2
 ; GCN-NEXT:    v_subbrev_u32_e64 v3, s[0:1], 0, v3, s[0:1]
-; GCN-NEXT:    v_cmp_le_u32_e64 s[0:1], s3, v3
+; GCN-NEXT:    v_cmp_lt_u32_e64 s[0:1], s3, v3
 ; GCN-NEXT:    v_cndmask_b32_e64 v5, 0, -1, s[0:1]
-; GCN-NEXT:    v_cmp_le_u32_e64 s[0:1], s2, v4
+; GCN-NEXT:    v_cmp_lt_u32_e64 s[0:1], s2, v4
 ; GCN-NEXT:    v_cndmask_b32_e64 v4, 0, -1, s[0:1]
 ; GCN-NEXT:    v_cmp_eq_u32_e64 s[0:1], s3, v3
 ; GCN-NEXT:    v_cndmask_b32_e64 v3, v5, v4, s[0:1]
@@ -875,11 +875,11 @@ define amdgpu_kernel void @s_test_udiv_k_num_i64(ptr addrspace(1) %out, i64 %x)
 ; GCN-NEXT:    v_addc_u32_e64 v7, s[0:1], 0, 0, s[0:1]
 ; GCN-NEXT:    v_subb_u32_e32 v1, vcc, 0, v1, vcc
 ; GCN-NEXT:    v_cmp_ne_u32_e64 s[0:1], 0, v3
-; GCN-NEXT:    v_cmp_le_u32_e32 vcc, s3, v1
+; GCN-NEXT:    v_cmp_lt_u32_e32 vcc, s3, v1
 ; GCN-NEXT:    v_cndmask_b32_e64 v3, v4, v6, s[0:1]
 ; GCN-NEXT:    v_cndmask_b32_e64 v4, v5, v7, s[0:1]
 ; GCN-NEXT:    v_cndmask_b32_e64 v5, 0, -1, vcc
-; GCN-NEXT:    v_cmp_le_u32_e32 vcc, s2, v2
+; GCN-NEXT:    v_cmp_lt_u32_e32 vcc, s2, v2
 ; GCN-NEXT:    v_cndmask_b32_e64 v2, 0, -1, vcc
 ; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, s3, v1
 ; GCN-NEXT:    v_cndmask_b32_e32 v1, v5, v2, vcc
@@ -1069,7 +1069,7 @@ define i64 @v_test_udiv_pow2_k_num_i64(i64 %x) {
 ; GCN-IR-NEXT:    v_add_i32_e32 v6, vcc, 0xffffffd0, v14
 ; GCN-IR-NEXT:    v_addc_u32_e64 v7, s[6:7], 0, -1, vcc
 ; GCN-IR-NEXT:    v_cmp_eq_u64_e64 s[4:5], 0, v[0:1]
-; GCN-IR-NEXT:    v_cmp_lt_u64_e32 vcc, 63, v[6:7]
+; GCN-IR-NEXT:    v_cmp_le_u64_e32 vcc, 63, v[6:7]
 ; GCN-IR-NEXT:    v_cmp_ne_u64_e64 s[6:7], 63, v[6:7]
 ; GCN-IR-NEXT:    v_mov_b32_e32 v3, 0x8000
 ; GCN-IR-NEXT:    s_or_b64 s[4:5], s[4:5], vcc
@@ -1159,7 +1159,7 @@ define i64 @v_test_udiv_pow2_k_den_i64(i64 %x) {
 ; GCN-IR-NEXT:    v_sub_i32_e64 v6, s[4:5], 48, v10
 ; GCN-IR-NEXT:    v_subb_u32_e64 v7, s[4:5], 0, 0, s[4:5]
 ; GCN-IR-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[0:1]
-; GCN-IR-NEXT:    v_cmp_lt_u64_e64 s[4:5], 63, v[6:7]
+; GCN-IR-NEXT:    v_cmp_le_u64_e64 s[4:5], 63, v[6:7]
 ; GCN-IR-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
 ; GCN-IR-NEXT:    v_cmp_ne_u64_e32 vcc, 63, v[6:7]
 ; GCN-IR-NEXT:    s_xor_b64 s[6:7], s[4:5], -1
@@ -1359,7 +1359,7 @@ define i64 @v_test_udiv_k_den_i64(i64 %x) {
 ; GCN-IR-NEXT:    v_sub_i32_e64 v6, s[4:5], 59, v10
 ; GCN-IR-NEXT:    v_subb_u32_e64 v7, s[4:5], 0, 0, s[4:5]
 ; GCN-IR-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[0:1]
-; GCN-IR-NEXT:    v_cmp_lt_u64_e64 s[4:5], 63, v[6:7]
+; GCN-IR-NEXT:    v_cmp_le_u64_e64 s[4:5], 63, v[6:7]
 ; GCN-IR-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
 ; GCN-IR-NEXT:    v_cmp_ne_u64_e32 vcc, 63, v[6:7]
 ; GCN-IR-NEXT:    s_xor_b64 s[6:7], s[4:5], -1
diff --git a/llvm/test/CodeGen/AMDGPU/undef-handling-crash-in-ra.ll b/llvm/test/CodeGen/AMDGPU/undef-handling-crash-in-ra.ll
index d0d1ba82dc000..65a990c1e7b82 100644
--- a/llvm/test/CodeGen/AMDGPU/undef-handling-crash-in-ra.ll
+++ b/llvm/test/CodeGen/AMDGPU/undef-handling-crash-in-ra.ll
@@ -77,7 +77,7 @@ define amdgpu_kernel void @foo(ptr addrspace(5) %ptr5, ptr %p0, double %v0, <4 x
 ; CHECK-NEXT:    flat_load_dwordx2 v[0:1], v[56:57] glc
 ; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; CHECK-NEXT:    v_mov_b32_e32 v0, s64
-; CHECK-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v42
+; CHECK-NEXT:    v_cmp_le_i32_e32 vcc, 0, v42
 ; CHECK-NEXT:    flat_store_dwordx2 v[58:59], v[62:63]
 ; CHECK-NEXT:    s_waitcnt vmcnt(0)
 ; CHECK-NEXT:    flat_store_dwordx2 v[58:59], v[46:47]
diff --git a/llvm/test/CodeGen/AMDGPU/uniform-cfg.ll b/llvm/test/CodeGen/AMDGPU/uniform-cfg.ll
index 374c6701f1ba6..aa208038909fd 100644
--- a/llvm/test/CodeGen/AMDGPU/uniform-cfg.ll
+++ b/llvm/test/CodeGen/AMDGPU/uniform-cfg.ll
@@ -275,7 +275,7 @@ define amdgpu_kernel void @uniform_if_move_valu_commute(ptr addrspace(1) %out, f
 ; SI-NEXT:    v_mov_b32_e32 v0, 0x41200000
 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
 ; SI-NEXT:    v_add_f32_e32 v0, s0, v0
-; SI-NEXT:    v_cmp_gt_u32_e32 vcc, 6, v0
+; SI-NEXT:    v_cmp_ge_u32_e32 vcc, 6, v0
 ; SI-NEXT:    s_cbranch_vccnz .LBB5_2
 ; SI-NEXT:  ; %bb.1: ; %if
 ; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
@@ -293,7 +293,7 @@ define amdgpu_kernel void @uniform_if_move_valu_commute(ptr addrspace(1) %out, f
 ; VI-NEXT:    v_mov_b32_e32 v0, 0x41200000
 ; VI-NEXT:    s_waitcnt lgkmcnt(0)
 ; VI-NEXT:    v_add_f32_e32 v0, s0, v0
-; VI-NEXT:    v_cmp_gt_u32_e32 vcc, 6, v0
+; VI-NEXT:    v_cmp_ge_u32_e32 vcc, 6, v0
 ; VI-NEXT:    s_cbranch_vccnz .LBB5_2
 ; VI-NEXT:  ; %bb.1: ; %if
 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
@@ -601,7 +601,7 @@ done:
 define amdgpu_kernel void @uniform_inside_divergent(ptr addrspace(1) %out, i32 %cond) {
 ; SI-LABEL: uniform_inside_divergent:
 ; SI:       ; %bb.0: ; %entry
-; SI-NEXT:    v_cmp_gt_u32_e32 vcc, 16, v0
+; SI-NEXT:    v_cmp_ge_u32_e32 vcc, 16, v0
 ; SI-NEXT:    s_and_saveexec_b64 s[0:1], vcc
 ; SI-NEXT:    s_cbranch_execz .LBB11_2
 ; SI-NEXT:  ; %bb.1: ; %if
@@ -624,7 +624,7 @@ define amdgpu_kernel void @uniform_inside_divergent(ptr addrspace(1) %out, i32 %
 ;
 ; VI-LABEL: uniform_inside_divergent:
 ; VI:       ; %bb.0: ; %entry
-; VI-NEXT:    v_cmp_gt_u32_e32 vcc, 16, v0
+; VI-NEXT:    v_cmp_ge_u32_e32 vcc, 16, v0
 ; VI-NEXT:    s_and_saveexec_b64 s[0:1], vcc
 ; VI-NEXT:    s_cbranch_execz .LBB11_2
 ; VI-NEXT:  ; %bb.1: ; %if
@@ -675,7 +675,7 @@ define amdgpu_kernel void @divergent_inside_uniform(ptr addrspace(1) %out, i32 %
 ; SI-NEXT:    s_mov_b32 s3, 0xf000
 ; SI-NEXT:    s_mov_b32 s2, -1
 ; SI-NEXT:    v_mov_b32_e32 v1, 0
-; SI-NEXT:    v_cmp_gt_u32_e32 vcc, 16, v0
+; SI-NEXT:    v_cmp_ge_u32_e32 vcc, 16, v0
 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
 ; SI-NEXT:    buffer_store_dword v1, off, s[0:3], 0
 ; SI-NEXT:    s_and_saveexec_b64 s[4:5], vcc
@@ -698,7 +698,7 @@ define amdgpu_kernel void @divergent_inside_uniform(ptr addrspace(1) %out, i32 %
 ; VI-NEXT:    s_mov_b32 s3, 0xf000
 ; VI-NEXT:    s_mov_b32 s2, -1
 ; VI-NEXT:    v_mov_b32_e32 v1, 0
-; VI-NEXT:    v_cmp_gt_u32_e32 vcc, 16, v0
+; VI-NEXT:    v_cmp_ge_u32_e32 vcc, 16, v0
 ; VI-NEXT:    s_waitcnt lgkmcnt(0)
 ; VI-NEXT:    buffer_store_dword v1, off, s[0:3], 0
 ; VI-NEXT:    s_and_saveexec_b64 s[4:5], vcc
diff --git a/llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll b/llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll
index e0d16987b31e7..4759f4e17de14 100644
--- a/llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll
+++ b/llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll
@@ -36,7 +36,7 @@ define hidden void @widget() {
 ; GCN-NEXT:    s_mov_b64 s[20:21], -1
 ; GCN-NEXT:    s_mov_b64 s[16:17], 0
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    v_cmp_gt_i32_e32 vcc, 21, v0
+; GCN-NEXT:    v_cmp_ge_i32_e32 vcc, 21, v0
 ; GCN-NEXT:    s_mov_b64 s[54:55], 0
 ; GCN-NEXT:    s_mov_b64 s[18:19], 0
 ; GCN-NEXT:    s_cbranch_vccz .LBB0_9
@@ -326,7 +326,7 @@ define hidden void @blam() {
 ; GCN-NEXT:    buffer_store_dword v42, off, s[0:3], 0
 ; GCN-NEXT:    s_mov_b64 s[6:7], 0
 ; GCN-NEXT:    s_waitcnt vmcnt(1)
-; GCN-NEXT:    v_cmp_lt_i32_e32 vcc, 2, v0
+; GCN-NEXT:    v_cmp_le_i32_e32 vcc, 2, v0
 ; GCN-NEXT:    s_mov_b64 s[4:5], -1
 ; GCN-NEXT:    s_and_saveexec_b64 s[8:9], vcc
 ; GCN-NEXT:    s_xor_b64 s[70:71], exec, s[8:9]
diff --git a/llvm/test/CodeGen/AMDGPU/urem-seteq-illegal-types.ll b/llvm/test/CodeGen/AMDGPU/urem-seteq-illegal-types.ll
index a0dd0e7e78f9d..113427c9d648d 100644
--- a/llvm/test/CodeGen/AMDGPU/urem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/AMDGPU/urem-seteq-illegal-types.ll
@@ -9,7 +9,7 @@ define i1 @test_urem_odd(i13 %X) nounwind {
 ; CHECK-NEXT:    s_movk_i32 s4, 0x667
 ; CHECK-NEXT:    v_mul_u32_u24_e32 v0, 0xccd, v0
 ; CHECK-NEXT:    v_and_b32_e32 v0, 0x1fff, v0
-; CHECK-NEXT:    v_cmp_gt_u32_e32 vcc, s4, v0
+; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, s4, v0
 ; CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
 ; CHECK-NEXT:    s_setpc_b64 s[30:31]
   %urem = urem i13 %X, 5
@@ -28,7 +28,7 @@ define i1 @test_urem_even(i27 %X) nounwind {
 ; CHECK-NEXT:    v_bfe_u32 v0, v0, 1, 26
 ; CHECK-NEXT:    v_or_b32_e32 v0, v0, v1
 ; CHECK-NEXT:    v_and_b32_e32 v0, 0x7ffffff, v0
-; CHECK-NEXT:    v_cmp_gt_u32_e32 vcc, s5, v0
+; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, s5, v0
 ; CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
 ; CHECK-NEXT:    s_setpc_b64 s[30:31]
   %urem = urem i27 %X, 14
@@ -43,7 +43,7 @@ define i1 @test_urem_odd_setne(i4 %X) nounwind {
 ; CHECK-NEXT:    v_and_b32_e32 v0, 15, v0
 ; CHECK-NEXT:    v_mul_u32_u24_e32 v0, 13, v0
 ; CHECK-NEXT:    v_and_b32_e32 v0, 15, v0
-; CHECK-NEXT:    v_cmp_lt_u32_e32 vcc, 3, v0
+; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, 3, v0
 ; CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
 ; CHECK-NEXT:    s_setpc_b64 s[30:31]
   %urem = urem i4 %X, 5
@@ -58,7 +58,7 @@ define i1 @test_urem_negative_odd(i9 %X) nounwind {
 ; CHECK-NEXT:    v_and_b32_e32 v0, 0x1ff, v0
 ; CHECK-NEXT:    v_mul_u32_u24_e32 v0, 0x133, v0
 ; CHECK-NEXT:    v_and_b32_e32 v0, 0x1ff, v0
-; CHECK-NEXT:    v_cmp_lt_u32_e32 vcc, 1, v0
+; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, 1, v0
 ; CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
 ; CHECK-NEXT:    s_setpc_b64 s[30:31]
   %urem = urem i9 %X, -5
@@ -85,11 +85,11 @@ define <3 x i1> @test_urem_vec(<3 x i11> %X) nounwind {
 ; CHECK-NEXT:    v_add_i32_e32 v2, vcc, 0xf9dc299a, v2
 ; CHECK-NEXT:    v_add_i32_e32 v1, vcc, 0x49249249, v1
 ; CHECK-NEXT:    v_alignbit_b32 v0, v0, v0, 1
-; CHECK-NEXT:    v_cmp_lt_u32_e32 vcc, s9, v0
+; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s9, v0
 ; CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; CHECK-NEXT:    v_cmp_lt_u32_e32 vcc, s7, v1
+; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s7, v1
 ; CHECK-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; CHECK-NEXT:    v_cmp_lt_u32_e32 vcc, s5, v2
+; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s5, v2
 ; CHECK-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
 ; CHECK-NEXT:    s_setpc_b64 s[30:31]
   %urem = urem <3 x i11> %X, <i11 6, i11 7, i11 -5>
diff --git a/llvm/test/CodeGen/AMDGPU/urem64.ll b/llvm/test/CodeGen/AMDGPU/urem64.ll
index bd742968ba37b..6c455e1fce376 100644
--- a/llvm/test/CodeGen/AMDGPU/urem64.ll
+++ b/llvm/test/CodeGen/AMDGPU/urem64.ll
@@ -94,9 +94,9 @@ define amdgpu_kernel void @s_test_urem_i64(ptr addrspace(1) %out, i64 %x, i64 %y
 ; GCN-NEXT:    v_subb_u32_e64 v2, s[0:1], v2, v3, vcc
 ; GCN-NEXT:    v_subrev_i32_e64 v4, s[0:1], s12, v0
 ; GCN-NEXT:    v_subbrev_u32_e64 v5, s[2:3], 0, v2, s[0:1]
-; GCN-NEXT:    v_cmp_le_u32_e64 s[2:3], s13, v5
+; GCN-NEXT:    v_cmp_lt_u32_e64 s[2:3], s13, v5
 ; GCN-NEXT:    v_cndmask_b32_e64 v6, 0, -1, s[2:3]
-; GCN-NEXT:    v_cmp_le_u32_e64 s[2:3], s12, v4
+; GCN-NEXT:    v_cmp_lt_u32_e64 s[2:3], s12, v4
 ; GCN-NEXT:    v_subb_u32_e64 v2, s[0:1], v2, v3, s[0:1]
 ; GCN-NEXT:    v_cndmask_b32_e64 v7, 0, -1, s[2:3]
 ; GCN-NEXT:    v_cmp_eq_u32_e64 s[2:3], s13, v5
@@ -107,9 +107,9 @@ define amdgpu_kernel void @s_test_urem_i64(ptr addrspace(1) %out, i64 %x, i64 %y
 ; GCN-NEXT:    v_cndmask_b32_e64 v3, v4, v3, s[0:1]
 ; GCN-NEXT:    v_mov_b32_e32 v4, s11
 ; GCN-NEXT:    v_subb_u32_e32 v1, vcc, v4, v1, vcc
-; GCN-NEXT:    v_cmp_le_u32_e32 vcc, s13, v1
+; GCN-NEXT:    v_cmp_lt_u32_e32 vcc, s13, v1
 ; GCN-NEXT:    v_cndmask_b32_e64 v4, 0, -1, vcc
-; GCN-NEXT:    v_cmp_le_u32_e32 vcc, s12, v0
+; GCN-NEXT:    v_cmp_lt_u32_e32 vcc, s12, v0
 ; GCN-NEXT:    v_cndmask_b32_e64 v2, v5, v2, s[0:1]
 ; GCN-NEXT:    v_cndmask_b32_e64 v5, 0, -1, vcc
 ; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, s13, v1
@@ -331,7 +331,7 @@ define i64 @v_test_urem_i64(i64 %x, i64 %y) {
 ; GCN-IR-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[2:3]
 ; GCN-IR-NEXT:    v_cmp_eq_u64_e64 s[4:5], 0, v[0:1]
 ; GCN-IR-NEXT:    v_subb_u32_e64 v5, s[6:7], 0, 0, s[6:7]
-; GCN-IR-NEXT:    v_cmp_lt_u64_e64 s[6:7], 63, v[4:5]
+; GCN-IR-NEXT:    v_cmp_le_u64_e64 s[6:7], 63, v[4:5]
 ; GCN-IR-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
 ; GCN-IR-NEXT:    s_or_b64 s[4:5], s[4:5], s[6:7]
 ; GCN-IR-NEXT:    v_cmp_ne_u64_e32 vcc, 63, v[4:5]
@@ -843,9 +843,9 @@ define amdgpu_kernel void @s_test_urem_k_num_i64(ptr addrspace(1) %out, i64 %x)
 ; GCN-NEXT:    v_subb_u32_e64 v2, s[0:1], v2, v3, vcc
 ; GCN-NEXT:    v_subrev_i32_e64 v4, s[0:1], s6, v0
 ; GCN-NEXT:    v_subbrev_u32_e64 v5, s[2:3], 0, v2, s[0:1]
-; GCN-NEXT:    v_cmp_le_u32_e64 s[2:3], s7, v5
+; GCN-NEXT:    v_cmp_lt_u32_e64 s[2:3], s7, v5
 ; GCN-NEXT:    v_cndmask_b32_e64 v6, 0, -1, s[2:3]
-; GCN-NEXT:    v_cmp_le_u32_e64 s[2:3], s6, v4
+; GCN-NEXT:    v_cmp_lt_u32_e64 s[2:3], s6, v4
 ; GCN-NEXT:    v_subb_u32_e64 v2, s[0:1], v2, v3, s[0:1]
 ; GCN-NEXT:    v_cndmask_b32_e64 v7, 0, -1, s[2:3]
 ; GCN-NEXT:    v_cmp_eq_u32_e64 s[2:3], s7, v5
@@ -854,10 +854,10 @@ define amdgpu_kernel void @s_test_urem_k_num_i64(ptr addrspace(1) %out, i64 %x)
 ; GCN-NEXT:    v_subbrev_u32_e64 v2, s[0:1], 0, v2, s[0:1]
 ; GCN-NEXT:    v_subb_u32_e32 v1, vcc, 0, v1, vcc
 ; GCN-NEXT:    v_cmp_ne_u32_e64 s[0:1], 0, v6
-; GCN-NEXT:    v_cmp_le_u32_e32 vcc, s7, v1
+; GCN-NEXT:    v_cmp_lt_u32_e32 vcc, s7, v1
 ; GCN-NEXT:    v_cndmask_b32_e64 v3, v4, v3, s[0:1]
 ; GCN-NEXT:    v_cndmask_b32_e64 v4, 0, -1, vcc
-; GCN-NEXT:    v_cmp_le_u32_e32 vcc, s6, v0
+; GCN-NEXT:    v_cmp_lt_u32_e32 vcc, s6, v0
 ; GCN-NEXT:    v_cndmask_b32_e64 v2, v5, v2, s[0:1]
 ; GCN-NEXT:    v_cndmask_b32_e64 v5, 0, -1, vcc
 ; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, s7, v1
@@ -1170,7 +1170,7 @@ define i64 @v_test_urem_pow2_k_num_i64(i64 %x) {
 ; GCN-IR-NEXT:    v_add_i32_e32 v2, vcc, 0xffffffd0, v8
 ; GCN-IR-NEXT:    v_addc_u32_e64 v3, s[6:7], 0, -1, vcc
 ; GCN-IR-NEXT:    v_cmp_eq_u64_e64 s[4:5], 0, v[0:1]
-; GCN-IR-NEXT:    v_cmp_lt_u64_e32 vcc, 63, v[2:3]
+; GCN-IR-NEXT:    v_cmp_le_u64_e32 vcc, 63, v[2:3]
 ; GCN-IR-NEXT:    v_cmp_ne_u64_e64 s[6:7], 63, v[2:3]
 ; GCN-IR-NEXT:    v_mov_b32_e32 v4, 0x8000
 ; GCN-IR-NEXT:    s_or_b64 s[4:5], s[4:5], vcc
@@ -1266,7 +1266,7 @@ define i64 @v_test_urem_pow2_k_den_i64(i64 %x) {
 ; GCN-IR-NEXT:    v_sub_i32_e64 v2, s[4:5], 48, v8
 ; GCN-IR-NEXT:    v_subb_u32_e64 v3, s[4:5], 0, 0, s[4:5]
 ; GCN-IR-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[0:1]
-; GCN-IR-NEXT:    v_cmp_lt_u64_e64 s[4:5], 63, v[2:3]
+; GCN-IR-NEXT:    v_cmp_le_u64_e64 s[4:5], 63, v[2:3]
 ; GCN-IR-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
 ; GCN-IR-NEXT:    v_cmp_ne_u64_e32 vcc, 63, v[2:3]
 ; GCN-IR-NEXT:    s_xor_b64 s[6:7], s[4:5], -1
diff --git a/llvm/test/CodeGen/AMDGPU/v_cndmask.ll b/llvm/test/CodeGen/AMDGPU/v_cndmask.ll
index a41063f467d01..3856d6d3a48e9 100644
--- a/llvm/test/CodeGen/AMDGPU/v_cndmask.ll
+++ b/llvm/test/CodeGen/AMDGPU/v_cndmask.ll
@@ -937,7 +937,7 @@ define amdgpu_kernel void @icmp_vgprX_k0_select_k1_vgprZ_i32(ptr addrspace(1) %o
 ; SI-NEXT:    buffer_load_dword v3, v[0:1], s[4:7], 0 addr64 glc
 ; SI-NEXT:    s_waitcnt vmcnt(0)
 ; SI-NEXT:    s_mov_b64 s[2:3], s[10:11]
-; SI-NEXT:    v_cmp_lt_i32_e32 vcc, -1, v2
+; SI-NEXT:    v_cmp_le_i32_e32 vcc, -1, v2
 ; SI-NEXT:    v_cndmask_b32_e32 v2, 2, v3, vcc
 ; SI-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
 ; SI-NEXT:    s_endpgm
@@ -961,7 +961,7 @@ define amdgpu_kernel void @icmp_vgprX_k0_select_k1_vgprZ_i32(ptr addrspace(1) %o
 ; VI-NEXT:    v_mov_b32_e32 v1, s1
 ; VI-NEXT:    v_add_u32_e32 v0, vcc, s0, v4
 ; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
-; VI-NEXT:    v_cmp_lt_i32_e32 vcc, -1, v5
+; VI-NEXT:    v_cmp_le_i32_e32 vcc, -1, v5
 ; VI-NEXT:    v_cndmask_b32_e32 v2, 2, v2, vcc
 ; VI-NEXT:    flat_store_dword v[0:1], v2
 ; VI-NEXT:    s_endpgm
@@ -977,7 +977,7 @@ define amdgpu_kernel void @icmp_vgprX_k0_select_k1_vgprZ_i32(ptr addrspace(1) %o
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    global_load_dword v2, v0, s[6:7] glc dlc
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
-; GFX10-NEXT:    v_cmp_lt_i32_e32 vcc, -1, v1
+; GFX10-NEXT:    v_cmp_le_i32_e32 vcc, -1, v1
 ; GFX10-NEXT:    v_cndmask_b32_e32 v1, 2, v2, vcc
 ; GFX10-NEXT:    global_store_dword v0, v1, s[0:1]
 ; GFX10-NEXT:    s_endpgm
@@ -995,7 +995,7 @@ define amdgpu_kernel void @icmp_vgprX_k0_select_k1_vgprZ_i32(ptr addrspace(1) %o
 ; GFX11-NEXT:    s_waitcnt vmcnt(0)
 ; GFX11-NEXT:    global_load_b32 v2, v0, s[4:5] glc dlc
 ; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    v_cmp_lt_i32_e32 vcc, -1, v1
+; GFX11-NEXT:    v_cmp_le_i32_e32 vcc, -1, v1
 ; GFX11-NEXT:    v_cndmask_b32_e32 v1, 2, v2, vcc
 ; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
 ; GFX11-NEXT:    s_endpgm
@@ -1013,7 +1013,7 @@ define amdgpu_kernel void @icmp_vgprX_k0_select_k1_vgprZ_i32(ptr addrspace(1) %o
 ; GFX12-NEXT:    s_wait_loadcnt 0x0
 ; GFX12-NEXT:    global_load_b32 v2, v0, s[4:5] scope:SCOPE_SYS
 ; GFX12-NEXT:    s_wait_loadcnt 0x0
-; GFX12-NEXT:    v_cmp_lt_i32_e32 vcc, -1, v1
+; GFX12-NEXT:    v_cmp_le_i32_e32 vcc, -1, v1
 ; GFX12-NEXT:    v_cndmask_b32_e32 v1, 2, v2, vcc
 ; GFX12-NEXT:    global_store_b32 v0, v1, s[0:1]
 ; GFX12-NEXT:    s_endpgm
@@ -1047,7 +1047,7 @@ define amdgpu_kernel void @icmp_vgprX_k0_select_k1_vgprZ_i64(ptr addrspace(1) %o
 ; SI-NEXT:    buffer_load_dwordx2 v[4:5], v[0:1], s[4:7], 0 addr64 glc
 ; SI-NEXT:    s_waitcnt vmcnt(0)
 ; SI-NEXT:    s_mov_b64 s[2:3], s[10:11]
-; SI-NEXT:    v_cmp_lt_i64_e32 vcc, -1, v[2:3]
+; SI-NEXT:    v_cmp_le_i64_e32 vcc, -1, v[2:3]
 ; SI-NEXT:    v_cndmask_b32_e32 v3, 0, v5, vcc
 ; SI-NEXT:    v_cndmask_b32_e32 v2, 2, v4, vcc
 ; SI-NEXT:    buffer_store_dwordx2 v[2:3], v[0:1], s[0:3], 0 addr64
@@ -1072,7 +1072,7 @@ define amdgpu_kernel void @icmp_vgprX_k0_select_k1_vgprZ_i64(ptr addrspace(1) %o
 ; VI-NEXT:    v_mov_b32_e32 v5, s1
 ; VI-NEXT:    v_add_u32_e32 v4, vcc, s0, v4
 ; VI-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
-; VI-NEXT:    v_cmp_lt_i64_e32 vcc, -1, v[0:1]
+; VI-NEXT:    v_cmp_le_i64_e32 vcc, -1, v[0:1]
 ; VI-NEXT:    v_cndmask_b32_e32 v1, 0, v3, vcc
 ; VI-NEXT:    v_cndmask_b32_e32 v0, 2, v2, vcc
 ; VI-NEXT:    flat_store_dwordx2 v[4:5], v[0:1]
@@ -1089,7 +1089,7 @@ define amdgpu_kernel void @icmp_vgprX_k0_select_k1_vgprZ_i64(ptr addrspace(1) %o
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    global_load_dwordx2 v[2:3], v4, s[6:7] glc dlc
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
-; GFX10-NEXT:    v_cmp_lt_i64_e32 vcc, -1, v[0:1]
+; GFX10-NEXT:    v_cmp_le_i64_e32 vcc, -1, v[0:1]
 ; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0, v3, vcc
 ; GFX10-NEXT:    v_cndmask_b32_e32 v0, 2, v2, vcc
 ; GFX10-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1]
@@ -1108,7 +1108,7 @@ define amdgpu_kernel void @icmp_vgprX_k0_select_k1_vgprZ_i64(ptr addrspace(1) %o
 ; GFX11-NEXT:    s_waitcnt vmcnt(0)
 ; GFX11-NEXT:    global_load_b64 v[2:3], v4, s[4:5] glc dlc
 ; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    v_cmp_lt_i64_e32 vcc, -1, v[0:1]
+; GFX11-NEXT:    v_cmp_le_i64_e32 vcc, -1, v[0:1]
 ; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0, v3, vcc
 ; GFX11-NEXT:    v_cndmask_b32_e32 v0, 2, v2, vcc
 ; GFX11-NEXT:    global_store_b64 v4, v[0:1], s[0:1]
@@ -1127,7 +1127,7 @@ define amdgpu_kernel void @icmp_vgprX_k0_select_k1_vgprZ_i64(ptr addrspace(1) %o
 ; GFX12-NEXT:    s_wait_loadcnt 0x0
 ; GFX12-NEXT:    global_load_b64 v[2:3], v4, s[4:5] scope:SCOPE_SYS
 ; GFX12-NEXT:    s_wait_loadcnt 0x0
-; GFX12-NEXT:    v_cmp_lt_i64_e32 vcc, -1, v[0:1]
+; GFX12-NEXT:    v_cmp_le_i64_e32 vcc, -1, v[0:1]
 ; GFX12-NEXT:    v_cndmask_b32_e32 v1, 0, v3, vcc
 ; GFX12-NEXT:    v_cndmask_b32_e32 v0, 2, v2, vcc
 ; GFX12-NEXT:    global_store_b64 v4, v[0:1], s[0:1]
@@ -1559,7 +1559,7 @@ define amdgpu_kernel void @icmp_vgprX_k0_select_k1_vgprZ_i1(ptr addrspace(1) %ou
 ; SI-NEXT:    s_waitcnt vmcnt(0)
 ; SI-NEXT:    s_mov_b64 s[10:11], s[6:7]
 ; SI-NEXT:    v_and_b32_e32 v3, 1, v3
-; SI-NEXT:    v_cmp_gt_i32_e32 vcc, 0, v2
+; SI-NEXT:    v_cmp_ge_i32_e32 vcc, 0, v2
 ; SI-NEXT:    v_cmp_eq_u32_e64 s[0:1], 1, v3
 ; SI-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
 ; SI-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s[0:1]
@@ -1585,7 +1585,7 @@ define amdgpu_kernel void @icmp_vgprX_k0_select_k1_vgprZ_i1(ptr addrspace(1) %ou
 ; VI-NEXT:    v_mov_b32_e32 v1, s1
 ; VI-NEXT:    v_add_u32_e32 v0, vcc, s0, v0
 ; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
-; VI-NEXT:    v_cmp_gt_i32_e32 vcc, 0, v2
+; VI-NEXT:    v_cmp_ge_i32_e32 vcc, 0, v2
 ; VI-NEXT:    v_and_b32_e32 v3, 1, v3
 ; VI-NEXT:    v_cmp_eq_u32_e64 s[0:1], 1, v3
 ; VI-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
@@ -1604,7 +1604,7 @@ define amdgpu_kernel void @icmp_vgprX_k0_select_k1_vgprZ_i1(ptr addrspace(1) %ou
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    global_load_ubyte v3, v0, s[0:1] glc dlc
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
-; GFX10-NEXT:    v_cmp_gt_i32_e32 vcc, 0, v2
+; GFX10-NEXT:    v_cmp_ge_i32_e32 vcc, 0, v2
 ; GFX10-NEXT:    v_and_b32_e32 v1, 1, v3
 ; GFX10-NEXT:    v_cmp_eq_u32_e64 s[0:1], 1, v1
 ; GFX10-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
@@ -1625,7 +1625,7 @@ define amdgpu_kernel void @icmp_vgprX_k0_select_k1_vgprZ_i1(ptr addrspace(1) %ou
 ; GFX11-NEXT:    s_waitcnt vmcnt(0)
 ; GFX11-NEXT:    global_load_u8 v2, v0, s[0:1] glc dlc
 ; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    v_cmp_gt_i32_e32 vcc, 0, v1
+; GFX11-NEXT:    v_cmp_ge_i32_e32 vcc, 0, v1
 ; GFX11-NEXT:    v_and_b32_e32 v2, 1, v2
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
 ; GFX11-NEXT:    v_cmp_eq_u32_e64 s[0:1], 1, v2
@@ -1647,7 +1647,7 @@ define amdgpu_kernel void @icmp_vgprX_k0_select_k1_vgprZ_i1(ptr addrspace(1) %ou
 ; GFX12-NEXT:    s_wait_loadcnt 0x0
 ; GFX12-NEXT:    global_load_u8 v2, v0, s[0:1] scope:SCOPE_SYS
 ; GFX12-NEXT:    s_wait_loadcnt 0x0
-; GFX12-NEXT:    v_cmp_gt_i32_e32 vcc, 0, v1
+; GFX12-NEXT:    v_cmp_ge_i32_e32 vcc, 0, v1
 ; GFX12-NEXT:    v_and_b32_e32 v2, 1, v2
 ; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
 ; GFX12-NEXT:    v_cmp_eq_u32_e64 s[0:1], 1, v2
@@ -1932,7 +1932,7 @@ define amdgpu_kernel void @icmp_vgprX_k0_selectf32_k1_vgprZ_i32(ptr addrspace(1)
 ; SI-NEXT:    buffer_load_dword v3, v[0:1], s[4:7], 0 addr64 glc
 ; SI-NEXT:    s_waitcnt vmcnt(0)
 ; SI-NEXT:    s_mov_b64 s[2:3], s[10:11]
-; SI-NEXT:    v_cmp_gt_u32_e32 vcc, 2, v2
+; SI-NEXT:    v_cmp_ge_u32_e32 vcc, 2, v2
 ; SI-NEXT:    v_cndmask_b32_e32 v2, 4.0, v3, vcc
 ; SI-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
 ; SI-NEXT:    s_endpgm
@@ -1956,7 +1956,7 @@ define amdgpu_kernel void @icmp_vgprX_k0_selectf32_k1_vgprZ_i32(ptr addrspace(1)
 ; VI-NEXT:    v_mov_b32_e32 v1, s1
 ; VI-NEXT:    v_add_u32_e32 v0, vcc, s0, v4
 ; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
-; VI-NEXT:    v_cmp_gt_u32_e32 vcc, 2, v5
+; VI-NEXT:    v_cmp_ge_u32_e32 vcc, 2, v5
 ; VI-NEXT:    v_cndmask_b32_e32 v2, 4.0, v2, vcc
 ; VI-NEXT:    flat_store_dword v[0:1], v2
 ; VI-NEXT:    s_endpgm
@@ -1972,7 +1972,7 @@ define amdgpu_kernel void @icmp_vgprX_k0_selectf32_k1_vgprZ_i32(ptr addrspace(1)
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    global_load_dword v2, v0, s[6:7] glc dlc
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
-; GFX10-NEXT:    v_cmp_gt_u32_e32 vcc, 2, v1
+; GFX10-NEXT:    v_cmp_ge_u32_e32 vcc, 2, v1
 ; GFX10-NEXT:    v_cndmask_b32_e32 v1, 4.0, v2, vcc
 ; GFX10-NEXT:    global_store_dword v0, v1, s[0:1]
 ; GFX10-NEXT:    s_endpgm
@@ -1990,7 +1990,7 @@ define amdgpu_kernel void @icmp_vgprX_k0_selectf32_k1_vgprZ_i32(ptr addrspace(1)
 ; GFX11-NEXT:    s_waitcnt vmcnt(0)
 ; GFX11-NEXT:    global_load_b32 v2, v0, s[4:5] glc dlc
 ; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    v_cmp_gt_u32_e32 vcc, 2, v1
+; GFX11-NEXT:    v_cmp_ge_u32_e32 vcc, 2, v1
 ; GFX11-NEXT:    v_cndmask_b32_e32 v1, 4.0, v2, vcc
 ; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
 ; GFX11-NEXT:    s_endpgm
@@ -2008,7 +2008,7 @@ define amdgpu_kernel void @icmp_vgprX_k0_selectf32_k1_vgprZ_i32(ptr addrspace(1)
 ; GFX12-NEXT:    s_wait_loadcnt 0x0
 ; GFX12-NEXT:    global_load_b32 v2, v0, s[4:5] scope:SCOPE_SYS
 ; GFX12-NEXT:    s_wait_loadcnt 0x0
-; GFX12-NEXT:    v_cmp_gt_u32_e32 vcc, 2, v1
+; GFX12-NEXT:    v_cmp_ge_u32_e32 vcc, 2, v1
 ; GFX12-NEXT:    v_cndmask_b32_e32 v1, 4.0, v2, vcc
 ; GFX12-NEXT:    global_store_b32 v0, v1, s[0:1]
 ; GFX12-NEXT:    s_endpgm
diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-liverange-ir.ll b/llvm/test/CodeGen/AMDGPU/vgpr-liverange-ir.ll
index aea25b37e8f4e..6202bb506f512 100644
--- a/llvm/test/CodeGen/AMDGPU/vgpr-liverange-ir.ll
+++ b/llvm/test/CodeGen/AMDGPU/vgpr-liverange-ir.ll
@@ -10,8 +10,8 @@ define amdgpu_ps float @else1(i32 %z, float %v) #0 {
   ; SI-NEXT: {{  $}}
   ; SI-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY killed $vgpr1
   ; SI-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY killed $vgpr0
-  ; SI-NEXT:   [[V_CMP_GT_I32_e64_:%[0-9]+]]:sreg_32 = V_CMP_GT_I32_e64 6, killed [[COPY1]], implicit $exec
-  ; SI-NEXT:   [[SI_IF:%[0-9]+]]:sreg_32 = SI_IF killed [[V_CMP_GT_I32_e64_]], %bb.1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+  ; SI-NEXT:   [[V_CMP_GE_I32_e64_:%[0-9]+]]:sreg_32 = V_CMP_GE_I32_e64 6, killed [[COPY1]], implicit $exec
+  ; SI-NEXT:   [[SI_IF:%[0-9]+]]:sreg_32 = SI_IF killed [[V_CMP_GE_I32_e64_]], %bb.1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
   ; SI-NEXT:   S_BRANCH %bb.3
   ; SI-NEXT: {{  $}}
   ; SI-NEXT: bb.1.Flow:
@@ -66,8 +66,8 @@ define amdgpu_ps float @else2(i32 %z, float %v) #0 {
   ; SI-NEXT: {{  $}}
   ; SI-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY killed $vgpr1
   ; SI-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY killed $vgpr0
-  ; SI-NEXT:   [[V_CMP_GT_I32_e64_:%[0-9]+]]:sreg_32 = V_CMP_GT_I32_e64 6, killed [[COPY1]], implicit $exec
-  ; SI-NEXT:   [[SI_IF:%[0-9]+]]:sreg_32 = SI_IF killed [[V_CMP_GT_I32_e64_]], %bb.1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+  ; SI-NEXT:   [[V_CMP_GE_I32_e64_:%[0-9]+]]:sreg_32 = V_CMP_GE_I32_e64 6, killed [[COPY1]], implicit $exec
+  ; SI-NEXT:   [[SI_IF:%[0-9]+]]:sreg_32 = SI_IF killed [[V_CMP_GE_I32_e64_]], %bb.1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
   ; SI-NEXT:   S_BRANCH %bb.3
   ; SI-NEXT: {{  $}}
   ; SI-NEXT: bb.1.Flow:
@@ -126,7 +126,7 @@ define amdgpu_ps float @else3(i32 %z, float %v, i32 inreg %bound, i32 %x0) #0 {
   ; SI-NEXT:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY killed $sgpr0
   ; SI-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY killed $vgpr1
   ; SI-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed $vgpr0
-  ; SI-NEXT:   [[V_CMP_GT_I32_e64_:%[0-9]+]]:sreg_32 = V_CMP_GT_I32_e64 6, killed [[COPY3]], implicit $exec
+  ; SI-NEXT:   [[V_CMP_GE_I32_e64_:%[0-9]+]]:sreg_32 = V_CMP_GE_I32_e64 6, killed [[COPY3]], implicit $exec
   ; SI-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
   ; SI-NEXT: {{  $}}
   ; SI-NEXT: bb.1.for.body:
@@ -134,7 +134,7 @@ define amdgpu_ps float @else3(i32 %z, float %v, i32 inreg %bound, i32 %x0) #0 {
   ; SI-NEXT: {{  $}}
   ; SI-NEXT:   [[PHI:%[0-9]+]]:sreg_32 = PHI [[S_MOV_B32_]], %bb.0, %14, %bb.5
   ; SI-NEXT:   [[PHI1:%[0-9]+]]:vgpr_32 = PHI [[COPY]], %bb.0, %13, %bb.5
-  ; SI-NEXT:   [[SI_IF:%[0-9]+]]:sreg_32 = SI_IF [[V_CMP_GT_I32_e64_]], %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+  ; SI-NEXT:   [[SI_IF:%[0-9]+]]:sreg_32 = SI_IF [[V_CMP_GE_I32_e64_]], %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
   ; SI-NEXT:   S_BRANCH %bb.4
   ; SI-NEXT: {{  $}}
   ; SI-NEXT: bb.2.Flow:
@@ -225,8 +225,8 @@ define amdgpu_ps float @loop(i32 %z, float %v, i32 inreg %bound, ptr %extern_fun
   ; SI-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed $vgpr2
   ; SI-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY killed $vgpr1
   ; SI-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY killed $vgpr0
-  ; SI-NEXT:   [[V_CMP_GT_I32_e64_:%[0-9]+]]:sreg_32 = V_CMP_GT_I32_e64 6, killed [[COPY5]], implicit $exec
-  ; SI-NEXT:   [[SI_IF:%[0-9]+]]:sreg_32 = SI_IF killed [[V_CMP_GT_I32_e64_]], %bb.1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+  ; SI-NEXT:   [[V_CMP_GE_I32_e64_:%[0-9]+]]:sreg_32 = V_CMP_GE_I32_e64 6, killed [[COPY5]], implicit $exec
+  ; SI-NEXT:   [[SI_IF:%[0-9]+]]:sreg_32 = SI_IF killed [[V_CMP_GE_I32_e64_]], %bb.1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
   ; SI-NEXT:   S_BRANCH %bb.6
   ; SI-NEXT: {{  $}}
   ; SI-NEXT: bb.1.Flow:
@@ -348,8 +348,8 @@ define amdgpu_ps float @loop_with_use(i32 %z, float %v, i32 inreg %bound, ptr %e
   ; SI-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed $vgpr2
   ; SI-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY killed $vgpr1
   ; SI-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY killed $vgpr0
-  ; SI-NEXT:   [[V_CMP_GT_I32_e64_:%[0-9]+]]:sreg_32 = V_CMP_GT_I32_e64 6, killed [[COPY5]], implicit $exec
-  ; SI-NEXT:   [[SI_IF:%[0-9]+]]:sreg_32 = SI_IF killed [[V_CMP_GT_I32_e64_]], %bb.1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+  ; SI-NEXT:   [[V_CMP_GE_I32_e64_:%[0-9]+]]:sreg_32 = V_CMP_GE_I32_e64 6, killed [[COPY5]], implicit $exec
+  ; SI-NEXT:   [[SI_IF:%[0-9]+]]:sreg_32 = SI_IF killed [[V_CMP_GE_I32_e64_]], %bb.1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
   ; SI-NEXT:   S_BRANCH %bb.6
   ; SI-NEXT: {{  $}}
   ; SI-NEXT: bb.1.Flow:
diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-liverange.ll b/llvm/test/CodeGen/AMDGPU/vgpr-liverange.ll
index c0b56d05f72aa..dabc8295d877c 100644
--- a/llvm/test/CodeGen/AMDGPU/vgpr-liverange.ll
+++ b/llvm/test/CodeGen/AMDGPU/vgpr-liverange.ll
@@ -5,7 +5,7 @@
 define amdgpu_ps float @else1(i32 %z, float %v) #0 {
 ; SI-LABEL: else1:
 ; SI:       ; %bb.0: ; %main_body
-; SI-NEXT:    v_cmp_gt_i32_e32 vcc_lo, 6, v0
+; SI-NEXT:    v_cmp_ge_i32_e32 vcc_lo, 6, v0
 ; SI-NEXT:    ; implicit-def: $vgpr0
 ; SI-NEXT:    s_and_saveexec_b32 s0, vcc_lo
 ; SI-NEXT:    s_xor_b32 s0, exec_lo, s0
@@ -48,7 +48,7 @@ end:
 define amdgpu_ps float @else2(i32 %z, float %v) #0 {
 ; SI-LABEL: else2:
 ; SI:       ; %bb.0: ; %main_body
-; SI-NEXT:    v_cmp_gt_i32_e32 vcc_lo, 6, v0
+; SI-NEXT:    v_cmp_ge_i32_e32 vcc_lo, 6, v0
 ; SI-NEXT:    ; implicit-def: $vgpr0
 ; SI-NEXT:    s_and_saveexec_b32 s0, vcc_lo
 ; SI-NEXT:    s_xor_b32 s0, exec_lo, s0
@@ -87,7 +87,7 @@ define amdgpu_ps float @else3(i32 %z, float %v, i32 inreg %bound, i32 %x0) #0 {
 ; SI-LABEL: else3:
 ; SI:       ; %bb.0: ; %entry
 ; SI-NEXT:    s_mov_b32 s1, 0
-; SI-NEXT:    v_cmp_gt_i32_e32 vcc_lo, 6, v0
+; SI-NEXT:    v_cmp_ge_i32_e32 vcc_lo, 6, v0
 ; SI-NEXT:    s_branch .LBB2_2
 ; SI-NEXT:  .LBB2_1: ; %if.end
 ; SI-NEXT:    ; in Loop: Header=BB2_2 Depth=1
@@ -166,7 +166,7 @@ define amdgpu_ps float @loop(i32 %z, float %v, i32 inreg %bound, ptr %extern_fun
 ; SI-NEXT:    s_mov_b32 s13, SCRATCH_RSRC_DWORD1
 ; SI-NEXT:    s_mov_b32 s14, -1
 ; SI-NEXT:    v_mov_b32_e32 v0, v1
-; SI-NEXT:    v_cmp_gt_i32_e32 vcc_lo, 6, v6
+; SI-NEXT:    v_cmp_ge_i32_e32 vcc_lo, 6, v6
 ; SI-NEXT:    s_mov_b32 s15, 0x31c16000
 ; SI-NEXT:    s_add_u32 s12, s12, s1
 ; SI-NEXT:    s_addc_u32 s13, s13, 0
@@ -243,7 +243,7 @@ define amdgpu_ps float @loop_with_use(i32 %z, float %v, i32 inreg %bound, ptr %e
 ; SI-NEXT:    s_mov_b32 s13, SCRATCH_RSRC_DWORD1
 ; SI-NEXT:    s_mov_b32 s14, -1
 ; SI-NEXT:    v_mov_b32_e32 v40, v1
-; SI-NEXT:    v_cmp_gt_i32_e32 vcc_lo, 6, v0
+; SI-NEXT:    v_cmp_ge_i32_e32 vcc_lo, 6, v0
 ; SI-NEXT:    s_mov_b32 s15, 0x31c16000
 ; SI-NEXT:    s_add_u32 s12, s12, s1
 ; SI-NEXT:    s_addc_u32 s13, s13, 0
diff --git a/llvm/test/CodeGen/AMDGPU/vni8-across-blocks.ll b/llvm/test/CodeGen/AMDGPU/vni8-across-blocks.ll
index 310f3ad04917b..4a1dd93d303f5 100644
--- a/llvm/test/CodeGen/AMDGPU/vni8-across-blocks.ll
+++ b/llvm/test/CodeGen/AMDGPU/vni8-across-blocks.ll
@@ -13,7 +13,7 @@ define amdgpu_kernel void @v3i8_liveout(ptr addrspace(1) %src1, ptr addrspace(1)
 ; GFX942-NEXT:    global_load_dword v3, v1, s[0:1]
 ; GFX942-NEXT:    s_mov_b32 s4, 0xff0000
 ; GFX942-NEXT:    v_mov_b32_e32 v0, 0
-; GFX942-NEXT:    v_cmp_gt_u32_e32 vcc, 15, v4
+; GFX942-NEXT:    v_cmp_ge_u32_e32 vcc, 15, v4
 ; GFX942-NEXT:    s_waitcnt vmcnt(0)
 ; GFX942-NEXT:    v_lshrrev_b32_sdwa v5, v2, v3 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
 ; GFX942-NEXT:    s_nop 0
@@ -62,7 +62,7 @@ define amdgpu_kernel void @v4i8_liveout(ptr addrspace(1) %src1, ptr addrspace(1)
 ; GFX942-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX942-NEXT:    global_load_dword v1, v2, s[0:1]
-; GFX942-NEXT:    v_cmp_gt_u32_e32 vcc, 15, v3
+; GFX942-NEXT:    v_cmp_ge_u32_e32 vcc, 15, v3
 ; GFX942-NEXT:    s_and_saveexec_b64 s[0:1], vcc
 ; GFX942-NEXT:    s_cbranch_execz .LBB1_2
 ; GFX942-NEXT:  ; %bb.1: ; %bb.1
@@ -99,7 +99,7 @@ define amdgpu_kernel void @v5i8_liveout(ptr addrspace(1) %src1, ptr addrspace(1)
 ; GFX942-NEXT:    v_mov_b32_e32 v2, 0
 ; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX942-NEXT:    global_load_dwordx2 v[0:1], v3, s[0:1]
-; GFX942-NEXT:    v_cmp_gt_u32_e32 vcc, 15, v4
+; GFX942-NEXT:    v_cmp_ge_u32_e32 vcc, 15, v4
 ; GFX942-NEXT:    s_waitcnt vmcnt(0)
 ; GFX942-NEXT:    v_and_b32_e32 v1, 0xff, v1
 ; GFX942-NEXT:    s_and_saveexec_b64 s[0:1], vcc
@@ -140,7 +140,7 @@ define amdgpu_kernel void @v8i8_liveout(ptr addrspace(1) %src1, ptr addrspace(1)
 ; GFX942-NEXT:    v_mov_b32_e32 v2, 0
 ; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX942-NEXT:    global_load_dwordx2 v[0:1], v3, s[0:1]
-; GFX942-NEXT:    v_cmp_gt_u32_e32 vcc, 15, v4
+; GFX942-NEXT:    v_cmp_ge_u32_e32 vcc, 15, v4
 ; GFX942-NEXT:    s_and_saveexec_b64 s[0:1], vcc
 ; GFX942-NEXT:    s_cbranch_execz .LBB3_2
 ; GFX942-NEXT:  ; %bb.1: ; %bb.1
@@ -177,7 +177,7 @@ define amdgpu_kernel void @v16i8_liveout(ptr addrspace(1) %src1, ptr addrspace(1
 ; GFX942-NEXT:    v_mov_b32_e32 v4, 0
 ; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX942-NEXT:    global_load_dwordx4 v[0:3], v5, s[0:1]
-; GFX942-NEXT:    v_cmp_gt_u32_e32 vcc, 15, v6
+; GFX942-NEXT:    v_cmp_ge_u32_e32 vcc, 15, v6
 ; GFX942-NEXT:    s_and_saveexec_b64 s[0:1], vcc
 ; GFX942-NEXT:    s_cbranch_execz .LBB4_2
 ; GFX942-NEXT:  ; %bb.1: ; %bb.1
@@ -215,7 +215,7 @@ define amdgpu_kernel void @v32i8_liveout(ptr addrspace(1) %src1, ptr addrspace(1
 ; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX942-NEXT:    global_load_dwordx4 v[4:7], v9, s[0:1] offset:16
 ; GFX942-NEXT:    global_load_dwordx4 v[0:3], v9, s[0:1]
-; GFX942-NEXT:    v_cmp_gt_u32_e32 vcc, 15, v10
+; GFX942-NEXT:    v_cmp_ge_u32_e32 vcc, 15, v10
 ; GFX942-NEXT:    s_and_saveexec_b64 s[0:1], vcc
 ; GFX942-NEXT:    s_cbranch_execz .LBB5_2
 ; GFX942-NEXT:  ; %bb.1: ; %bb.1
@@ -270,7 +270,7 @@ define amdgpu_kernel void @v256i8_liveout(ptr addrspace(1) %src1, ptr addrspace(
 ; GFX942-NEXT:    global_load_dwordx4 v[40:43], v1, s[0:1] offset:32
 ; GFX942-NEXT:    global_load_dwordx4 v[36:39], v1, s[0:1] offset:16
 ; GFX942-NEXT:    global_load_dwordx4 v[32:35], v1, s[0:1]
-; GFX942-NEXT:    v_cmp_gt_u32_e32 vcc, 15, v2
+; GFX942-NEXT:    v_cmp_ge_u32_e32 vcc, 15, v2
 ; GFX942-NEXT:    s_and_saveexec_b64 s[0:1], vcc
 ; GFX942-NEXT:    s_cbranch_execz .LBB6_2
 ; GFX942-NEXT:  ; %bb.1: ; %bb.1
@@ -393,15 +393,15 @@ define amdgpu_kernel void @v8i8_phi_chain(ptr addrspace(1) %src1, ptr addrspace(
 ; GFX942-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
 ; GFX942-NEXT:    v_and_b32_e32 v2, 0x3ff, v0
 ; GFX942-NEXT:    v_lshlrev_b32_e32 v3, 3, v2
-; GFX942-NEXT:    v_cmp_lt_u32_e64 s[0:1], 14, v2
-; GFX942-NEXT:    v_cmp_gt_u32_e32 vcc, 15, v2
+; GFX942-NEXT:    v_cmp_le_u32_e64 s[0:1], 14, v2
+; GFX942-NEXT:    v_cmp_ge_u32_e32 vcc, 15, v2
 ; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX942-NEXT:    global_load_dwordx2 v[0:1], v3, s[8:9]
 ; GFX942-NEXT:    s_and_saveexec_b64 s[2:3], vcc
 ; GFX942-NEXT:    s_cbranch_execz .LBB8_2
 ; GFX942-NEXT:  ; %bb.1: ; %bb.1
 ; GFX942-NEXT:    global_load_dwordx2 v[0:1], v3, s[10:11]
-; GFX942-NEXT:    v_cmp_gt_u32_e32 vcc, 7, v2
+; GFX942-NEXT:    v_cmp_ge_u32_e32 vcc, 7, v2
 ; GFX942-NEXT:    s_andn2_b64 s[0:1], s[0:1], exec
 ; GFX942-NEXT:    s_and_b64 s[4:5], vcc, exec
 ; GFX942-NEXT:    s_or_b64 s[0:1], s[0:1], s[4:5]
@@ -449,8 +449,8 @@ define amdgpu_kernel void @v8i8_phi_zeroinit(ptr addrspace(1) %src1, ptr addrspa
 ; GFX942-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
 ; GFX942-NEXT:    v_and_b32_e32 v4, 0x3ff, v0
 ; GFX942-NEXT:    v_lshlrev_b32_e32 v5, 3, v4
-; GFX942-NEXT:    v_cmp_lt_u32_e64 s[0:1], 14, v4
-; GFX942-NEXT:    v_cmp_gt_u32_e32 vcc, 15, v4
+; GFX942-NEXT:    v_cmp_le_u32_e64 s[0:1], 14, v4
+; GFX942-NEXT:    v_cmp_ge_u32_e32 vcc, 15, v4
 ; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX942-NEXT:    global_load_dwordx2 v[0:1], v5, s[8:9]
 ; GFX942-NEXT:    ; implicit-def: $vgpr2_vgpr3
@@ -458,7 +458,7 @@ define amdgpu_kernel void @v8i8_phi_zeroinit(ptr addrspace(1) %src1, ptr addrspa
 ; GFX942-NEXT:    s_cbranch_execz .LBB9_2
 ; GFX942-NEXT:  ; %bb.1: ; %bb.1
 ; GFX942-NEXT:    global_load_dwordx2 v[2:3], v5, s[10:11]
-; GFX942-NEXT:    v_cmp_gt_u32_e32 vcc, 7, v4
+; GFX942-NEXT:    v_cmp_ge_u32_e32 vcc, 7, v4
 ; GFX942-NEXT:    s_waitcnt vmcnt(1)
 ; GFX942-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX942-NEXT:    s_andn2_b64 s[0:1], s[0:1], exec
@@ -509,8 +509,8 @@ define amdgpu_kernel void @v8i8_phi_const(ptr addrspace(1) %src1, ptr addrspace(
 ; GFX942-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
 ; GFX942-NEXT:    v_and_b32_e32 v16, 0x3ff, v0
 ; GFX942-NEXT:    v_lshlrev_b32_e32 v1, 3, v16
-; GFX942-NEXT:    v_cmp_lt_u32_e64 s[0:1], 14, v16
-; GFX942-NEXT:    v_cmp_gt_u32_e32 vcc, 15, v16
+; GFX942-NEXT:    v_cmp_le_u32_e64 s[0:1], 14, v16
+; GFX942-NEXT:    v_cmp_ge_u32_e32 vcc, 15, v16
 ; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX942-NEXT:    global_load_dwordx2 v[2:3], v1, s[8:9]
 ; GFX942-NEXT:    ; implicit-def: $vgpr0
@@ -531,7 +531,7 @@ define amdgpu_kernel void @v8i8_phi_const(ptr addrspace(1) %src1, ptr addrspace(
 ; GFX942-NEXT:    s_cbranch_execz .LBB10_2
 ; GFX942-NEXT:  ; %bb.1: ; %bb.1
 ; GFX942-NEXT:    global_load_dwordx2 v[0:1], v1, s[10:11]
-; GFX942-NEXT:    v_cmp_gt_u32_e32 vcc, 7, v16
+; GFX942-NEXT:    v_cmp_ge_u32_e32 vcc, 7, v16
 ; GFX942-NEXT:    s_andn2_b64 s[0:1], s[0:1], exec
 ; GFX942-NEXT:    s_and_b64 s[4:5], vcc, exec
 ; GFX942-NEXT:    v_mov_b32_e32 v4, 8
@@ -621,7 +621,7 @@ define amdgpu_kernel void @v8i8_multi_block(ptr addrspace(1) %src1, ptr addrspac
 ; GFX942-NEXT:    v_and_b32_e32 v5, 0x3ff, v0
 ; GFX942-NEXT:    v_lshlrev_b32_e32 v6, 3, v5
 ; GFX942-NEXT:    v_mov_b32_e32 v4, 0
-; GFX942-NEXT:    v_cmp_gt_u32_e32 vcc, 15, v5
+; GFX942-NEXT:    v_cmp_ge_u32_e32 vcc, 15, v5
 ; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX942-NEXT:    global_load_dwordx2 v[2:3], v6, s[8:9]
 ; GFX942-NEXT:    s_waitcnt vmcnt(0)
@@ -630,7 +630,7 @@ define amdgpu_kernel void @v8i8_multi_block(ptr addrspace(1) %src1, ptr addrspac
 ; GFX942-NEXT:    s_cbranch_execz .LBB11_4
 ; GFX942-NEXT:  ; %bb.1: ; %bb.1
 ; GFX942-NEXT:    global_load_dwordx2 v[0:1], v6, s[10:11]
-; GFX942-NEXT:    v_cmp_gt_u32_e32 vcc, 7, v5
+; GFX942-NEXT:    v_cmp_ge_u32_e32 vcc, 7, v5
 ; GFX942-NEXT:    s_and_saveexec_b64 s[2:3], vcc
 ; GFX942-NEXT:    s_cbranch_execz .LBB11_3
 ; GFX942-NEXT:  ; %bb.2: ; %bb.2
@@ -671,7 +671,7 @@ define amdgpu_kernel void @v32i8_loop_carried(ptr addrspace(1) %src1, ptr addrsp
 ; GFX942-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; GFX942-NEXT:    v_and_b32_e32 v1, 0x3ff, v0
 ; GFX942-NEXT:    v_lshlrev_b32_e32 v0, 5, v1
-; GFX942-NEXT:    v_cmp_lt_u32_e32 vcc, 14, v1
+; GFX942-NEXT:    v_cmp_le_u32_e32 vcc, 14, v1
 ; GFX942-NEXT:    s_mov_b32 s2, 0x2000604
 ; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX942-NEXT:    global_load_dword v0, v0, s[0:1]
@@ -719,10 +719,10 @@ define amdgpu_kernel void @v8i8_multiuse_multiblock(ptr addrspace(1) %src1, ptr
 ; GFX942-NEXT:    v_and_b32_e32 v2, 0x3ff, v0
 ; GFX942-NEXT:    v_lshlrev_b32_e32 v0, 3, v2
 ; GFX942-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x44
-; GFX942-NEXT:    v_cmp_lt_u32_e64 s[2:3], 14, v2
+; GFX942-NEXT:    v_cmp_le_u32_e64 s[2:3], 14, v2
 ; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX942-NEXT:    global_load_dwordx2 v[0:1], v0, s[8:9]
-; GFX942-NEXT:    v_cmp_gt_u32_e32 vcc, 15, v2
+; GFX942-NEXT:    v_cmp_ge_u32_e32 vcc, 15, v2
 ; GFX942-NEXT:    s_waitcnt vmcnt(0)
 ; GFX942-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
 ; GFX942-NEXT:    s_and_saveexec_b64 s[4:5], vcc
@@ -732,7 +732,7 @@ define amdgpu_kernel void @v8i8_multiuse_multiblock(ptr addrspace(1) %src1, ptr
 ; GFX942-NEXT:    v_mov_b32_e32 v5, 8
 ; GFX942-NEXT:    v_and_b32_sdwa v6, v0, s6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
 ; GFX942-NEXT:    s_mov_b32 s6, 0x6070504
-; GFX942-NEXT:    v_cmp_gt_u32_e32 vcc, 7, v2
+; GFX942-NEXT:    v_cmp_ge_u32_e32 vcc, 7, v2
 ; GFX942-NEXT:    v_and_b32_e32 v4, 0xffffff00, v0
 ; GFX942-NEXT:    v_lshlrev_b16_sdwa v5, v5, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
 ; GFX942-NEXT:    v_perm_b32 v7, v0, v0, s6
@@ -862,7 +862,7 @@ define amdgpu_kernel void @v8i8_mfma_i8(ptr addrspace(1) %src1, ptr addrspace(1)
 ; GFX942-NEXT:    v_and_b32_e32 v4, 0x3ff, v0
 ; GFX942-NEXT:    v_lshlrev_b32_e32 v3, 3, v4
 ; GFX942-NEXT:    v_mov_b32_e32 v2, 0
-; GFX942-NEXT:    v_cmp_gt_u32_e32 vcc, 15, v4
+; GFX942-NEXT:    v_cmp_ge_u32_e32 vcc, 15, v4
 ; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX942-NEXT:    global_load_dwordx2 v[0:1], v3, s[8:9]
 ; GFX942-NEXT:    s_and_saveexec_b64 s[0:1], vcc
@@ -912,7 +912,7 @@ define amdgpu_kernel void @v8i8_mfma_half(ptr addrspace(1) %src1, ptr addrspace(
 ; GFX942-NEXT:    v_and_b32_e32 v4, 0x3ff, v0
 ; GFX942-NEXT:    v_lshlrev_b32_e32 v3, 3, v4
 ; GFX942-NEXT:    v_mov_b32_e32 v2, 0
-; GFX942-NEXT:    v_cmp_gt_u32_e32 vcc, 15, v4
+; GFX942-NEXT:    v_cmp_ge_u32_e32 vcc, 15, v4
 ; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX942-NEXT:    global_load_dwordx2 v[0:1], v3, s[36:37]
 ; GFX942-NEXT:    s_and_saveexec_b64 s[0:1], vcc
@@ -1002,7 +1002,7 @@ define amdgpu_kernel void @v8i8_intrinsic(ptr addrspace(1) %src1, ptr addrspace(
 ; GFX942-NEXT:    v_mov_b32_e32 v2, 0
 ; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX942-NEXT:    global_load_dwordx2 v[0:1], v3, s[0:1]
-; GFX942-NEXT:    v_cmp_gt_u32_e32 vcc, 15, v4
+; GFX942-NEXT:    v_cmp_ge_u32_e32 vcc, 15, v4
 ; GFX942-NEXT:    s_and_saveexec_b64 s[0:1], vcc
 ; GFX942-NEXT:    s_cbranch_execz .LBB16_2
 ; GFX942-NEXT:  ; %bb.1: ; %bb.1
diff --git a/llvm/test/CodeGen/AMDGPU/vopc_dpp-true16.mir b/llvm/test/CodeGen/AMDGPU/vopc_dpp-true16.mir
index 8f63f6c8cb1c6..ea8e1027aedbc 100644
--- a/llvm/test/CodeGen/AMDGPU/vopc_dpp-true16.mir
+++ b/llvm/test/CodeGen/AMDGPU/vopc_dpp-true16.mir
@@ -23,7 +23,7 @@ body:             |
     ; GCN-NEXT: V_CMP_NGE_F32_e32_dpp 0, [[COPY1]], 0, [[COPY]], 1, 15, 15, 1, implicit-def $vcc, implicit $mode, implicit $exec
     ; GCN-NEXT: [[V_CMP_NGE_F32_e64_dpp:%[0-9]+]]:sgpr_32 = V_CMP_NGE_F32_e64_dpp 0, [[COPY1]], 0, [[COPY]], 0, 1, 15, 15, 1, implicit $mode, implicit $exec
     ; GCN-NEXT: [[S_AND_B32_:%[0-9]+]]:sgpr_32 = S_AND_B32 [[V_CMP_NGE_F32_e64_dpp]], 10101, implicit-def $scc
-    ; GCN-NEXT: V_CMP_GT_I32_e32_dpp [[COPY1]], [[COPY]], 1, 15, 15, 1, implicit-def $vcc, implicit $exec
+    ; GCN-NEXT: V_CMP_GE_I32_e32_dpp [[COPY1]], [[COPY]], 1, 15, 15, 1, implicit-def $vcc, implicit $exec
     %0:vgpr_32 = COPY $vgpr0
     %1:vgpr_32 = COPY $vgpr1
     %2:vgpr_32 = COPY $vgpr2
diff --git a/llvm/test/CodeGen/AMDGPU/vopc_dpp.mir b/llvm/test/CodeGen/AMDGPU/vopc_dpp.mir
index 8c7e3834fcc5c..31f010eaac988 100644
--- a/llvm/test/CodeGen/AMDGPU/vopc_dpp.mir
+++ b/llvm/test/CodeGen/AMDGPU/vopc_dpp.mir
@@ -29,7 +29,7 @@ body:             |
     ; GCN-NEXT: [[V_CMP_NGE_F16_fake16_e64_:%[0-9]+]]:sgpr_32 = V_CMP_NGE_F16_fake16_e64 0, [[V_CMP_NGE_F16_fake16_e64_]], 0, [[COPY]], 0, implicit $mode, implicit $exec
     ; GCN-NEXT: [[V_CMP_NGE_F32_e64_dpp:%[0-9]+]]:sgpr_32 = V_CMP_NGE_F32_e64_dpp 0, [[COPY1]], 0, [[COPY]], 0, 1, 15, 15, 1, implicit $mode, implicit $exec
     ; GCN-NEXT: [[S_AND_B32_:%[0-9]+]]:sgpr_32 = S_AND_B32 [[V_CMP_NGE_F32_e64_dpp]], 10101, implicit-def $scc
-    ; GCN-NEXT: V_CMP_GT_I32_e32_dpp [[COPY1]], [[COPY]], 1, 15, 15, 1, implicit-def $vcc, implicit $exec
+    ; GCN-NEXT: V_CMP_GE_I32_e32_dpp [[COPY1]], [[COPY]], 1, 15, 15, 1, implicit-def $vcc, implicit $exec
     %0:vgpr_32 = COPY $vgpr0
     %1:vgpr_32 = COPY $vgpr1
     %2:vgpr_32 = COPY $vgpr2
diff --git a/llvm/test/CodeGen/AMDGPU/wave32.ll b/llvm/test/CodeGen/AMDGPU/wave32.ll
index b7c12854d1115..7012711921827 100644
--- a/llvm/test/CodeGen/AMDGPU/wave32.ll
+++ b/llvm/test/CodeGen/AMDGPU/wave32.ll
@@ -13,7 +13,7 @@ define amdgpu_kernel void @test_vopc_i32(ptr addrspace(1) %arg) {
 ; GFX1032-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1032-NEXT:    global_load_dword v1, v0, s[0:1]
 ; GFX1032-NEXT:    s_waitcnt vmcnt(0)
-; GFX1032-NEXT:    v_cmp_lt_i32_e32 vcc_lo, 0, v1
+; GFX1032-NEXT:    v_cmp_le_i32_e32 vcc_lo, 0, v1
 ; GFX1032-NEXT:    v_cndmask_b32_e64 v1, 2, 1, vcc_lo
 ; GFX1032-NEXT:    global_store_dword v0, v1, s[0:1]
 ; GFX1032-NEXT:    s_endpgm
@@ -25,7 +25,7 @@ define amdgpu_kernel void @test_vopc_i32(ptr addrspace(1) %arg) {
 ; GFX1064-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064-NEXT:    global_load_dword v1, v0, s[0:1]
 ; GFX1064-NEXT:    s_waitcnt vmcnt(0)
-; GFX1064-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v1
+; GFX1064-NEXT:    v_cmp_le_i32_e32 vcc, 0, v1
 ; GFX1064-NEXT:    v_cndmask_b32_e64 v1, 2, 1, vcc
 ; GFX1064-NEXT:    global_store_dword v0, v1, s[0:1]
 ; GFX1064-NEXT:    s_endpgm
@@ -244,8 +244,8 @@ define amdgpu_kernel void @test_vop3_cmp_i32_sop_xor(ptr addrspace(1) %arg) {
 ; GFX1032-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1032-NEXT:    global_load_dword v1, v0, s[2:3]
 ; GFX1032-NEXT:    s_waitcnt vmcnt(0)
-; GFX1032-NEXT:    v_cmp_lt_i32_e32 vcc_lo, 0, v1
-; GFX1032-NEXT:    v_cmp_gt_i32_e64 s0, 1, v1
+; GFX1032-NEXT:    v_cmp_le_i32_e32 vcc_lo, 0, v1
+; GFX1032-NEXT:    v_cmp_ge_i32_e64 s0, 1, v1
 ; GFX1032-NEXT:    s_xor_b32 s0, vcc_lo, s0
 ; GFX1032-NEXT:    v_cndmask_b32_e64 v1, 2, 1, s0
 ; GFX1032-NEXT:    global_store_dword v0, v1, s[2:3]
@@ -258,8 +258,8 @@ define amdgpu_kernel void @test_vop3_cmp_i32_sop_xor(ptr addrspace(1) %arg) {
 ; GFX1064-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064-NEXT:    global_load_dword v1, v0, s[2:3]
 ; GFX1064-NEXT:    s_waitcnt vmcnt(0)
-; GFX1064-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v1
-; GFX1064-NEXT:    v_cmp_gt_i32_e64 s[0:1], 1, v1
+; GFX1064-NEXT:    v_cmp_le_i32_e32 vcc, 0, v1
+; GFX1064-NEXT:    v_cmp_ge_i32_e64 s[0:1], 1, v1
 ; GFX1064-NEXT:    s_xor_b64 s[0:1], vcc, s[0:1]
 ; GFX1064-NEXT:    v_cndmask_b32_e64 v1, 2, 1, s[0:1]
 ; GFX1064-NEXT:    global_store_dword v0, v1, s[2:3]
@@ -283,8 +283,8 @@ define amdgpu_kernel void @test_vop3_cmp_u32_sop_or(ptr addrspace(1) %arg) {
 ; GFX1032-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1032-NEXT:    global_load_dword v1, v0, s[2:3]
 ; GFX1032-NEXT:    s_waitcnt vmcnt(0)
-; GFX1032-NEXT:    v_cmp_lt_u32_e32 vcc_lo, 3, v1
-; GFX1032-NEXT:    v_cmp_gt_u32_e64 s0, 2, v1
+; GFX1032-NEXT:    v_cmp_le_u32_e32 vcc_lo, 3, v1
+; GFX1032-NEXT:    v_cmp_ge_u32_e64 s0, 2, v1
 ; GFX1032-NEXT:    s_or_b32 s0, vcc_lo, s0
 ; GFX1032-NEXT:    v_cndmask_b32_e64 v1, 2, 1, s0
 ; GFX1032-NEXT:    global_store_dword v0, v1, s[2:3]
@@ -297,8 +297,8 @@ define amdgpu_kernel void @test_vop3_cmp_u32_sop_or(ptr addrspace(1) %arg) {
 ; GFX1064-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064-NEXT:    global_load_dword v1, v0, s[2:3]
 ; GFX1064-NEXT:    s_waitcnt vmcnt(0)
-; GFX1064-NEXT:    v_cmp_lt_u32_e32 vcc, 3, v1
-; GFX1064-NEXT:    v_cmp_gt_u32_e64 s[0:1], 2, v1
+; GFX1064-NEXT:    v_cmp_le_u32_e32 vcc, 3, v1
+; GFX1064-NEXT:    v_cmp_ge_u32_e64 s[0:1], 2, v1
 ; GFX1064-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
 ; GFX1064-NEXT:    v_cndmask_b32_e64 v1, 2, 1, s[0:1]
 ; GFX1064-NEXT:    global_store_dword v0, v1, s[2:3]
@@ -317,7 +317,7 @@ define amdgpu_kernel void @test_vop3_cmp_u32_sop_or(ptr addrspace(1) %arg) {
 define amdgpu_kernel void @test_mask_if(ptr addrspace(1) %arg) #0 {
 ; GFX1032-LABEL: test_mask_if:
 ; GFX1032:       ; %bb.0:
-; GFX1032-NEXT:    v_cmp_lt_u32_e32 vcc_lo, 10, v0
+; GFX1032-NEXT:    v_cmp_le_u32_e32 vcc_lo, 10, v0
 ; GFX1032-NEXT:    s_and_saveexec_b32 s0, vcc_lo
 ; GFX1032-NEXT:    s_cbranch_execz .LBB9_2
 ; GFX1032-NEXT:  ; %bb.1: ; %if
@@ -330,7 +330,7 @@ define amdgpu_kernel void @test_mask_if(ptr addrspace(1) %arg) #0 {
 ;
 ; GFX1064-LABEL: test_mask_if:
 ; GFX1064:       ; %bb.0:
-; GFX1064-NEXT:    v_cmp_lt_u32_e32 vcc, 10, v0
+; GFX1064-NEXT:    v_cmp_le_u32_e32 vcc, 10, v0
 ; GFX1064-NEXT:    s_and_saveexec_b64 s[0:1], vcc
 ; GFX1064-NEXT:    s_cbranch_execz .LBB9_2
 ; GFX1064-NEXT:  ; %bb.1: ; %if
@@ -364,7 +364,7 @@ define amdgpu_kernel void @test_loop_with_if(ptr addrspace(1) %arg) #0 {
 ; GFX1032-NEXT:    ; in Loop: Header=BB10_2 Depth=1
 ; GFX1032-NEXT:    s_waitcnt_depctr 0xffe3
 ; GFX1032-NEXT:    s_or_b32 exec_lo, exec_lo, s4
-; GFX1032-NEXT:    v_cmp_lt_i32_e32 vcc_lo, 0xfe, v4
+; GFX1032-NEXT:    v_cmp_le_i32_e32 vcc_lo, 0xfe, v4
 ; GFX1032-NEXT:    v_add_nc_u32_e32 v1, 1, v4
 ; GFX1032-NEXT:    s_or_b32 s2, vcc_lo, s2
 ; GFX1032-NEXT:    s_andn2_b32 exec_lo, exec_lo, s2
@@ -387,7 +387,7 @@ define amdgpu_kernel void @test_loop_with_if(ptr addrspace(1) %arg) #0 {
 ; GFX1032-NEXT:    v_add_co_ci_u32_e32 v3, vcc_lo, s1, v3, vcc_lo
 ; GFX1032-NEXT:    global_load_dword v4, v[2:3], off
 ; GFX1032-NEXT:    s_waitcnt vmcnt(0)
-; GFX1032-NEXT:    v_cmp_gt_i32_e32 vcc_lo, 11, v4
+; GFX1032-NEXT:    v_cmp_ge_i32_e32 vcc_lo, 11, v4
 ; GFX1032-NEXT:    s_and_b32 s6, vcc_lo, exec_lo
 ; GFX1032-NEXT:    s_or_b32 s4, s4, s6
 ; GFX1032-NEXT:  .LBB10_4: ; %Flow
@@ -426,7 +426,7 @@ define amdgpu_kernel void @test_loop_with_if(ptr addrspace(1) %arg) #0 {
 ; GFX1064-NEXT:    ; in Loop: Header=BB10_2 Depth=1
 ; GFX1064-NEXT:    s_waitcnt_depctr 0xffe3
 ; GFX1064-NEXT:    s_or_b64 exec, exec, s[6:7]
-; GFX1064-NEXT:    v_cmp_lt_i32_e32 vcc, 0xfe, v4
+; GFX1064-NEXT:    v_cmp_le_i32_e32 vcc, 0xfe, v4
 ; GFX1064-NEXT:    v_add_nc_u32_e32 v1, 1, v4
 ; GFX1064-NEXT:    s_or_b64 s[2:3], vcc, s[2:3]
 ; GFX1064-NEXT:    s_andn2_b64 exec, exec, s[2:3]
@@ -449,7 +449,7 @@ define amdgpu_kernel void @test_loop_with_if(ptr addrspace(1) %arg) #0 {
 ; GFX1064-NEXT:    v_add_co_ci_u32_e32 v3, vcc, s1, v3, vcc
 ; GFX1064-NEXT:    global_load_dword v4, v[2:3], off
 ; GFX1064-NEXT:    s_waitcnt vmcnt(0)
-; GFX1064-NEXT:    v_cmp_gt_i32_e32 vcc, 11, v4
+; GFX1064-NEXT:    v_cmp_ge_i32_e32 vcc, 11, v4
 ; GFX1064-NEXT:    s_and_b64 s[10:11], vcc, exec
 ; GFX1064-NEXT:    s_or_b64 s[6:7], s[6:7], s[10:11]
 ; GFX1064-NEXT:  .LBB10_4: ; %Flow
@@ -548,7 +548,7 @@ define amdgpu_kernel void @test_loop_with_if_else_break(ptr addrspace(1) %arg) #
 ; GFX1032-NEXT:    global_load_dword v3, v2, s[0:1]
 ; GFX1032-NEXT:    s_or_b32 s4, s4, exec_lo
 ; GFX1032-NEXT:    s_waitcnt vmcnt(0)
-; GFX1032-NEXT:    v_cmp_gt_i32_e32 vcc_lo, 11, v3
+; GFX1032-NEXT:    v_cmp_ge_i32_e32 vcc_lo, 11, v3
 ; GFX1032-NEXT:    s_cbranch_vccz .LBB11_2
 ; GFX1032-NEXT:  ; %bb.5: ; in Loop: Header=BB11_4 Depth=1
 ; GFX1032-NEXT:    ; implicit-def: $sgpr3
@@ -592,7 +592,7 @@ define amdgpu_kernel void @test_loop_with_if_else_break(ptr addrspace(1) %arg) #
 ; GFX1064-NEXT:    global_load_dword v3, v2, s[0:1]
 ; GFX1064-NEXT:    s_or_b64 s[4:5], s[4:5], exec
 ; GFX1064-NEXT:    s_waitcnt vmcnt(0)
-; GFX1064-NEXT:    v_cmp_gt_i32_e32 vcc, 11, v3
+; GFX1064-NEXT:    v_cmp_ge_i32_e32 vcc, 11, v3
 ; GFX1064-NEXT:    s_cbranch_vccz .LBB11_2
 ; GFX1064-NEXT:  ; %bb.5: ; in Loop: Header=BB11_4 Depth=1
 ; GFX1064-NEXT:    ; implicit-def: $sgpr6
@@ -827,7 +827,7 @@ define amdgpu_kernel void @test_udiv64(ptr addrspace(1) %arg) #0 {
 ; GFX1032-NEXT:    s_cmp_lg_u32 s11, 0
 ; GFX1032-NEXT:    s_subb_u32 s10, s10, s5
 ; GFX1032-NEXT:    s_cmp_lg_u32 s12, 0
-; GFX1032-NEXT:    v_cmp_le_u32_e32 vcc_lo, s4, v1
+; GFX1032-NEXT:    v_cmp_lt_u32_e32 vcc_lo, s4, v1
 ; GFX1032-NEXT:    s_subb_u32 s10, s10, 0
 ; GFX1032-NEXT:    s_cmp_ge_u32 s10, s5
 ; GFX1032-NEXT:    v_cndmask_b32_e64 v1, 0, -1, vcc_lo
@@ -840,7 +840,7 @@ define amdgpu_kernel void @test_udiv64(ptr addrspace(1) %arg) #0 {
 ; GFX1032-NEXT:    s_add_u32 s13, s1, 2
 ; GFX1032-NEXT:    s_addc_u32 s14, s9, 0
 ; GFX1032-NEXT:    s_cmp_lg_u32 s11, 0
-; GFX1032-NEXT:    v_cmp_le_u32_e32 vcc_lo, s4, v0
+; GFX1032-NEXT:    v_cmp_lt_u32_e32 vcc_lo, s4, v0
 ; GFX1032-NEXT:    s_subb_u32 s0, s7, s0
 ; GFX1032-NEXT:    v_mov_b32_e32 v2, s13
 ; GFX1032-NEXT:    s_cmp_ge_u32 s0, s5
@@ -989,7 +989,7 @@ define amdgpu_kernel void @test_udiv64(ptr addrspace(1) %arg) #0 {
 ; GFX1064-NEXT:    s_cmp_lg_u64 s[0:1], 0
 ; GFX1064-NEXT:    s_subb_u32 s13, s13, s5
 ; GFX1064-NEXT:    s_cmp_lg_u64 s[8:9], 0
-; GFX1064-NEXT:    v_cmp_le_u32_e32 vcc, s4, v1
+; GFX1064-NEXT:    v_cmp_lt_u32_e32 vcc, s4, v1
 ; GFX1064-NEXT:    s_subb_u32 s8, s13, 0
 ; GFX1064-NEXT:    s_cmp_ge_u32 s8, s5
 ; GFX1064-NEXT:    v_cndmask_b32_e64 v1, 0, -1, vcc
@@ -1002,7 +1002,7 @@ define amdgpu_kernel void @test_udiv64(ptr addrspace(1) %arg) #0 {
 ; GFX1064-NEXT:    s_add_u32 s13, s10, 2
 ; GFX1064-NEXT:    s_addc_u32 s14, s11, 0
 ; GFX1064-NEXT:    s_cmp_lg_u64 s[0:1], 0
-; GFX1064-NEXT:    v_cmp_le_u32_e32 vcc, s4, v0
+; GFX1064-NEXT:    v_cmp_lt_u32_e32 vcc, s4, v0
 ; GFX1064-NEXT:    s_subb_u32 s0, s7, s12
 ; GFX1064-NEXT:    v_mov_b32_e32 v2, s13
 ; GFX1064-NEXT:    s_cmp_ge_u32 s0, s5
@@ -1914,7 +1914,7 @@ define amdgpu_ps float @test_wwm2(i32 inreg %idx) {
 ; GFX1032:       ; %bb.0: ; %main_body
 ; GFX1032-NEXT:    v_mbcnt_lo_u32_b32 v0, -1, 0
 ; GFX1032-NEXT:    v_mbcnt_hi_u32_b32 v0, -1, v0
-; GFX1032-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 16, v0
+; GFX1032-NEXT:    v_cmp_ge_u32_e32 vcc_lo, 16, v0
 ; GFX1032-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX1032-NEXT:    s_and_saveexec_b32 s1, vcc_lo
 ; GFX1032-NEXT:    s_cbranch_execz .LBB35_2
@@ -1935,7 +1935,7 @@ define amdgpu_ps float @test_wwm2(i32 inreg %idx) {
 ; GFX1064:       ; %bb.0: ; %main_body
 ; GFX1064-NEXT:    v_mbcnt_lo_u32_b32 v0, -1, 0
 ; GFX1064-NEXT:    v_mbcnt_hi_u32_b32 v0, -1, v0
-; GFX1064-NEXT:    v_cmp_gt_u32_e32 vcc, 16, v0
+; GFX1064-NEXT:    v_cmp_ge_u32_e32 vcc, 16, v0
 ; GFX1064-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX1064-NEXT:    s_and_saveexec_b64 s[2:3], vcc
 ; GFX1064-NEXT:    s_cbranch_execz .LBB35_2
@@ -2001,7 +2001,7 @@ define amdgpu_ps float @test_strict_wwm2(i32 inreg %idx) {
 ; GFX1032:       ; %bb.0: ; %main_body
 ; GFX1032-NEXT:    v_mbcnt_lo_u32_b32 v0, -1, 0
 ; GFX1032-NEXT:    v_mbcnt_hi_u32_b32 v0, -1, v0
-; GFX1032-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 16, v0
+; GFX1032-NEXT:    v_cmp_ge_u32_e32 vcc_lo, 16, v0
 ; GFX1032-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX1032-NEXT:    s_and_saveexec_b32 s1, vcc_lo
 ; GFX1032-NEXT:    s_cbranch_execz .LBB37_2
@@ -2022,7 +2022,7 @@ define amdgpu_ps float @test_strict_wwm2(i32 inreg %idx) {
 ; GFX1064:       ; %bb.0: ; %main_body
 ; GFX1064-NEXT:    v_mbcnt_lo_u32_b32 v0, -1, 0
 ; GFX1064-NEXT:    v_mbcnt_hi_u32_b32 v0, -1, v0
-; GFX1064-NEXT:    v_cmp_gt_u32_e32 vcc, 16, v0
+; GFX1064-NEXT:    v_cmp_ge_u32_e32 vcc, 16, v0
 ; GFX1064-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX1064-NEXT:    s_and_saveexec_b64 s[2:3], vcc
 ; GFX1064-NEXT:    s_cbranch_execz .LBB37_2
@@ -2488,10 +2488,10 @@ define amdgpu_kernel void @icmp64(i32 %n, i32 %s) {
 ; GFX1032-NEXT:    v_mul_lo_u32 v1, v1, s0
 ; GFX1032-NEXT:    v_sub_nc_u32_e32 v0, v0, v1
 ; GFX1032-NEXT:    v_subrev_nc_u32_e32 v1, s0, v0
-; GFX1032-NEXT:    v_cmp_le_u32_e32 vcc_lo, s0, v0
+; GFX1032-NEXT:    v_cmp_lt_u32_e32 vcc_lo, s0, v0
 ; GFX1032-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
 ; GFX1032-NEXT:    v_subrev_nc_u32_e32 v1, s0, v0
-; GFX1032-NEXT:    v_cmp_le_u32_e32 vcc_lo, s0, v0
+; GFX1032-NEXT:    v_cmp_lt_u32_e32 vcc_lo, s0, v0
 ; GFX1032-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
 ; GFX1032-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
 ; GFX1032-NEXT:    s_lshr_b32 s0, vcc_lo, 1
@@ -2521,10 +2521,10 @@ define amdgpu_kernel void @icmp64(i32 %n, i32 %s) {
 ; GFX1064-NEXT:    v_mul_lo_u32 v1, v1, s0
 ; GFX1064-NEXT:    v_sub_nc_u32_e32 v0, v0, v1
 ; GFX1064-NEXT:    v_subrev_nc_u32_e32 v1, s0, v0
-; GFX1064-NEXT:    v_cmp_le_u32_e32 vcc, s0, v0
+; GFX1064-NEXT:    v_cmp_lt_u32_e32 vcc, s0, v0
 ; GFX1064-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
 ; GFX1064-NEXT:    v_subrev_nc_u32_e32 v1, s0, v0
-; GFX1064-NEXT:    v_cmp_le_u32_e32 vcc, s0, v0
+; GFX1064-NEXT:    v_cmp_lt_u32_e32 vcc, s0, v0
 ; GFX1064-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
 ; GFX1064-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
 ; GFX1064-NEXT:    s_lshr_b64 s[0:1], vcc, 1
@@ -2674,10 +2674,10 @@ define amdgpu_kernel void @icmp32(i32 %n, i32 %s) {
 ; GFX1032-NEXT:    v_mul_lo_u32 v1, v1, s0
 ; GFX1032-NEXT:    v_sub_nc_u32_e32 v0, v0, v1
 ; GFX1032-NEXT:    v_subrev_nc_u32_e32 v1, s0, v0
-; GFX1032-NEXT:    v_cmp_le_u32_e32 vcc_lo, s0, v0
+; GFX1032-NEXT:    v_cmp_lt_u32_e32 vcc_lo, s0, v0
 ; GFX1032-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
 ; GFX1032-NEXT:    v_subrev_nc_u32_e32 v1, s0, v0
-; GFX1032-NEXT:    v_cmp_le_u32_e32 vcc_lo, s0, v0
+; GFX1032-NEXT:    v_cmp_lt_u32_e32 vcc_lo, s0, v0
 ; GFX1032-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
 ; GFX1032-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
 ; GFX1032-NEXT:    s_lshr_b32 s0, vcc_lo, 1
@@ -2708,10 +2708,10 @@ define amdgpu_kernel void @icmp32(i32 %n, i32 %s) {
 ; GFX1064-NEXT:    v_mul_lo_u32 v1, v1, s0
 ; GFX1064-NEXT:    v_sub_nc_u32_e32 v0, v0, v1
 ; GFX1064-NEXT:    v_subrev_nc_u32_e32 v1, s0, v0
-; GFX1064-NEXT:    v_cmp_le_u32_e32 vcc, s0, v0
+; GFX1064-NEXT:    v_cmp_lt_u32_e32 vcc, s0, v0
 ; GFX1064-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
 ; GFX1064-NEXT:    v_subrev_nc_u32_e32 v1, s0, v0
-; GFX1064-NEXT:    v_cmp_le_u32_e32 vcc, s0, v0
+; GFX1064-NEXT:    v_cmp_lt_u32_e32 vcc, s0, v0
 ; GFX1064-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
 ; GFX1064-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
 ; GFX1064-NEXT:    s_lshr_b32 s0, vcc_lo, 1
diff --git a/llvm/test/CodeGen/AMDGPU/wqm.ll b/llvm/test/CodeGen/AMDGPU/wqm.ll
index 1ca2a8ada68ea..05c00dea04d99 100644
--- a/llvm/test/CodeGen/AMDGPU/wqm.ll
+++ b/llvm/test/CodeGen/AMDGPU/wqm.ll
@@ -504,7 +504,7 @@ define amdgpu_ps float @test_wwm3(i32 inreg %idx) {
 ; GFX9-W64:       ; %bb.0: ; %main_body
 ; GFX9-W64-NEXT:    v_mbcnt_lo_u32_b32 v0, -1, 0
 ; GFX9-W64-NEXT:    v_mbcnt_hi_u32_b32 v0, -1, v0
-; GFX9-W64-NEXT:    v_cmp_gt_u32_e32 vcc, 16, v0
+; GFX9-W64-NEXT:    v_cmp_ge_u32_e32 vcc, 16, v0
 ; GFX9-W64-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX9-W64-NEXT:    s_and_saveexec_b64 s[2:3], vcc
 ; GFX9-W64-NEXT:    s_cbranch_execz .LBB13_2
@@ -525,7 +525,7 @@ define amdgpu_ps float @test_wwm3(i32 inreg %idx) {
 ; GFX10-W32:       ; %bb.0: ; %main_body
 ; GFX10-W32-NEXT:    v_mbcnt_lo_u32_b32 v0, -1, 0
 ; GFX10-W32-NEXT:    v_mbcnt_hi_u32_b32 v0, -1, v0
-; GFX10-W32-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 16, v0
+; GFX10-W32-NEXT:    v_cmp_ge_u32_e32 vcc_lo, 16, v0
 ; GFX10-W32-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX10-W32-NEXT:    s_and_saveexec_b32 s1, vcc_lo
 ; GFX10-W32-NEXT:    s_cbranch_execz .LBB13_2
@@ -569,7 +569,7 @@ define amdgpu_ps float @test_wwm4(i32 inreg %idx) {
 ; GFX9-W64:       ; %bb.0: ; %main_body
 ; GFX9-W64-NEXT:    v_mbcnt_lo_u32_b32 v0, -1, 0
 ; GFX9-W64-NEXT:    v_mbcnt_hi_u32_b32 v0, -1, v0
-; GFX9-W64-NEXT:    v_cmp_gt_u32_e32 vcc, 16, v0
+; GFX9-W64-NEXT:    v_cmp_ge_u32_e32 vcc, 16, v0
 ; GFX9-W64-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX9-W64-NEXT:    s_and_saveexec_b64 s[2:3], vcc
 ; GFX9-W64-NEXT:    s_cbranch_execz .LBB14_2
@@ -589,7 +589,7 @@ define amdgpu_ps float @test_wwm4(i32 inreg %idx) {
 ; GFX10-W32:       ; %bb.0: ; %main_body
 ; GFX10-W32-NEXT:    v_mbcnt_lo_u32_b32 v0, -1, 0
 ; GFX10-W32-NEXT:    v_mbcnt_hi_u32_b32 v0, -1, v0
-; GFX10-W32-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 16, v0
+; GFX10-W32-NEXT:    v_cmp_ge_u32_e32 vcc_lo, 16, v0
 ; GFX10-W32-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX10-W32-NEXT:    s_and_saveexec_b32 s1, vcc_lo
 ; GFX10-W32-NEXT:    s_cbranch_execz .LBB14_2
@@ -691,7 +691,7 @@ define amdgpu_ps float @test_wwm6_then() {
 ; GFX9-W64-NEXT:    s_mov_b64 exec, s[0:1]
 ; GFX9-W64-NEXT:    v_mbcnt_lo_u32_b32 v0, -1, 0
 ; GFX9-W64-NEXT:    v_mbcnt_hi_u32_b32 v0, -1, v0
-; GFX9-W64-NEXT:    v_cmp_gt_u32_e32 vcc, 16, v0
+; GFX9-W64-NEXT:    v_cmp_ge_u32_e32 vcc, 16, v0
 ; GFX9-W64-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX9-W64-NEXT:    s_and_saveexec_b64 s[0:1], vcc
 ; GFX9-W64-NEXT:    s_cbranch_execz .LBB16_2
@@ -714,7 +714,7 @@ define amdgpu_ps float @test_wwm6_then() {
 ; GFX10-W32-NEXT:    s_mov_b32 exec_lo, s0
 ; GFX10-W32-NEXT:    v_mbcnt_lo_u32_b32 v0, -1, 0
 ; GFX10-W32-NEXT:    v_mbcnt_hi_u32_b32 v0, -1, v0
-; GFX10-W32-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 16, v0
+; GFX10-W32-NEXT:    v_cmp_ge_u32_e32 vcc_lo, 16, v0
 ; GFX10-W32-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX10-W32-NEXT:    s_and_saveexec_b32 s0, vcc_lo
 ; GFX10-W32-NEXT:    s_cbranch_execz .LBB16_2
@@ -958,7 +958,7 @@ define amdgpu_ps float @test_strict_wqm3(i32 inreg %idx) {
 ; GFX9-W64:       ; %bb.0: ; %main_body
 ; GFX9-W64-NEXT:    v_mbcnt_lo_u32_b32 v0, -1, 0
 ; GFX9-W64-NEXT:    v_mbcnt_hi_u32_b32 v0, -1, v0
-; GFX9-W64-NEXT:    v_cmp_gt_u32_e32 vcc, 16, v0
+; GFX9-W64-NEXT:    v_cmp_ge_u32_e32 vcc, 16, v0
 ; GFX9-W64-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX9-W64-NEXT:    s_and_saveexec_b64 s[2:3], vcc
 ; GFX9-W64-NEXT:    s_cbranch_execz .LBB21_2
@@ -980,7 +980,7 @@ define amdgpu_ps float @test_strict_wqm3(i32 inreg %idx) {
 ; GFX10-W32:       ; %bb.0: ; %main_body
 ; GFX10-W32-NEXT:    v_mbcnt_lo_u32_b32 v0, -1, 0
 ; GFX10-W32-NEXT:    v_mbcnt_hi_u32_b32 v0, -1, v0
-; GFX10-W32-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 16, v0
+; GFX10-W32-NEXT:    v_cmp_ge_u32_e32 vcc_lo, 16, v0
 ; GFX10-W32-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX10-W32-NEXT:    s_and_saveexec_b32 s1, vcc_lo
 ; GFX10-W32-NEXT:    s_cbranch_execz .LBB21_2
@@ -1025,7 +1025,7 @@ define amdgpu_ps float @test_strict_wqm4(i32 inreg %idx) {
 ; GFX9-W64:       ; %bb.0: ; %main_body
 ; GFX9-W64-NEXT:    v_mbcnt_lo_u32_b32 v0, -1, 0
 ; GFX9-W64-NEXT:    v_mbcnt_hi_u32_b32 v0, -1, v0
-; GFX9-W64-NEXT:    v_cmp_gt_u32_e32 vcc, 16, v0
+; GFX9-W64-NEXT:    v_cmp_ge_u32_e32 vcc, 16, v0
 ; GFX9-W64-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX9-W64-NEXT:    s_and_saveexec_b64 s[2:3], vcc
 ; GFX9-W64-NEXT:    s_cbranch_execz .LBB22_2
@@ -1046,7 +1046,7 @@ define amdgpu_ps float @test_strict_wqm4(i32 inreg %idx) {
 ; GFX10-W32:       ; %bb.0: ; %main_body
 ; GFX10-W32-NEXT:    v_mbcnt_lo_u32_b32 v0, -1, 0
 ; GFX10-W32-NEXT:    v_mbcnt_hi_u32_b32 v0, -1, v0
-; GFX10-W32-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 16, v0
+; GFX10-W32-NEXT:    v_cmp_ge_u32_e32 vcc_lo, 16, v0
 ; GFX10-W32-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX10-W32-NEXT:    s_and_saveexec_b32 s1, vcc_lo
 ; GFX10-W32-NEXT:    s_cbranch_execz .LBB22_2
@@ -1153,7 +1153,7 @@ define amdgpu_ps float @test_strict_wqm6_then() {
 ; GFX9-W64-NEXT:    s_mov_b64 exec, s[0:1]
 ; GFX9-W64-NEXT:    v_mbcnt_lo_u32_b32 v0, -1, 0
 ; GFX9-W64-NEXT:    v_mbcnt_hi_u32_b32 v0, -1, v0
-; GFX9-W64-NEXT:    v_cmp_gt_u32_e32 vcc, 16, v0
+; GFX9-W64-NEXT:    v_cmp_ge_u32_e32 vcc, 16, v0
 ; GFX9-W64-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX9-W64-NEXT:    s_and_saveexec_b64 s[0:1], vcc
 ; GFX9-W64-NEXT:    s_cbranch_execz .LBB24_2
@@ -1178,7 +1178,7 @@ define amdgpu_ps float @test_strict_wqm6_then() {
 ; GFX10-W32-NEXT:    s_mov_b32 exec_lo, s0
 ; GFX10-W32-NEXT:    v_mbcnt_lo_u32_b32 v0, -1, 0
 ; GFX10-W32-NEXT:    v_mbcnt_hi_u32_b32 v0, -1, v0
-; GFX10-W32-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 16, v0
+; GFX10-W32-NEXT:    v_cmp_ge_u32_e32 vcc_lo, 16, v0
 ; GFX10-W32-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX10-W32-NEXT:    s_and_saveexec_b32 s0, vcc_lo
 ; GFX10-W32-NEXT:    s_cbranch_execz .LBB24_2
@@ -2405,7 +2405,7 @@ define amdgpu_ps float @test_strict_wwm3(i32 inreg %idx) {
 ; GFX9-W64:       ; %bb.0: ; %main_body
 ; GFX9-W64-NEXT:    v_mbcnt_lo_u32_b32 v0, -1, 0
 ; GFX9-W64-NEXT:    v_mbcnt_hi_u32_b32 v0, -1, v0
-; GFX9-W64-NEXT:    v_cmp_gt_u32_e32 vcc, 16, v0
+; GFX9-W64-NEXT:    v_cmp_ge_u32_e32 vcc, 16, v0
 ; GFX9-W64-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX9-W64-NEXT:    s_and_saveexec_b64 s[2:3], vcc
 ; GFX9-W64-NEXT:    s_cbranch_execz .LBB43_2
@@ -2426,7 +2426,7 @@ define amdgpu_ps float @test_strict_wwm3(i32 inreg %idx) {
 ; GFX10-W32:       ; %bb.0: ; %main_body
 ; GFX10-W32-NEXT:    v_mbcnt_lo_u32_b32 v0, -1, 0
 ; GFX10-W32-NEXT:    v_mbcnt_hi_u32_b32 v0, -1, v0
-; GFX10-W32-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 16, v0
+; GFX10-W32-NEXT:    v_cmp_ge_u32_e32 vcc_lo, 16, v0
 ; GFX10-W32-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX10-W32-NEXT:    s_and_saveexec_b32 s1, vcc_lo
 ; GFX10-W32-NEXT:    s_cbranch_execz .LBB43_2
@@ -2470,7 +2470,7 @@ define amdgpu_ps float @test_strict_wwm4(i32 inreg %idx) {
 ; GFX9-W64:       ; %bb.0: ; %main_body
 ; GFX9-W64-NEXT:    v_mbcnt_lo_u32_b32 v0, -1, 0
 ; GFX9-W64-NEXT:    v_mbcnt_hi_u32_b32 v0, -1, v0
-; GFX9-W64-NEXT:    v_cmp_gt_u32_e32 vcc, 16, v0
+; GFX9-W64-NEXT:    v_cmp_ge_u32_e32 vcc, 16, v0
 ; GFX9-W64-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX9-W64-NEXT:    s_and_saveexec_b64 s[2:3], vcc
 ; GFX9-W64-NEXT:    s_cbranch_execz .LBB44_2
@@ -2490,7 +2490,7 @@ define amdgpu_ps float @test_strict_wwm4(i32 inreg %idx) {
 ; GFX10-W32:       ; %bb.0: ; %main_body
 ; GFX10-W32-NEXT:    v_mbcnt_lo_u32_b32 v0, -1, 0
 ; GFX10-W32-NEXT:    v_mbcnt_hi_u32_b32 v0, -1, v0
-; GFX10-W32-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 16, v0
+; GFX10-W32-NEXT:    v_cmp_ge_u32_e32 vcc_lo, 16, v0
 ; GFX10-W32-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX10-W32-NEXT:    s_and_saveexec_b32 s1, vcc_lo
 ; GFX10-W32-NEXT:    s_cbranch_execz .LBB44_2
@@ -2592,7 +2592,7 @@ define amdgpu_ps float @test_strict_wwm6_then() {
 ; GFX9-W64-NEXT:    s_mov_b64 exec, s[0:1]
 ; GFX9-W64-NEXT:    v_mbcnt_lo_u32_b32 v0, -1, 0
 ; GFX9-W64-NEXT:    v_mbcnt_hi_u32_b32 v0, -1, v0
-; GFX9-W64-NEXT:    v_cmp_gt_u32_e32 vcc, 16, v0
+; GFX9-W64-NEXT:    v_cmp_ge_u32_e32 vcc, 16, v0
 ; GFX9-W64-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX9-W64-NEXT:    s_and_saveexec_b64 s[0:1], vcc
 ; GFX9-W64-NEXT:    s_cbranch_execz .LBB46_2
@@ -2615,7 +2615,7 @@ define amdgpu_ps float @test_strict_wwm6_then() {
 ; GFX10-W32-NEXT:    s_mov_b32 exec_lo, s0
 ; GFX10-W32-NEXT:    v_mbcnt_lo_u32_b32 v0, -1, 0
 ; GFX10-W32-NEXT:    v_mbcnt_hi_u32_b32 v0, -1, v0
-; GFX10-W32-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 16, v0
+; GFX10-W32-NEXT:    v_cmp_ge_u32_e32 vcc_lo, 16, v0
 ; GFX10-W32-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX10-W32-NEXT:    s_and_saveexec_b32 s0, vcc_lo
 ; GFX10-W32-NEXT:    s_cbranch_execz .LBB46_2
@@ -3444,7 +3444,7 @@ define amdgpu_gs void @wqm_init_exec_switch(i32 %arg) {
 ; GFX9-W64-LABEL: wqm_init_exec_switch:
 ; GFX9-W64:       ; %bb.0:
 ; GFX9-W64-NEXT:    s_mov_b64 exec, 0
-; GFX9-W64-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v0
+; GFX9-W64-NEXT:    v_cmp_le_i32_e32 vcc, 0, v0
 ; GFX9-W64-NEXT:    s_and_saveexec_b64 s[0:1], vcc
 ; GFX9-W64-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
 ; GFX9-W64-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
@@ -3454,7 +3454,7 @@ define amdgpu_gs void @wqm_init_exec_switch(i32 %arg) {
 ; GFX10-W32:       ; %bb.0:
 ; GFX10-W32-NEXT:    s_mov_b32 exec_lo, 0
 ; GFX10-W32-NEXT:    s_mov_b32 s0, exec_lo
-; GFX10-W32-NEXT:    v_cmpx_lt_i32_e32 0, v0
+; GFX10-W32-NEXT:    v_cmpx_le_i32_e32 0, v0
 ; GFX10-W32-NEXT:    s_xor_b32 s0, exec_lo, s0
 ; GFX10-W32-NEXT:    s_andn2_saveexec_b32 s0, s0
 ; GFX10-W32-NEXT:    s_endpgm
@@ -3523,7 +3523,7 @@ define amdgpu_ps float @short_exact_regions(<8 x i32> inreg %rsrc, <4 x i32> inr
 ; GFX9-W64-NEXT:    image_sample v[3:6], v0, s[0:7], s[8:11] dmask:0xf
 ; GFX9-W64-NEXT:    v_mbcnt_lo_u32_b32 v0, -1, 0
 ; GFX9-W64-NEXT:    v_mbcnt_hi_u32_b32 v0, -1, v0
-; GFX9-W64-NEXT:    v_cmp_gt_u32_e32 vcc, 16, v0
+; GFX9-W64-NEXT:    v_cmp_ge_u32_e32 vcc, 16, v0
 ; GFX9-W64-NEXT:    s_and_saveexec_b64 s[14:15], vcc
 ; GFX9-W64-NEXT:    s_cbranch_execz .LBB59_2
 ; GFX9-W64-NEXT:  ; %bb.1: ; %if
@@ -3554,7 +3554,7 @@ define amdgpu_ps float @short_exact_regions(<8 x i32> inreg %rsrc, <4 x i32> inr
 ; GFX10-W32-NEXT:    v_mbcnt_lo_u32_b32 v0, -1, 0
 ; GFX10-W32-NEXT:    s_mov_b32 s13, exec_lo
 ; GFX10-W32-NEXT:    v_mbcnt_hi_u32_b32 v0, -1, v0
-; GFX10-W32-NEXT:    v_cmpx_gt_u32_e32 16, v0
+; GFX10-W32-NEXT:    v_cmpx_ge_u32_e32 16, v0
 ; GFX10-W32-NEXT:    s_cbranch_execz .LBB59_2
 ; GFX10-W32-NEXT:  ; %bb.1: ; %if
 ; GFX10-W32-NEXT:    global_load_dword v0, v[1:2], off
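
A note for readers skimming the regenerated checks above: the wrong-reverse bug these
hunks reflect comes from conflating *reversing* a comparison (swapping its operands)
with *inverting* it (negating its result). The reverse of `lt` is `gt`, while its
inverse is `ge`; mixing the two turns a strict compare into a non-strict one, which is
consistent with the lt/le and gt/ge flips visible in the updated `v_cmp_*` lines. The
TableGen mapping change itself is not part of the hunks shown in this excerpt. Below is
a minimal, self-contained C sketch of the distinction; it is illustrative only and not
tied to the compiler source:

  #include <assert.h>
  #include <stdint.h>

  int main(void) {
    for (uint32_t a = 0; a < 8; ++a) {
      for (uint32_t b = 0; b < 8; ++b) {
        /* Reverse = swap operands: lt <-> gt, le <-> ge. */
        assert((a < b) == (b > a));
        assert((a <= b) == (b >= a));
        /* Inverse = negate the result: lt <-> ge, le <-> gt. */
        assert(!(a < b) == (a >= b));
        assert(!(a <= b) == (a > b));
        /* Using the inverse where the reverse is needed is only
         * "almost" right: the two disagree exactly when a == b. */
        if (a == b)
          assert((a < b) != (b >= a));
      }
    }
    return 0;
  }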


