[llvm] [AMDGPU] Fix wrong reverse operations for `v_cmp_*` (PR #146398)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Jun 30 11:32:18 PDT 2025
llvmbot wrote:
@llvm/pr-subscribers-backend-amdgpu
Author: Shilei Tian (shiltian)
Fixes SWDEV-538616.
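The description is terse, so the following is read off the TableGen hunk in the patch: the trailing string argument of the `VOPC_I32`/`VOPC_I64` (and `VOPCX_*`) multiclass instantiations names each comparison's reverse opcode, and the patch changes the reverse of `lt` from `gt` to `ge` and the reverse of `le` from `ge` to `gt`. The new pairings are logical inverses of each other (`!(a < b)` is `a >= b`), where the old pairings were the operand-swapped forms (`a < b` is `b > a`). A minimal standalone sketch of that distinction is below; the `Pred`/`eval` names are hypothetical illustration, not backend code:

```cpp
// Standalone sketch (not LLVM code): two different "reverses" of a compare.
// Swapping the operands of a<b gives b>a (the "gt" form), while logically
// negating a<b gives a>=b (the "ge" form). The TableGen hunk below moves the
// v_cmp_* reverse-opcode table from the first mapping to the second.
#include <cassert>

enum Pred { LT, LE, GT, GE };

// Operand-swapped predicate, indexed by Pred: cmp(a,b) == Commuted[cmp](b,a).
const Pred Commuted[] = {GT, GE, LT, LE};
// Logically negated predicate, indexed by Pred: cmp(a,b) == !Negated[cmp](a,b).
const Pred Negated[] = {GE, GT, LE, LT};

bool eval(Pred P, int a, int b) {
  switch (P) {
  case LT: return a < b;
  case LE: return a <= b;
  case GT: return a > b;
  default: return a >= b; // GE
  }
}

int main() {
  const Pred All[] = {LT, LE, GT, GE};
  for (int a = -2; a <= 2; ++a)
    for (int b = -2; b <= 2; ++b)
      for (Pred P : All) {
        // Swapping operands preserves the truth value...
        assert(eval(P, a, b) == eval(Commuted[P], b, a));
        // ...while logical negation always flips it (they differ at a == b).
        assert(eval(P, a, b) != eval(Negated[P], a, b));
      }
  return 0;
}
```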
---
Patch is 1.09 MiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/146398.diff
119 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/VOPCInstructions.td (+16-16)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/addo.ll (+30-30)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/ashr.ll (+5-5)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.ll (+4-4)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-used-outside-loop.ll (+2-2)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-structurizer.ll (+4-4)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll (+1-1)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll (+120-120)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll (+120-120)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/is-safe-to-sink-bug.ll (+1-1)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ballot.i32.ll (+11-11)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ballot.i64.ll (+8-8)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.memcpy.ll (+1-1)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.memset.ll (+1-1)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/lshr.ll (+5-5)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/mul-known-bits.i64.ll (+2-2)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui.ll (+5-5)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll (+66-66)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i32.ll (+4-4)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll (+6-6)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/sdivrem.ll (+126-126)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/shl.ll (+5-5)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i32.ll (+4-4)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll (+6-6)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll (+81-81)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/subo.ll (+30-30)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i32.ll (+4-4)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll (+6-6)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/udivrem.ll (+126-126)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i32.ll (+4-4)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll (+6-6)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/vni8-across-blocks.ll (+12-12)
- (modified) llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll (+2-2)
- (modified) llvm/test/CodeGen/AMDGPU/amdgpu-branch-weight-metadata.ll (+9-9)
- (modified) llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll (+45-36)
- (modified) llvm/test/CodeGen/AMDGPU/ashr64_reduce.ll (+20-20)
- (modified) llvm/test/CodeGen/AMDGPU/branch-relaxation.ll (+3-3)
- (modified) llvm/test/CodeGen/AMDGPU/buffer-fat-pointers-memcpy.ll (+1-1)
- (modified) llvm/test/CodeGen/AMDGPU/bug-sdag-emitcopyfromreg.ll (+1-1)
- (modified) llvm/test/CodeGen/AMDGPU/carryout-selection.ll (+30-30)
- (modified) llvm/test/CodeGen/AMDGPU/collapse-endcf.ll (+6-6)
- (modified) llvm/test/CodeGen/AMDGPU/combine-add-zext-xor.ll (+8-8)
- (modified) llvm/test/CodeGen/AMDGPU/combine_andor_with_cmps.ll (+24-24)
- (modified) llvm/test/CodeGen/AMDGPU/commute-compares.ll (+17-17)
- (modified) llvm/test/CodeGen/AMDGPU/copy-to-reg-frameindex.ll (+1-1)
- (modified) llvm/test/CodeGen/AMDGPU/div_i128.ll (+14-14)
- (modified) llvm/test/CodeGen/AMDGPU/div_v2i128.ll (+56-56)
- (modified) llvm/test/CodeGen/AMDGPU/extract-subvector-16bit.ll (+22-22)
- (modified) llvm/test/CodeGen/AMDGPU/extract-subvector.ll (+17-17)
- (modified) llvm/test/CodeGen/AMDGPU/fcopysign.f16.ll (+147-147)
- (modified) llvm/test/CodeGen/AMDGPU/flat_atomics_i64.ll (+144-144)
- (modified) llvm/test/CodeGen/AMDGPU/flat_atomics_i64_system.ll (+186-186)
- (modified) llvm/test/CodeGen/AMDGPU/flat_atomics_i64_system_noprivate.ll (+81-81)
- (modified) llvm/test/CodeGen/AMDGPU/fneg-combines.f16.ll (+45-45)
- (modified) llvm/test/CodeGen/AMDGPU/fneg-modifier-casting.ll (+6-6)
- (modified) llvm/test/CodeGen/AMDGPU/fold-int-pow2-with-fmul-or-fdiv.ll (+3-3)
- (modified) llvm/test/CodeGen/AMDGPU/fptoi.i128.ll (+56-56)
- (modified) llvm/test/CodeGen/AMDGPU/fptrunc.v2f16.no.fast.math.ll (+12-12)
- (modified) llvm/test/CodeGen/AMDGPU/global_atomics_i64_system.ll (+81-81)
- (modified) llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll (+4-4)
- (modified) llvm/test/CodeGen/AMDGPU/itofp.i128.bf.ll (+11-11)
- (modified) llvm/test/CodeGen/AMDGPU/itofp.i128.ll (+63-63)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i32.ll (+13-13)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i64.ll (+12-12)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i64.wave32.ll (+7-7)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.w32.ll (+64-64)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.w64.ll (+16-16)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.readfirstlane.ll (+2-2)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.add.ll (+12-12)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.and.ll (+12-12)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.max.ll (+12-12)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.min.ll (+12-12)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.or.ll (+12-12)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.sub.ll (+12-12)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umax.ll (+12-12)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umin.ll (+12-12)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.xor.ll (+12-12)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.bf16.ll (+59-59)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll (+60-60)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.mulo.ll (+10-10)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.round.f64.ll (+2-2)
- (modified) llvm/test/CodeGen/AMDGPU/loop_exit_with_xor.ll (+4-4)
- (modified) llvm/test/CodeGen/AMDGPU/machine-sink-temporal-divergence-swdev407790.ll (+3-3)
- (modified) llvm/test/CodeGen/AMDGPU/memintrinsic-unroll.ll (+15-15)
- (modified) llvm/test/CodeGen/AMDGPU/nested-loop-conditions.ll (+3-5)
- (modified) llvm/test/CodeGen/AMDGPU/private-memory-atomics.ll (+2-2)
- (modified) llvm/test/CodeGen/AMDGPU/rem_i128.ll (+8-8)
- (modified) llvm/test/CodeGen/AMDGPU/saddo.ll (+11-11)
- (modified) llvm/test/CodeGen/AMDGPU/saddsat.ll (+11-11)
- (modified) llvm/test/CodeGen/AMDGPU/sdiv64.ll (+12-12)
- (modified) llvm/test/CodeGen/AMDGPU/select-constant-xor.ll (+3-3)
- (modified) llvm/test/CodeGen/AMDGPU/set-inactive-wwm-overwrite.ll (+2-2)
- (modified) llvm/test/CodeGen/AMDGPU/sgpr-control-flow.ll (+1-1)
- (modified) llvm/test/CodeGen/AMDGPU/shift-i128.ll (+12-12)
- (modified) llvm/test/CodeGen/AMDGPU/shl64_reduce.ll (+20-20)
- (modified) llvm/test/CodeGen/AMDGPU/should-not-hoist-set-inactive.ll (+1-1)
- (modified) llvm/test/CodeGen/AMDGPU/si-unify-exit-multiple-unreachables.ll (+1-1)
- (modified) llvm/test/CodeGen/AMDGPU/si-unify-exit-return-unreachable.ll (-1)
- (modified) llvm/test/CodeGen/AMDGPU/srem-seteq-illegal-types.ll (+1-1)
- (modified) llvm/test/CodeGen/AMDGPU/srem.ll (+10-10)
- (modified) llvm/test/CodeGen/AMDGPU/srem64.ll (+16-16)
- (modified) llvm/test/CodeGen/AMDGPU/srl64_reduce.ll (+20-20)
- (modified) llvm/test/CodeGen/AMDGPU/ssubsat.ll (+73-73)
- (modified) llvm/test/CodeGen/AMDGPU/tuple-allocation-failure.ll (+6-6)
- (modified) llvm/test/CodeGen/AMDGPU/uaddo.ll (+6-6)
- (modified) llvm/test/CodeGen/AMDGPU/udiv64.ll (+12-12)
- (modified) llvm/test/CodeGen/AMDGPU/undef-handling-crash-in-ra.ll (+1-1)
- (modified) llvm/test/CodeGen/AMDGPU/uniform-cfg.ll (+6-6)
- (modified) llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll (+2-2)
- (modified) llvm/test/CodeGen/AMDGPU/urem-seteq-illegal-types.ll (+7-7)
- (modified) llvm/test/CodeGen/AMDGPU/urem64.ll (+11-11)
- (modified) llvm/test/CodeGen/AMDGPU/v_cndmask.ll (+20-20)
- (modified) llvm/test/CodeGen/AMDGPU/vgpr-liverange-ir.ll (+10-10)
- (modified) llvm/test/CodeGen/AMDGPU/vgpr-liverange.ll (+5-5)
- (modified) llvm/test/CodeGen/AMDGPU/vni8-across-blocks.ll (+25-25)
- (modified) llvm/test/CodeGen/AMDGPU/vopc_dpp-true16.mir (+1-1)
- (modified) llvm/test/CodeGen/AMDGPU/vopc_dpp.mir (+1-1)
- (modified) llvm/test/CodeGen/AMDGPU/wave32.ll (+34-34)
- (modified) llvm/test/CodeGen/AMDGPU/wqm.ll (+22-22)
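Judging from the file list, the only functional change is to `llvm/lib/Target/AMDGPU/VOPCInstructions.td`; the other 118 files are regenerated test expectations.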
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/VOPCInstructions.td b/llvm/lib/Target/AMDGPU/VOPCInstructions.td
index ca5ed5cd24603..eb002567f3ef5 100644
--- a/llvm/lib/Target/AMDGPU/VOPCInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOPCInstructions.td
@@ -799,72 +799,72 @@ defm V_CMPX_T_U16 : VOPCX_I16 <"v_cmpx_t_u16">;
} // End SubtargetPredicate = Has16BitInsts
defm V_CMP_F_I32 : VOPC_I32 <"v_cmp_f_i32">;
-defm V_CMP_LT_I32 : VOPC_I32 <"v_cmp_lt_i32", COND_SLT, "v_cmp_gt_i32">;
+defm V_CMP_LT_I32 : VOPC_I32 <"v_cmp_lt_i32", COND_SLT, "v_cmp_ge_i32">;
defm V_CMP_EQ_I32 : VOPC_I32 <"v_cmp_eq_i32">;
-defm V_CMP_LE_I32 : VOPC_I32 <"v_cmp_le_i32", COND_SLE, "v_cmp_ge_i32">;
+defm V_CMP_LE_I32 : VOPC_I32 <"v_cmp_le_i32", COND_SLE, "v_cmp_gt_i32">;
defm V_CMP_GT_I32 : VOPC_I32 <"v_cmp_gt_i32", COND_SGT>;
defm V_CMP_NE_I32 : VOPC_I32 <"v_cmp_ne_i32">;
defm V_CMP_GE_I32 : VOPC_I32 <"v_cmp_ge_i32", COND_SGE>;
defm V_CMP_T_I32 : VOPC_I32 <"v_cmp_t_i32">;
defm V_CMPX_F_I32 : VOPCX_I32 <"v_cmpx_f_i32">;
-defm V_CMPX_LT_I32 : VOPCX_I32 <"v_cmpx_lt_i32", "v_cmpx_gt_i32">;
+defm V_CMPX_LT_I32 : VOPCX_I32 <"v_cmpx_lt_i32", "v_cmpx_ge_i32">;
defm V_CMPX_EQ_I32 : VOPCX_I32 <"v_cmpx_eq_i32">;
-defm V_CMPX_LE_I32 : VOPCX_I32 <"v_cmpx_le_i32", "v_cmpx_ge_i32">;
+defm V_CMPX_LE_I32 : VOPCX_I32 <"v_cmpx_le_i32", "v_cmpx_gt_i32">;
defm V_CMPX_GT_I32 : VOPCX_I32 <"v_cmpx_gt_i32">;
defm V_CMPX_NE_I32 : VOPCX_I32 <"v_cmpx_ne_i32">;
defm V_CMPX_GE_I32 : VOPCX_I32 <"v_cmpx_ge_i32">;
defm V_CMPX_T_I32 : VOPCX_I32 <"v_cmpx_t_i32">;
defm V_CMP_F_I64 : VOPC_I64 <"v_cmp_f_i64">;
-defm V_CMP_LT_I64 : VOPC_I64 <"v_cmp_lt_i64", COND_SLT, "v_cmp_gt_i64">;
+defm V_CMP_LT_I64 : VOPC_I64 <"v_cmp_lt_i64", COND_SLT, "v_cmp_ge_i64">;
defm V_CMP_EQ_I64 : VOPC_I64 <"v_cmp_eq_i64">;
-defm V_CMP_LE_I64 : VOPC_I64 <"v_cmp_le_i64", COND_SLE, "v_cmp_ge_i64">;
+defm V_CMP_LE_I64 : VOPC_I64 <"v_cmp_le_i64", COND_SLE, "v_cmp_gt_i64">;
defm V_CMP_GT_I64 : VOPC_I64 <"v_cmp_gt_i64", COND_SGT>;
defm V_CMP_NE_I64 : VOPC_I64 <"v_cmp_ne_i64">;
defm V_CMP_GE_I64 : VOPC_I64 <"v_cmp_ge_i64", COND_SGE>;
defm V_CMP_T_I64 : VOPC_I64 <"v_cmp_t_i64">;
defm V_CMPX_F_I64 : VOPCX_I64 <"v_cmpx_f_i64">;
-defm V_CMPX_LT_I64 : VOPCX_I64 <"v_cmpx_lt_i64", "v_cmpx_gt_i64">;
+defm V_CMPX_LT_I64 : VOPCX_I64 <"v_cmpx_lt_i64", "v_cmpx_ge_i64">;
defm V_CMPX_EQ_I64 : VOPCX_I64 <"v_cmpx_eq_i64">;
-defm V_CMPX_LE_I64 : VOPCX_I64 <"v_cmpx_le_i64", "v_cmpx_ge_i64">;
+defm V_CMPX_LE_I64 : VOPCX_I64 <"v_cmpx_le_i64", "v_cmpx_gt_i64">;
defm V_CMPX_GT_I64 : VOPCX_I64 <"v_cmpx_gt_i64">;
defm V_CMPX_NE_I64 : VOPCX_I64 <"v_cmpx_ne_i64">;
defm V_CMPX_GE_I64 : VOPCX_I64 <"v_cmpx_ge_i64">;
defm V_CMPX_T_I64 : VOPCX_I64 <"v_cmpx_t_i64">;
defm V_CMP_F_U32 : VOPC_I32 <"v_cmp_f_u32">;
-defm V_CMP_LT_U32 : VOPC_I32 <"v_cmp_lt_u32", COND_ULT, "v_cmp_gt_u32">;
+defm V_CMP_LT_U32 : VOPC_I32 <"v_cmp_lt_u32", COND_ULT, "v_cmp_ge_u32">;
defm V_CMP_EQ_U32 : VOPC_I32 <"v_cmp_eq_u32", COND_EQ>;
-defm V_CMP_LE_U32 : VOPC_I32 <"v_cmp_le_u32", COND_ULE, "v_cmp_ge_u32">;
+defm V_CMP_LE_U32 : VOPC_I32 <"v_cmp_le_u32", COND_ULE, "v_cmp_gt_u32">;
defm V_CMP_GT_U32 : VOPC_I32 <"v_cmp_gt_u32", COND_UGT>;
defm V_CMP_NE_U32 : VOPC_I32 <"v_cmp_ne_u32", COND_NE>;
defm V_CMP_GE_U32 : VOPC_I32 <"v_cmp_ge_u32", COND_UGE>;
defm V_CMP_T_U32 : VOPC_I32 <"v_cmp_t_u32">;
defm V_CMPX_F_U32 : VOPCX_I32 <"v_cmpx_f_u32">;
-defm V_CMPX_LT_U32 : VOPCX_I32 <"v_cmpx_lt_u32", "v_cmpx_gt_u32">;
+defm V_CMPX_LT_U32 : VOPCX_I32 <"v_cmpx_lt_u32", "v_cmpx_ge_u32">;
defm V_CMPX_EQ_U32 : VOPCX_I32 <"v_cmpx_eq_u32">;
-defm V_CMPX_LE_U32 : VOPCX_I32 <"v_cmpx_le_u32", "v_cmpx_le_u32">;
+defm V_CMPX_LE_U32 : VOPCX_I32 <"v_cmpx_le_u32", "v_cmpx_gt_u32">;
defm V_CMPX_GT_U32 : VOPCX_I32 <"v_cmpx_gt_u32">;
defm V_CMPX_NE_U32 : VOPCX_I32 <"v_cmpx_ne_u32">;
defm V_CMPX_GE_U32 : VOPCX_I32 <"v_cmpx_ge_u32">;
defm V_CMPX_T_U32 : VOPCX_I32 <"v_cmpx_t_u32">;
defm V_CMP_F_U64 : VOPC_I64 <"v_cmp_f_u64">;
-defm V_CMP_LT_U64 : VOPC_I64 <"v_cmp_lt_u64", COND_ULT, "v_cmp_gt_u64">;
+defm V_CMP_LT_U64 : VOPC_I64 <"v_cmp_lt_u64", COND_ULT, "v_cmp_ge_u64">;
defm V_CMP_EQ_U64 : VOPC_I64 <"v_cmp_eq_u64", COND_EQ>;
-defm V_CMP_LE_U64 : VOPC_I64 <"v_cmp_le_u64", COND_ULE, "v_cmp_ge_u64">;
+defm V_CMP_LE_U64 : VOPC_I64 <"v_cmp_le_u64", COND_ULE, "v_cmp_gt_u64">;
defm V_CMP_GT_U64 : VOPC_I64 <"v_cmp_gt_u64", COND_UGT>;
defm V_CMP_NE_U64 : VOPC_I64 <"v_cmp_ne_u64", COND_NE>;
defm V_CMP_GE_U64 : VOPC_I64 <"v_cmp_ge_u64", COND_UGE>;
defm V_CMP_T_U64 : VOPC_I64 <"v_cmp_t_u64">;
defm V_CMPX_F_U64 : VOPCX_I64 <"v_cmpx_f_u64">;
-defm V_CMPX_LT_U64 : VOPCX_I64 <"v_cmpx_lt_u64", "v_cmpx_gt_u64">;
+defm V_CMPX_LT_U64 : VOPCX_I64 <"v_cmpx_lt_u64", "v_cmpx_ge_u64">;
defm V_CMPX_EQ_U64 : VOPCX_I64 <"v_cmpx_eq_u64">;
-defm V_CMPX_LE_U64 : VOPCX_I64 <"v_cmpx_le_u64", "v_cmpx_ge_u64">;
+defm V_CMPX_LE_U64 : VOPCX_I64 <"v_cmpx_le_u64", "v_cmpx_gt_u64">;
defm V_CMPX_GT_U64 : VOPCX_I64 <"v_cmpx_gt_u64">;
defm V_CMPX_NE_U64 : VOPCX_I64 <"v_cmpx_ne_u64">;
defm V_CMPX_GE_U64 : VOPCX_I64 <"v_cmpx_ge_u64">;
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/addo.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/addo.ll
index 38374d1689366..27668752120e8 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/addo.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/addo.ll
@@ -205,7 +205,7 @@ define i32 @v_saddo_i32(i32 %a, i32 %b) {
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_add_i32_e32 v2, vcc, v0, v1
; GFX7-NEXT: v_cmp_lt_i32_e32 vcc, v2, v0
-; GFX7-NEXT: v_cmp_gt_i32_e64 s[4:5], 0, v1
+; GFX7-NEXT: v_cmp_ge_i32_e64 s[4:5], 0, v1
; GFX7-NEXT: s_xor_b64 s[4:5], s[4:5], vcc
; GFX7-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
; GFX7-NEXT: v_add_i32_e32 v0, vcc, v2, v0
@@ -216,7 +216,7 @@ define i32 @v_saddo_i32(i32 %a, i32 %b) {
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_add_u32_e32 v2, vcc, v0, v1
; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, v2, v0
-; GFX8-NEXT: v_cmp_gt_i32_e64 s[4:5], 0, v1
+; GFX8-NEXT: v_cmp_ge_i32_e64 s[4:5], 0, v1
; GFX8-NEXT: s_xor_b64 s[4:5], s[4:5], vcc
; GFX8-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
; GFX8-NEXT: v_add_u32_e32 v0, vcc, v2, v0
@@ -227,7 +227,7 @@ define i32 @v_saddo_i32(i32 %a, i32 %b) {
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_add_u32_e32 v2, v0, v1
; GFX9-NEXT: v_cmp_lt_i32_e32 vcc, v2, v0
-; GFX9-NEXT: v_cmp_gt_i32_e64 s[4:5], 0, v1
+; GFX9-NEXT: v_cmp_ge_i32_e64 s[4:5], 0, v1
; GFX9-NEXT: s_xor_b64 s[4:5], s[4:5], vcc
; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
; GFX9-NEXT: v_add_u32_e32 v0, v2, v0
@@ -247,7 +247,7 @@ define i64 @v_saddo_i64(i64 %a, i64 %b) {
; GFX7-NEXT: v_add_i32_e32 v4, vcc, v0, v2
; GFX7-NEXT: v_addc_u32_e32 v5, vcc, v1, v3, vcc
; GFX7-NEXT: v_cmp_lt_i64_e32 vcc, v[4:5], v[0:1]
-; GFX7-NEXT: v_cmp_gt_i64_e64 s[4:5], 0, v[2:3]
+; GFX7-NEXT: v_cmp_ge_i64_e64 s[4:5], 0, v[2:3]
; GFX7-NEXT: s_xor_b64 s[4:5], s[4:5], vcc
; GFX7-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
; GFX7-NEXT: v_add_i32_e32 v0, vcc, v4, v0
@@ -260,7 +260,7 @@ define i64 @v_saddo_i64(i64 %a, i64 %b) {
; GFX8-NEXT: v_add_u32_e32 v4, vcc, v0, v2
; GFX8-NEXT: v_addc_u32_e32 v5, vcc, v1, v3, vcc
; GFX8-NEXT: v_cmp_lt_i64_e32 vcc, v[4:5], v[0:1]
-; GFX8-NEXT: v_cmp_gt_i64_e64 s[4:5], 0, v[2:3]
+; GFX8-NEXT: v_cmp_ge_i64_e64 s[4:5], 0, v[2:3]
; GFX8-NEXT: s_xor_b64 s[4:5], s[4:5], vcc
; GFX8-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
; GFX8-NEXT: v_add_u32_e32 v0, vcc, v4, v0
@@ -273,7 +273,7 @@ define i64 @v_saddo_i64(i64 %a, i64 %b) {
; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, v0, v2
; GFX9-NEXT: v_addc_co_u32_e32 v5, vcc, v1, v3, vcc
; GFX9-NEXT: v_cmp_lt_i64_e32 vcc, v[4:5], v[0:1]
-; GFX9-NEXT: v_cmp_gt_i64_e64 s[4:5], 0, v[2:3]
+; GFX9-NEXT: v_cmp_ge_i64_e64 s[4:5], 0, v[2:3]
; GFX9-NEXT: s_xor_b64 s[4:5], s[4:5], vcc
; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v4, v0
@@ -295,8 +295,8 @@ define <2 x i32> @v_saddo_v2i32(<2 x i32> %a, <2 x i32> %b) {
; GFX7-NEXT: v_add_i32_e32 v5, vcc, v1, v3
; GFX7-NEXT: v_cmp_lt_i32_e32 vcc, v4, v0
; GFX7-NEXT: v_cmp_lt_i32_e64 s[4:5], v5, v1
-; GFX7-NEXT: v_cmp_gt_i32_e64 s[6:7], 0, v2
-; GFX7-NEXT: v_cmp_gt_i32_e64 s[8:9], 0, v3
+; GFX7-NEXT: v_cmp_ge_i32_e64 s[6:7], 0, v2
+; GFX7-NEXT: v_cmp_ge_i32_e64 s[8:9], 0, v3
; GFX7-NEXT: s_xor_b64 s[6:7], s[6:7], vcc
; GFX7-NEXT: s_xor_b64 s[4:5], s[8:9], s[4:5]
; GFX7-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[6:7]
@@ -312,8 +312,8 @@ define <2 x i32> @v_saddo_v2i32(<2 x i32> %a, <2 x i32> %b) {
; GFX8-NEXT: v_add_u32_e32 v5, vcc, v1, v3
; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, v4, v0
; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v5, v1
-; GFX8-NEXT: v_cmp_gt_i32_e64 s[6:7], 0, v2
-; GFX8-NEXT: v_cmp_gt_i32_e64 s[8:9], 0, v3
+; GFX8-NEXT: v_cmp_ge_i32_e64 s[6:7], 0, v2
+; GFX8-NEXT: v_cmp_ge_i32_e64 s[8:9], 0, v3
; GFX8-NEXT: s_xor_b64 s[6:7], s[6:7], vcc
; GFX8-NEXT: s_xor_b64 s[4:5], s[8:9], s[4:5]
; GFX8-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[6:7]
@@ -329,8 +329,8 @@ define <2 x i32> @v_saddo_v2i32(<2 x i32> %a, <2 x i32> %b) {
; GFX9-NEXT: v_add_u32_e32 v5, v1, v3
; GFX9-NEXT: v_cmp_lt_i32_e32 vcc, v4, v0
; GFX9-NEXT: v_cmp_lt_i32_e64 s[4:5], v5, v1
-; GFX9-NEXT: v_cmp_gt_i32_e64 s[6:7], 0, v2
-; GFX9-NEXT: v_cmp_gt_i32_e64 s[8:9], 0, v3
+; GFX9-NEXT: v_cmp_ge_i32_e64 s[6:7], 0, v2
+; GFX9-NEXT: v_cmp_ge_i32_e64 s[8:9], 0, v3
; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], vcc
; GFX9-NEXT: s_xor_b64 s[4:5], s[8:9], s[4:5]
; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[6:7]
@@ -355,7 +355,7 @@ define i8 @v_saddo_i8(i8 %a, i8 %b) {
; GFX7-NEXT: v_bfe_i32 v0, v0, 0, 8
; GFX7-NEXT: v_cmp_lt_i32_e32 vcc, v3, v0
; GFX7-NEXT: v_bfe_i32 v0, v1, 0, 8
-; GFX7-NEXT: v_cmp_gt_i32_e64 s[4:5], 0, v0
+; GFX7-NEXT: v_cmp_ge_i32_e64 s[4:5], 0, v0
; GFX7-NEXT: s_xor_b64 s[4:5], s[4:5], vcc
; GFX7-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
; GFX7-NEXT: v_add_i32_e32 v0, vcc, v2, v0
@@ -369,7 +369,7 @@ define i8 @v_saddo_i8(i8 %a, i8 %b) {
; GFX8-NEXT: v_bfe_i32 v0, v0, 0, 8
; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, v3, v0
; GFX8-NEXT: v_bfe_i32 v0, v1, 0, 8
-; GFX8-NEXT: v_cmp_gt_i32_e64 s[4:5], 0, v0
+; GFX8-NEXT: v_cmp_ge_i32_e64 s[4:5], 0, v0
; GFX8-NEXT: s_xor_b64 s[4:5], s[4:5], vcc
; GFX8-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
; GFX8-NEXT: v_add_u16_e32 v0, v2, v0
@@ -403,7 +403,7 @@ define i7 @v_saddo_i7(i7 %a, i7 %b) {
; GFX7-NEXT: v_bfe_i32 v0, v0, 0, 7
; GFX7-NEXT: v_cmp_lt_i32_e32 vcc, v3, v0
; GFX7-NEXT: v_bfe_i32 v0, v1, 0, 7
-; GFX7-NEXT: v_cmp_gt_i32_e64 s[4:5], 0, v0
+; GFX7-NEXT: v_cmp_ge_i32_e64 s[4:5], 0, v0
; GFX7-NEXT: s_xor_b64 s[4:5], s[4:5], vcc
; GFX7-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
; GFX7-NEXT: v_add_i32_e32 v0, vcc, v2, v0
@@ -417,7 +417,7 @@ define i7 @v_saddo_i7(i7 %a, i7 %b) {
; GFX8-NEXT: v_bfe_i32 v0, v0, 0, 7
; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, v3, v0
; GFX8-NEXT: v_bfe_i32 v0, v1, 0, 7
-; GFX8-NEXT: v_cmp_gt_i32_e64 s[4:5], 0, v0
+; GFX8-NEXT: v_cmp_ge_i32_e64 s[4:5], 0, v0
; GFX8-NEXT: s_xor_b64 s[4:5], s[4:5], vcc
; GFX8-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
; GFX8-NEXT: v_add_u16_e32 v0, v2, v0
@@ -431,7 +431,7 @@ define i7 @v_saddo_i7(i7 %a, i7 %b) {
; GFX9-NEXT: v_bfe_i32 v0, v0, 0, 7
; GFX9-NEXT: v_cmp_lt_i32_e32 vcc, v3, v0
; GFX9-NEXT: v_bfe_i32 v0, v1, 0, 7
-; GFX9-NEXT: v_cmp_gt_i32_e64 s[4:5], 0, v0
+; GFX9-NEXT: v_cmp_ge_i32_e64 s[4:5], 0, v0
; GFX9-NEXT: s_xor_b64 s[4:5], s[4:5], vcc
; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
; GFX9-NEXT: v_add_u16_e32 v0, v2, v0
@@ -802,7 +802,7 @@ define i8 @s_saddo_i8(i8 %a, i8 %b) {
; GFX7-NEXT: v_bfe_i32 v0, v0, 0, 8
; GFX7-NEXT: v_cmp_lt_i32_e32 vcc, v3, v0
; GFX7-NEXT: v_bfe_i32 v0, v1, 0, 8
-; GFX7-NEXT: v_cmp_gt_i32_e64 s[4:5], 0, v0
+; GFX7-NEXT: v_cmp_ge_i32_e64 s[4:5], 0, v0
; GFX7-NEXT: s_xor_b64 s[4:5], s[4:5], vcc
; GFX7-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
; GFX7-NEXT: v_add_i32_e32 v0, vcc, v2, v0
@@ -816,7 +816,7 @@ define i8 @s_saddo_i8(i8 %a, i8 %b) {
; GFX8-NEXT: v_bfe_i32 v0, v0, 0, 8
; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, v3, v0
; GFX8-NEXT: v_bfe_i32 v0, v1, 0, 8
-; GFX8-NEXT: v_cmp_gt_i32_e64 s[4:5], 0, v0
+; GFX8-NEXT: v_cmp_ge_i32_e64 s[4:5], 0, v0
; GFX8-NEXT: s_xor_b64 s[4:5], s[4:5], vcc
; GFX8-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
; GFX8-NEXT: v_add_u16_e32 v0, v2, v0
@@ -850,7 +850,7 @@ define i7 @s_saddo_i7(i7 %a, i7 %b) {
; GFX7-NEXT: v_bfe_i32 v0, v0, 0, 7
; GFX7-NEXT: v_cmp_lt_i32_e32 vcc, v3, v0
; GFX7-NEXT: v_bfe_i32 v0, v1, 0, 7
-; GFX7-NEXT: v_cmp_gt_i32_e64 s[4:5], 0, v0
+; GFX7-NEXT: v_cmp_ge_i32_e64 s[4:5], 0, v0
; GFX7-NEXT: s_xor_b64 s[4:5], s[4:5], vcc
; GFX7-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
; GFX7-NEXT: v_add_i32_e32 v0, vcc, v2, v0
@@ -864,7 +864,7 @@ define i7 @s_saddo_i7(i7 %a, i7 %b) {
; GFX8-NEXT: v_bfe_i32 v0, v0, 0, 7
; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, v3, v0
; GFX8-NEXT: v_bfe_i32 v0, v1, 0, 7
-; GFX8-NEXT: v_cmp_gt_i32_e64 s[4:5], 0, v0
+; GFX8-NEXT: v_cmp_ge_i32_e64 s[4:5], 0, v0
; GFX8-NEXT: s_xor_b64 s[4:5], s[4:5], vcc
; GFX8-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
; GFX8-NEXT: v_add_u16_e32 v0, v2, v0
@@ -878,7 +878,7 @@ define i7 @s_saddo_i7(i7 %a, i7 %b) {
; GFX9-NEXT: v_bfe_i32 v0, v0, 0, 7
; GFX9-NEXT: v_cmp_lt_i32_e32 vcc, v3, v0
; GFX9-NEXT: v_bfe_i32 v0, v1, 0, 7
-; GFX9-NEXT: v_cmp_gt_i32_e64 s[4:5], 0, v0
+; GFX9-NEXT: v_cmp_ge_i32_e64 s[4:5], 0, v0
; GFX9-NEXT: s_xor_b64 s[4:5], s[4:5], vcc
; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
; GFX9-NEXT: v_add_u16_e32 v0, v2, v0
@@ -969,8 +969,8 @@ define amdgpu_ps i32 @saddo_i32_sv(i32 inreg %a, i32 %b) {
; GFX7-LABEL: saddo_i32_sv:
; GFX7: ; %bb.0:
; GFX7-NEXT: v_add_i32_e32 v1, vcc, s0, v0
-; GFX7-NEXT: v_cmp_gt_i32_e32 vcc, s0, v1
-; GFX7-NEXT: v_cmp_gt_i32_e64 s[0:1], 0, v0
+; GFX7-NEXT: v_cmp_ge_i32_e32 vcc, s0, v1
+; GFX7-NEXT: v_cmp_ge_i32_e64 s[0:1], 0, v0
; GFX7-NEXT: s_xor_b64 s[0:1], s[0:1], vcc
; GFX7-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
; GFX7-NEXT: v_add_i32_e32 v0, vcc, v1, v0
@@ -980,8 +980,8 @@ define amdgpu_ps i32 @saddo_i32_sv(i32 inreg %a, i32 %b) {
; GFX8-LABEL: saddo_i32_sv:
; GFX8: ; %bb.0:
; GFX8-NEXT: v_add_u32_e32 v1, vcc, s0, v0
-; GFX8-NEXT: v_cmp_gt_i32_e32 vcc, s0, v1
-; GFX8-NEXT: v_cmp_gt_i32_e64 s[0:1], 0, v0
+; GFX8-NEXT: v_cmp_ge_i32_e32 vcc, s0, v1
+; GFX8-NEXT: v_cmp_ge_i32_e64 s[0:1], 0, v0
; GFX8-NEXT: s_xor_b64 s[0:1], s[0:1], vcc
; GFX8-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
; GFX8-NEXT: v_add_u32_e32 v0, vcc, v1, v0
@@ -991,8 +991,8 @@ define amdgpu_ps i32 @saddo_i32_sv(i32 inreg %a, i32 %b) {
; GFX9-LABEL: saddo_i32_sv:
; GFX9: ; %bb.0:
; GFX9-NEXT: v_add_u32_e32 v1, s0, v0
-; GFX9-NEXT: v_cmp_gt_i32_e32 vcc, s0, v1
-; GFX9-NEXT: v_cmp_gt_i32_e64 s[0:1], 0, v0
+; GFX9-NEXT: v_cmp_ge_i32_e32 vcc, s0, v1
+; GFX9-NEXT: v_cmp_ge_i32_e64 s[0:1], 0, v0
; GFX9-NEXT: s_xor_b64 s[0:1], s[0:1], vcc
; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
; GFX9-NEXT: v_add_u32_e32 v0, v1, v0
@@ -1013,8 +1013,8 @@ define amdgpu_ps i16 @saddo_i16_sv(i16 inreg %a, i16 %b) {
; GFX7-NEXT: v_bfe_i32 v2, v1, 0, 16
; GFX7-NEXT: s_sext_i32_i16 s0, s0
; GFX7-NEXT: v_bfe_i32 v0, v0, 0, 16
-; GFX7-NEXT: v_cmp_gt_i32_e32 vcc, s0, v2
-; GFX7-NEXT: v_cmp_gt_i32_e64 s[0:1], 0, v0
+; GFX7-NEXT: v_cmp_ge_i32_e32 vcc, s0, v2
+; GFX7-NEXT: v_cmp_ge_i32_e64 s[0:1], 0, v0
; GFX7-NEXT: s_xor_b64 s[0:1], s[0:1], vcc
; GFX7-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
; GFX7-NEXT: v_add_i32_e32 v0, vcc, v1, v0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/ashr.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/ashr.ll
index 353c09b4b0bfb..3f61fdd8309ec 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/ashr.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/ashr.ll
@@ -1666,7 +1666,7 @@ define i65 @v_ashr_i65(i65 %value, i65 %amount) {
; GFX6-NEXT: v_ashrrev_i32_e32 v8, 31, v5
; GFX6-NEXT: v_ashr_i64 v[4:5], v[4:5], v2
; GFX6-NEXT: v_or_b32_e32 v7, v7, v9
-; GFX6-NEXT: v_cmp_gt_u32_e32 vcc, 64, v3
+; GFX6-NEXT: v_cmp_ge_u32_e32 vcc, 64, v3
; GFX6-NEXT: v_cndmask_b32_e32 v2, v4, v6, vcc
; GFX6-NEXT: v_cndmask_b32_e32 v4, v5, v7, vcc
; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v3
@@ -1689,7 +1689,7 @@ define i65 @v_ashr_i65(i65 %value, i65 %amount) {
; GFX8-NEXT: v_ashrrev_i32_e32 v8, 31, v5
; GFX8-NEXT: v_ashrrev_i64 v[4:5], v2, v[4:5]
; GFX8-NEXT: v_or_b32_e32 v7, v7, v9
-; GFX8-NEXT: v_cmp_gt_u32_e32 vcc, 64, v3
+; GFX8-NEXT: v_cmp_ge_u32_e32 vcc, 64, v3
; GFX8-NEXT: v_cndmask_b32_e32 v2, v4, v6, vcc
; GFX8-NEXT: v_cndmask_b32_e32 v4, v5, v7, vcc
; GFX8-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v3
@@ -1712,7 +1712,7 @@ define i65 @v_ashr_i65(i65 %value, i65 %amount) {
; GFX9-NEXT: v_ashrrev_i32_e32 v8, 31, v5
; GFX9-NEXT: v_ashrrev_i64 v[4:5], v2, v[4:5]
; GFX9-NEXT: v_or_b32_e32 v7, v7, v9
-; GFX9-NEXT: v_cmp_gt_u32_e32 vcc, 64, v3
+; GFX9-NEXT: v_cmp_ge_u32_e32 vcc, 64, v3
; GFX9-NEXT: v_cndmask_b32_e32 v2, v4, v6, vcc
; GFX9-NEXT: v_cndmask_b32_e32 v4, v5, v7, vcc
; GFX9-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v3
@@ -1728,7 +1728,7 @@ define i65 @v_ashr_i65(i65 %value, i65 %amount) {
; GFX10-NEXT: v_sub_nc_u32_e32 v2, 64, v3
; GFX10-NEXT: v_add_nc_u32_e32 v10, 0xffffffc0, v3
; GFX10-NEXT: v_lshrrev_b64 v[6:7], v3, v[0:1]
-; GFX10-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v3
+; GFX10-NEXT: v_cmp_ge_u32_e32 vcc_lo, 64, v3
; GFX10-NEXT: v_ashrrev_i32_e32 v5, 31, v4
; GFX10-NEXT: v_cmp_eq_u32_e64 s4, 0, v3
; GFX10-NEXT: v_lshlrev_b64 v[8:9], v2, v[4:5]
@@ -1750,7 +1750,7 @@ define i65 @v_ashr_i65(i65 %value, i65 %amount) {
; GFX11-NEXT: v_bfe_i32 v4, v2, 0, 1
; GFX11-NEXT: v_sub_nc_u32_e32 v2, 64, v3
; GFX11-NEXT: v_lshrrev_b64 v[6:7], v3, v[0:1]
-; GFX11-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v3
+; GFX11-NEXT: v_cmp_ge_u32_e32 vcc_lo, 64, v3
; GFX11-NEXT: v_cmp_eq_u32_e64 s0, 0, v3
; GFX11-NEXT: v_ashrrev_i32_e32 v5, 31, v4
; GFX11-NEXT: v_lshlrev_b64 v[8:9], v2, v[4:5]
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.ll
index 11acd451d98d7..0810342185c64 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.ll
@@ -30,11 +30,11 @@ define amdgpu_ps void @divergent_i1_phi_uniform_branch(ptr addrspace(1) %out, i3
; GFX10-NEXT: s_cmp_lg_u32 s0, 0
; GFX10-NEXT: s_cbranch_scc0 .LBB0_2
; GFX10-NEXT: ; %bb.1:
-; GFX10-NEXT: v_cmp_le_u32_e64 s0, 6, v2
+; GFX10-NEXT: v_cmp_lt_u32_e64 s0, 6, v2
; GFX10-NEXT: s_branch .LBB0_3
; GFX10-NEXT: .LBB0_2: ; %dummy
; GFX10-NEXT: v_mov_b32_e32 v5, 0x7b
-; GFX10-NEXT: v_cmp_gt_u32_e64 s0, 1, v2
+; GFX10-NEXT: v_cmp_ge_u32_e64 s0, 1, v2
; GFX10-NEXT: global_store_dword v[3:4], v5, off
; GFX10-NEXT: .LBB0_3: ; %exit
; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, -1, s0
@@ -65,11 +65,11 @@ exit:
define amdgpu_ps void @divergent_i1_phi_uniform_branch_simple(ptr addrspace(1) %out, i32 %tid, i32 inreg %cond) {
; GFX10-LABEL: divergent_i1_phi_uniform_branch_simple:
; GFX10: ; %bb.0: ; %A
-; GFX10-NEXT: v_cmp_le_u32_e64 s1, 6, v2
+; GFX10-NEXT: v_cmp_lt_u32_e64 s1, 6, v2
; GFX10-NEXT: s_cmp_lg_u32 s0, 0
; GFX10-NEXT: s_cbranch_scc1 .LBB1_2
; GFX10-NEXT: ; %bb.1: ; %B
-; GFX10-NEXT: v_cmp_gt_u32_e32 vcc_lo, 1, v2
+; GFX10-NEXT: v_cmp_ge_u32_e32 vcc_lo, 1, v2
; GFX10-NEXT: s_andn2_b32 s0, s1, exec_lo
; GFX10-NEXT: s_and_b32 s1, exec_lo, vcc_lo
; GFX10-NEXT: s_or_b32 s1, s0, s1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-used-outside-loop.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-used-outside-loop.ll
index be90b02a6dd65..9c9be2db98acf...
[truncated]
``````````
https://github.com/llvm/llvm-project/pull/146398