[llvm] [AMDGPU] Swap select operands to allow later v_cndmask shrinking into vop2 (PR #142354)

Ana Mihajlovic via llvm-commits llvm-commits at lists.llvm.org
Tue Jun 10 06:23:05 PDT 2025


https://github.com/mihajlovicana updated https://github.com/llvm/llvm-project/pull/142354

>From cd2f872741b17f635234f4cd833d8ce660da76e1 Mon Sep 17 00:00:00 2001
From: Ana Mihajlovic <Ana.Mihajlovic at amd.com>
Date: Mon, 2 Jun 2025 10:41:58 +0200
Subject: [PATCH 1/6] test precommit

---
 llvm/test/CodeGen/AMDGPU/shrink-cndmask.ll | 764 +++++++++++++++++++++
 1 file changed, 764 insertions(+)
 create mode 100644 llvm/test/CodeGen/AMDGPU/shrink-cndmask.ll

diff --git a/llvm/test/CodeGen/AMDGPU/shrink-cndmask.ll b/llvm/test/CodeGen/AMDGPU/shrink-cndmask.ll
new file mode 100644
index 0000000000000..12ccdfff07c6f
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/shrink-cndmask.ll
@@ -0,0 +1,764 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck %s -check-prefix=GCN
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck %s -check-prefix=GCN
+
+;tests for integer 32
+define amdgpu_cs void @test_i32_sge(i32 %a, i32 %p, i32 %q, ptr addrspace(1) %out) {
+; GCN-LABEL: test_i32_sge:
+; GCN:       ; %bb.0: ; %.entry
+; GCN-NEXT:    v_cmp_lt_i32_e32 vcc_lo, 1, v0
+; GCN-NEXT:    v_dual_cndmask_b32 v0, 0, v1 :: v_dual_cndmask_b32 v1, 0, v2
+; GCN-NEXT:    global_store_b64 v[3:4], v[0:1], off
+; GCN-NEXT:    s_endpgm
+.entry:
+  %vcc = icmp sge i32 %a, 2
+  %val1 = select i1 %vcc, i32 %p, i32 0
+  %val2 = select i1 %vcc, i32 %q, i32 0
+  %ret0 = insertelement <2 x i32> poison, i32 %val1, i32 0
+  %ret1 = insertelement <2 x i32> %ret0, i32 %val2, i32 1
+  store <2 x i32> %ret1, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_cs void @test_i32_sle(i32 %a, i32 %p, i32 %q, ptr addrspace(1) %out) {
+; GCN-LABEL: test_i32_sle:
+; GCN:       ; %bb.0: ; %.entry
+; GCN-NEXT:    v_cmp_gt_i32_e32 vcc_lo, 3, v0
+; GCN-NEXT:    v_dual_cndmask_b32 v0, 0, v1 :: v_dual_cndmask_b32 v1, 0, v2
+; GCN-NEXT:    global_store_b64 v[3:4], v[0:1], off
+; GCN-NEXT:    s_endpgm
+.entry:
+  %vcc = icmp sle i32 %a, 2
+  %val1 = select i1 %vcc, i32 %p, i32 0
+  %val2 = select i1 %vcc, i32 %q, i32 0
+  %ret0 = insertelement <2 x i32> poison, i32 %val1, i32 0
+  %ret1 = insertelement <2 x i32> %ret0, i32 %val2, i32 1
+  store <2 x i32> %ret1, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_cs void @test_i32_sgt(i32 %a, i32 %p, i32 %q, ptr addrspace(1) %out) {
+; GCN-LABEL: test_i32_sgt:
+; GCN:       ; %bb.0: ; %.entry
+; GCN-NEXT:    v_cmp_gt_i32_e32 vcc_lo, 2, v0
+; GCN-NEXT:    v_cndmask_b32_e64 v0, v1, 0, vcc_lo
+; GCN-NEXT:    v_cndmask_b32_e64 v1, v2, 0, vcc_lo
+; GCN-NEXT:    global_store_b64 v[3:4], v[0:1], off
+; GCN-NEXT:    s_endpgm
+.entry:
+  %vcc = icmp sgt i32 2, %a
+  %val1 = select i1 %vcc, i32 0, i32 %p
+  %val2 = select i1 %vcc, i32 0, i32 %q
+  %ret0 = insertelement <2 x i32> poison, i32 %val1, i32 0
+  %ret1 = insertelement <2 x i32> %ret0, i32 %val2, i32 1
+  store <2 x i32> %ret1, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_cs void @test_i32_slt(i32 %a, i32 %p, i32 %q, ptr addrspace(1) %out) {
+; GCN-LABEL: test_i32_slt:
+; GCN:       ; %bb.0: ; %.entry
+; GCN-NEXT:    v_cmp_lt_i32_e32 vcc_lo, 2, v0
+; GCN-NEXT:    v_cndmask_b32_e64 v0, v1, 0, vcc_lo
+; GCN-NEXT:    v_cndmask_b32_e64 v1, v2, 0, vcc_lo
+; GCN-NEXT:    global_store_b64 v[3:4], v[0:1], off
+; GCN-NEXT:    s_endpgm
+.entry:
+  %vcc = icmp slt i32 2, %a
+  %val1 = select i1 %vcc, i32 0, i32 %p
+  %val2 = select i1 %vcc, i32 0, i32 %q
+  %ret0 = insertelement <2 x i32> poison, i32 %val1, i32 0
+  %ret1 = insertelement <2 x i32> %ret0, i32 %val2, i32 1
+  store <2 x i32> %ret1, ptr addrspace(1) %out
+  ret void
+}
+
+;tests for integer 64
+define amdgpu_cs void @test_i64_sge(i64 %a, i64 %p, i64 %q, ptr addrspace(1) %out) {
+; GCN-LABEL: test_i64_sge:
+; GCN:       ; %bb.0: ; %.entry
+; GCN-NEXT:    v_cmp_lt_i64_e32 vcc_lo, 1, v[0:1]
+; GCN-NEXT:    v_dual_cndmask_b32 v1, 0, v3 :: v_dual_cndmask_b32 v0, 0, v2
+; GCN-NEXT:    v_dual_cndmask_b32 v3, 0, v5 :: v_dual_cndmask_b32 v2, 0, v4
+; GCN-NEXT:    global_store_b128 v[6:7], v[0:3], off
+; GCN-NEXT:    s_endpgm
+.entry:
+  %vcc = icmp sge i64 %a, 2
+  %val1 = select i1 %vcc, i64 %p, i64 0
+  %val2 = select i1 %vcc, i64 %q, i64 0
+  %ret0 = insertelement <2 x i64> poison, i64 %val1, i64 0
+  %ret1 = insertelement <2 x i64> %ret0, i64 %val2, i64 1
+  store <2 x i64> %ret1, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_cs void @test_i64_sle(i64 %a, i64 %p, i64 %q, ptr addrspace(1) %out) {
+; GCN-LABEL: test_i64_sle:
+; GCN:       ; %bb.0: ; %.entry
+; GCN-NEXT:    v_cmp_gt_i64_e32 vcc_lo, 3, v[0:1]
+; GCN-NEXT:    v_dual_cndmask_b32 v1, 0, v3 :: v_dual_cndmask_b32 v0, 0, v2
+; GCN-NEXT:    v_dual_cndmask_b32 v3, 0, v5 :: v_dual_cndmask_b32 v2, 0, v4
+; GCN-NEXT:    global_store_b128 v[6:7], v[0:3], off
+; GCN-NEXT:    s_endpgm
+.entry:
+  %vcc = icmp sle i64 %a, 2
+  %val1 = select i1 %vcc, i64 %p, i64 0
+  %val2 = select i1 %vcc, i64 %q, i64 0
+  %ret0 = insertelement <2 x i64> poison, i64 %val1, i64 0
+  %ret1 = insertelement <2 x i64> %ret0, i64 %val2, i64 1
+  store <2 x i64> %ret1, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_cs void @test_i64_sgt(i64 %a, i64 %p, i64 %q, ptr addrspace(1) %out) {
+; GCN-LABEL: test_i64_sgt:
+; GCN:       ; %bb.0: ; %.entry
+; GCN-NEXT:    v_cmp_gt_i64_e32 vcc_lo, 2, v[0:1]
+; GCN-NEXT:    v_cndmask_b32_e64 v1, v3, 0, vcc_lo
+; GCN-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GCN-NEXT:    v_cndmask_b32_e64 v3, v5, 0, vcc_lo
+; GCN-NEXT:    v_cndmask_b32_e64 v2, v4, 0, vcc_lo
+; GCN-NEXT:    global_store_b128 v[6:7], v[0:3], off
+; GCN-NEXT:    s_endpgm
+.entry:
+  %vcc = icmp sgt i64 2, %a
+  %val1 = select i1 %vcc, i64 0, i64 %p
+  %val2 = select i1 %vcc, i64 0, i64 %q
+  %ret0 = insertelement <2 x i64> poison, i64 %val1, i64 0
+  %ret1 = insertelement <2 x i64> %ret0, i64 %val2, i64 1
+  store <2 x i64> %ret1, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_cs void @test_i64_slt(i64 %a, i64 %p, i64 %q, ptr addrspace(1) %out) {
+; GCN-LABEL: test_i64_slt:
+; GCN:       ; %bb.0: ; %.entry
+; GCN-NEXT:    v_cmp_lt_i64_e32 vcc_lo, 2, v[0:1]
+; GCN-NEXT:    v_cndmask_b32_e64 v1, v3, 0, vcc_lo
+; GCN-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GCN-NEXT:    v_cndmask_b32_e64 v3, v5, 0, vcc_lo
+; GCN-NEXT:    v_cndmask_b32_e64 v2, v4, 0, vcc_lo
+; GCN-NEXT:    global_store_b128 v[6:7], v[0:3], off
+; GCN-NEXT:    s_endpgm
+.entry:
+  %vcc = icmp slt i64 2, %a
+  %val1 = select i1 %vcc, i64 0, i64 %p
+  %val2 = select i1 %vcc, i64 0, i64 %q
+  %ret0 = insertelement <2 x i64> poison, i64 %val1, i64 0
+  %ret1 = insertelement <2 x i64> %ret0, i64 %val2, i64 1
+  store <2 x i64> %ret1, ptr addrspace(1) %out
+  ret void
+}
+
+;tests for unsigned 32
+define amdgpu_cs void @test_u32_eq(i32 %a, i32 %p, i32 %q, ptr addrspace(1) %out) {
+; GCN-LABEL: test_u32_eq:
+; GCN:       ; %bb.0: ; %.entry
+; GCN-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
+; GCN-NEXT:    v_cndmask_b32_e64 v0, v1, 0, vcc_lo
+; GCN-NEXT:    v_cndmask_b32_e64 v1, v2, 0, vcc_lo
+; GCN-NEXT:    global_store_b64 v[3:4], v[0:1], off
+; GCN-NEXT:    s_endpgm
+.entry:
+  %vcc = icmp eq i32 1, %a
+  %val1 = select i1 %vcc, i32 0, i32 %p
+  %val2 = select i1 %vcc, i32 0, i32 %q
+  %ret0 = insertelement <2 x i32> poison, i32 %val1, i32 0
+  %ret1 = insertelement <2 x i32> %ret0, i32 %val2, i32 1
+  store <2 x i32> %ret1, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_cs void @test_negative_case(i32 %a, i32 %p, i32 %q, ptr addrspace(1) %out) {
+; GCN-LABEL: test_negative_case:
+; GCN:       ; %bb.0: ; %.entry
+; GCN-NEXT:    v_cmp_eq_u32_e32 vcc_lo, -1, v0
+; GCN-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc_lo
+; GCN-NEXT:    v_cndmask_b32_e64 v1, v2, 0, vcc_lo
+; GCN-NEXT:    global_store_b64 v[3:4], v[0:1], off
+; GCN-NEXT:    s_endpgm
+.entry:
+  %vcc = icmp eq i32 %a, -1
+  %val1 = select i1 %vcc, i32 %p, i32 0
+  %val2 = select i1 %vcc, i32 0, i32 %q
+  %ret0 = insertelement <2 x i32> poison, i32 %val1, i32 0
+  %ret1 = insertelement <2 x i32> %ret0, i32 %val2, i32 1
+  store <2 x i32> %ret1, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_cs void @test_mixed(i32 %a, i32 %p, i32 %q, i32 %r, i32 %s, ptr addrspace(1) %out) {
+; GCN-LABEL: test_mixed:
+; GCN:       ; %bb.0: ; %.entry
+; GCN-NEXT:    v_cmp_eq_u32_e32 vcc_lo, -1, v0
+; GCN-NEXT:    v_cndmask_b32_e64 v0, v1, 0, vcc_lo
+; GCN-NEXT:    v_cndmask_b32_e32 v1, 0, v2, vcc_lo
+; GCN-NEXT:    v_cndmask_b32_e64 v2, v3, 0, vcc_lo
+; GCN-NEXT:    v_cndmask_b32_e64 v3, v4, 0, vcc_lo
+; GCN-NEXT:    global_store_b128 v[5:6], v[0:3], off
+; GCN-NEXT:    s_endpgm
+.entry:
+  %vcc = icmp eq i32 -1, %a
+  %val1 = select i1 %vcc, i32 0, i32 %p
+  %val2 = select i1 %vcc, i32 %q, i32 0
+  %val3 = select i1 %vcc, i32 0, i32 %r
+  %val4 = select i1 %vcc, i32 0, i32 %s
+  %ret0 = insertelement <4 x i32> poison, i32 %val1, i32 0
+  %ret1 = insertelement <4 x i32> %ret0, i32 %val2, i32 1
+  %ret2 = insertelement <4 x i32> %ret1, i32 %val3, i32 2
+  %ret3 = insertelement <4 x i32> %ret2, i32 %val4, i32 3
+  store <4 x i32> %ret3, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_cs void @test_sgpr(i32 %a, i32 %p, i32 inreg %q, i32 inreg %r, ptr addrspace(1) %out) {
+; GCN-LABEL: test_sgpr:
+; GCN:       ; %bb.0: ; %.entry
+; GCN-NEXT:    v_cmp_eq_u32_e32 vcc_lo, -1, v0
+; GCN-NEXT:    v_cndmask_b32_e32 v4, 0, v1, vcc_lo
+; GCN-NEXT:    v_cndmask_b32_e64 v5, s0, 0, vcc_lo
+; GCN-NEXT:    v_cndmask_b32_e64 v6, s1, 0, vcc_lo
+; GCN-NEXT:    global_store_b96 v[2:3], v[4:6], off
+; GCN-NEXT:    s_endpgm
+.entry:
+  %vcc = icmp eq i32 %a, -1
+  %val1 = select i1 %vcc, i32 %p, i32 0
+  %val2 = select i1 %vcc, i32 0, i32 %q
+  %val3 = select i1 %vcc, i32 0, i32 %r
+  %ret0 = insertelement <3 x i32> poison, i32 %val1, i32 0
+  %ret1 = insertelement <3 x i32> %ret0, i32 %val2, i32 1
+  %ret2 = insertelement <3 x i32> %ret1, i32 %val3, i32 2
+  store <3 x i32> %ret2, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_cs void @test_u32_ne(i32 %a, i32 %p, i32 %q, ptr addrspace(1) %out) {
+; GCN-LABEL: test_u32_ne:
+; GCN:       ; %bb.0: ; %.entry
+; GCN-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 1, v0
+; GCN-NEXT:    v_cndmask_b32_e64 v0, v1, 0, vcc_lo
+; GCN-NEXT:    v_cndmask_b32_e64 v1, v2, 0, vcc_lo
+; GCN-NEXT:    global_store_b64 v[3:4], v[0:1], off
+; GCN-NEXT:    s_endpgm
+.entry:
+  %vcc = icmp ne i32 1, %a
+  %val1 = select i1 %vcc, i32 0, i32 %p
+  %val2 = select i1 %vcc, i32 0, i32 %q
+  %ret0 = insertelement <2 x i32> poison, i32 %val1, i32 0
+  %ret1 = insertelement <2 x i32> %ret0, i32 %val2, i32 1
+  store <2 x i32> %ret1, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_cs void @test_u32_uge(i32 %a, i32 %p, i32 %q, ptr addrspace(1) %out) {
+; GCN-LABEL: test_u32_uge:
+; GCN:       ; %bb.0: ; %.entry
+; GCN-NEXT:    v_cmp_lt_u32_e32 vcc_lo, 1, v0
+; GCN-NEXT:    v_dual_cndmask_b32 v0, 0, v1 :: v_dual_cndmask_b32 v1, 0, v2
+; GCN-NEXT:    global_store_b64 v[3:4], v[0:1], off
+; GCN-NEXT:    s_endpgm
+.entry:
+  %vcc = icmp uge i32 %a, 2
+  %val1 = select i1 %vcc, i32 %p, i32 0
+  %val2 = select i1 %vcc, i32 %q, i32 0
+  %ret0 = insertelement <2 x i32> poison, i32 %val1, i32 0
+  %ret1 = insertelement <2 x i32> %ret0, i32 %val2, i32 1
+  store <2 x i32> %ret1, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_cs void @test_u32_ule(i32 %a, i32 %p, i32 %q, ptr addrspace(1) %out) {
+; GCN-LABEL: test_u32_ule:
+; GCN:       ; %bb.0: ; %.entry
+; GCN-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 3, v0
+; GCN-NEXT:    v_dual_cndmask_b32 v0, 0, v1 :: v_dual_cndmask_b32 v1, 0, v2
+; GCN-NEXT:    global_store_b64 v[3:4], v[0:1], off
+; GCN-NEXT:    s_endpgm
+.entry:
+  %vcc = icmp ule i32 %a, 2
+  %val1 = select i1 %vcc, i32 %p, i32 0
+  %val2 = select i1 %vcc, i32 %q, i32 0
+  %ret0 = insertelement <2 x i32> poison, i32 %val1, i32 0
+  %ret1 = insertelement <2 x i32> %ret0, i32 %val2, i32 1
+  store <2 x i32> %ret1, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_cs void @test_u32_ugt(i32 %a, i32 %p, i32 %q, ptr addrspace(1) %out) {
+; GCN-LABEL: test_u32_ugt:
+; GCN:       ; %bb.0: ; %.entry
+; GCN-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 2, v0
+; GCN-NEXT:    v_cndmask_b32_e64 v0, v1, 0, vcc_lo
+; GCN-NEXT:    v_cndmask_b32_e64 v1, v2, 0, vcc_lo
+; GCN-NEXT:    global_store_b64 v[3:4], v[0:1], off
+; GCN-NEXT:    s_endpgm
+.entry:
+  %vcc = icmp ugt i32 2, %a
+  %val1 = select i1 %vcc, i32 0, i32 %p
+  %val2 = select i1 %vcc, i32 0, i32 %q
+  %ret0 = insertelement <2 x i32> poison, i32 %val1, i32 0
+  %ret1 = insertelement <2 x i32> %ret0, i32 %val2, i32 1
+  store <2 x i32> %ret1, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_cs void @test_u32_ult(i32 %a, i32 %p, i32 %q, ptr addrspace(1) %out) {
+; GCN-LABEL: test_u32_ult:
+; GCN:       ; %bb.0: ; %.entry
+; GCN-NEXT:    v_cmp_lt_u32_e32 vcc_lo, 2, v0
+; GCN-NEXT:    v_cndmask_b32_e64 v0, v1, 0, vcc_lo
+; GCN-NEXT:    v_cndmask_b32_e64 v1, v2, 0, vcc_lo
+; GCN-NEXT:    global_store_b64 v[3:4], v[0:1], off
+; GCN-NEXT:    s_endpgm
+.entry:
+  %vcc = icmp ult i32 2, %a
+  %val1 = select i1 %vcc, i32 0, i32 %p
+  %val2 = select i1 %vcc, i32 0, i32 %q
+  %ret0 = insertelement <2 x i32> poison, i32 %val1, i32 0
+  %ret1 = insertelement <2 x i32> %ret0, i32 %val2, i32 1
+  store <2 x i32> %ret1, ptr addrspace(1) %out
+  ret void
+}
+
+;tests for unsigned 64
+define amdgpu_cs void @test_u64_eq(i64 %a, i64 %p, i64 %q, ptr addrspace(1) %out) {
+; GCN-LABEL: test_u64_eq:
+; GCN:       ; %bb.0: ; %.entry
+; GCN-NEXT:    v_cmp_eq_u64_e32 vcc_lo, 1, v[0:1]
+; GCN-NEXT:    v_cndmask_b32_e64 v1, v3, 0, vcc_lo
+; GCN-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GCN-NEXT:    v_cndmask_b32_e64 v3, v5, 0, vcc_lo
+; GCN-NEXT:    v_cndmask_b32_e64 v2, v4, 0, vcc_lo
+; GCN-NEXT:    global_store_b128 v[6:7], v[0:3], off
+; GCN-NEXT:    s_endpgm
+.entry:
+  %vcc = icmp eq i64 1, %a
+  %val1 = select i1 %vcc, i64 0, i64 %p
+  %val2 = select i1 %vcc, i64 0, i64 %q
+  %ret0 = insertelement <2 x i64> poison, i64 %val1, i64 0
+  %ret1 = insertelement <2 x i64> %ret0, i64 %val2, i64 1
+  store <2 x i64> %ret1, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_cs void @test_u64_ne(i64 %a, i64 %p, i64 %q, ptr addrspace(1) %out) {
+; GCN-LABEL: test_u64_ne:
+; GCN:       ; %bb.0: ; %.entry
+; GCN-NEXT:    v_cmp_ne_u64_e32 vcc_lo, 1, v[0:1]
+; GCN-NEXT:    v_cndmask_b32_e64 v1, v3, 0, vcc_lo
+; GCN-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GCN-NEXT:    v_cndmask_b32_e64 v3, v5, 0, vcc_lo
+; GCN-NEXT:    v_cndmask_b32_e64 v2, v4, 0, vcc_lo
+; GCN-NEXT:    global_store_b128 v[6:7], v[0:3], off
+; GCN-NEXT:    s_endpgm
+.entry:
+  %vcc = icmp ne i64 1, %a
+  %val1 = select i1 %vcc, i64 0, i64 %p
+  %val2 = select i1 %vcc, i64 0, i64 %q
+  %ret0 = insertelement <2 x i64> poison, i64 %val1, i64 0
+  %ret1 = insertelement <2 x i64> %ret0, i64 %val2, i64 1
+  store <2 x i64> %ret1, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_cs void @test_u64_uge(i64 %a, i64 %p, i64 %q, ptr addrspace(1) %out) {
+; GCN-LABEL: test_u64_uge:
+; GCN:       ; %bb.0: ; %.entry
+; GCN-NEXT:    v_cmp_lt_u64_e32 vcc_lo, 1, v[0:1]
+; GCN-NEXT:    v_dual_cndmask_b32 v1, 0, v3 :: v_dual_cndmask_b32 v0, 0, v2
+; GCN-NEXT:    v_dual_cndmask_b32 v3, 0, v5 :: v_dual_cndmask_b32 v2, 0, v4
+; GCN-NEXT:    global_store_b128 v[6:7], v[0:3], off
+; GCN-NEXT:    s_endpgm
+.entry:
+  %vcc = icmp uge i64 %a, 2
+  %val1 = select i1 %vcc, i64 %p, i64 0
+  %val2 = select i1 %vcc, i64 %q, i64 0
+  %ret0 = insertelement <2 x i64> poison, i64 %val1, i64 0
+  %ret1 = insertelement <2 x i64> %ret0, i64 %val2, i64 1
+  store <2 x i64> %ret1, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_cs void @test_u64_ule(i64 %a, i64 %p, i64 %q, ptr addrspace(1) %out) {
+; GCN-LABEL: test_u64_ule:
+; GCN:       ; %bb.0: ; %.entry
+; GCN-NEXT:    v_cmp_gt_u64_e32 vcc_lo, 3, v[0:1]
+; GCN-NEXT:    v_dual_cndmask_b32 v1, 0, v3 :: v_dual_cndmask_b32 v0, 0, v2
+; GCN-NEXT:    v_dual_cndmask_b32 v3, 0, v5 :: v_dual_cndmask_b32 v2, 0, v4
+; GCN-NEXT:    global_store_b128 v[6:7], v[0:3], off
+; GCN-NEXT:    s_endpgm
+.entry:
+  %vcc = icmp ule i64 %a, 2
+  %val1 = select i1 %vcc, i64 %p, i64 0
+  %val2 = select i1 %vcc, i64 %q, i64 0
+  %ret0 = insertelement <2 x i64> poison, i64 %val1, i64 0
+  %ret1 = insertelement <2 x i64> %ret0, i64 %val2, i64 1
+  store <2 x i64> %ret1, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_cs void @test_u64_ugt(i64 %a, i64 %p, i64 %q, ptr addrspace(1) %out) {
+; GCN-LABEL: test_u64_ugt:
+; GCN:       ; %bb.0: ; %.entry
+; GCN-NEXT:    v_cmp_gt_u64_e32 vcc_lo, 2, v[0:1]
+; GCN-NEXT:    v_cndmask_b32_e64 v1, v3, 0, vcc_lo
+; GCN-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GCN-NEXT:    v_cndmask_b32_e64 v3, v5, 0, vcc_lo
+; GCN-NEXT:    v_cndmask_b32_e64 v2, v4, 0, vcc_lo
+; GCN-NEXT:    global_store_b128 v[6:7], v[0:3], off
+; GCN-NEXT:    s_endpgm
+.entry:
+  %vcc = icmp ugt i64 2, %a
+  %val1 = select i1 %vcc, i64 0, i64 %p
+  %val2 = select i1 %vcc, i64 0, i64 %q
+  %ret0 = insertelement <2 x i64> poison, i64 %val1, i64 0
+  %ret1 = insertelement <2 x i64> %ret0, i64 %val2, i64 1
+  store <2 x i64> %ret1, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_cs void @test_u64_ult(i64 %a, i64 %p, i64 %q, ptr addrspace(1) %out) {
+; GCN-LABEL: test_u64_ult:
+; GCN:       ; %bb.0: ; %.entry
+; GCN-NEXT:    v_cmp_lt_u64_e32 vcc_lo, 2, v[0:1]
+; GCN-NEXT:    v_cndmask_b32_e64 v1, v3, 0, vcc_lo
+; GCN-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GCN-NEXT:    v_cndmask_b32_e64 v3, v5, 0, vcc_lo
+; GCN-NEXT:    v_cndmask_b32_e64 v2, v4, 0, vcc_lo
+; GCN-NEXT:    global_store_b128 v[6:7], v[0:3], off
+; GCN-NEXT:    s_endpgm
+.entry:
+  %vcc = icmp ult i64 2, %a
+  %val1 = select i1 %vcc, i64 0, i64 %p
+  %val2 = select i1 %vcc, i64 0, i64 %q
+  %ret0 = insertelement <2 x i64> poison, i64 %val1, i64 0
+  %ret1 = insertelement <2 x i64> %ret0, i64 %val2, i64 1
+  store <2 x i64> %ret1, ptr addrspace(1) %out
+  ret void
+}
+
+;tests for float 32
+define amdgpu_cs void @test_f32_oeq(float %a, float %p, float %q, ptr addrspace(1) %out) {
+; GCN-LABEL: test_f32_oeq:
+; GCN:       ; %bb.0: ; %.entry
+; GCN-NEXT:    v_cmp_eq_f32_e32 vcc_lo, 2.0, v0
+; GCN-NEXT:    v_cndmask_b32_e64 v0, v1, 0, vcc_lo
+; GCN-NEXT:    v_cndmask_b32_e64 v1, v2, 0, vcc_lo
+; GCN-NEXT:    global_store_b64 v[3:4], v[0:1], off
+; GCN-NEXT:    s_endpgm
+.entry:
+  %vcc = fcmp oeq float %a, 2.0
+  %val1 = select i1 %vcc, float 0.0, float %p
+  %val2 = select i1 %vcc, float 0.0, float %q
+  %ret0 = insertelement <2 x float> poison, float %val1, i32 0
+  %ret1 = insertelement <2 x float> %ret0, float %val2, i32 1
+  store <2 x float> %ret1, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_cs void @test_f32_negative_modifiers(float %a, float %p, float %q, ptr addrspace(1) %out) {
+; GCN-LABEL: test_f32_negative_modifiers:
+; GCN:       ; %bb.0: ; %.entry
+; GCN-NEXT:    v_cmp_eq_f32_e32 vcc_lo, 2.0, v0
+; GCN-NEXT:    v_cndmask_b32_e64 v0, -v1, 0, vcc_lo
+; GCN-NEXT:    v_cndmask_b32_e64 v1, -v2, 0, vcc_lo
+; GCN-NEXT:    global_store_b64 v[3:4], v[0:1], off
+; GCN-NEXT:    s_endpgm
+.entry:
+  %r = fneg float %p
+  %s = fneg  float %q
+  %vcc = fcmp oeq float 2.0, %a
+  %val1 = select i1 %vcc, float 0.0, float %r
+  %val2 = select i1 %vcc, float 0.0, float %s
+  %ret0 = insertelement <2 x float> poison, float %val1, i32 0
+  %ret1 = insertelement <2 x float> %ret0, float %val2, i32 1
+  store <2 x float> %ret1, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_cs void @test_f32_one(float %a, float %p, float %q, ptr addrspace(1) %out) {
+; GCN-LABEL: test_f32_one:
+; GCN:       ; %bb.0: ; %.entry
+; GCN-NEXT:    v_cmp_lg_f32_e32 vcc_lo, 2.0, v0
+; GCN-NEXT:    v_cndmask_b32_e64 v0, v1, 0, vcc_lo
+; GCN-NEXT:    v_cndmask_b32_e64 v1, v2, 0, vcc_lo
+; GCN-NEXT:    global_store_b64 v[3:4], v[0:1], off
+; GCN-NEXT:    s_endpgm
+.entry:
+  %vcc = fcmp one float %a, 2.0
+  %val1 = select i1 %vcc, float 0.0, float %p
+  %val2 = select i1 %vcc, float 0.0, float %q
+  %ret0 = insertelement <2 x float> poison, float %val1, i32 0
+  %ret1 = insertelement <2 x float> %ret0, float %val2, i32 1
+  store <2 x float> %ret1, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_cs void @test_f32_ord(float %a, float %p, float %q, ptr addrspace(1) %out) {
+; GCN-LABEL: test_f32_ord:
+; GCN:       ; %bb.0: ; %.entry
+; GCN-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v0
+; GCN-NEXT:    v_cndmask_b32_e64 v0, v1, 0, vcc_lo
+; GCN-NEXT:    v_cndmask_b32_e64 v1, v2, 0, vcc_lo
+; GCN-NEXT:    global_store_b64 v[3:4], v[0:1], off
+; GCN-NEXT:    s_endpgm
+.entry:
+  %vcc = fcmp ord float %a, 2.0
+  %val1 = select i1 %vcc, float 0.0, float %p
+  %val2 = select i1 %vcc, float 0.0, float %q
+  %ret0 = insertelement <2 x float> poison, float %val1, i32 0
+  %ret1 = insertelement <2 x float> %ret0, float %val2, i32 1
+  store <2 x float> %ret1, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_cs void @test_f32_uno(float %a, float %p, float %q, ptr addrspace(1) %out) {
+; GCN-LABEL: test_f32_uno:
+; GCN:       ; %bb.0: ; %.entry
+; GCN-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GCN-NEXT:    v_cndmask_b32_e64 v0, v1, 0, vcc_lo
+; GCN-NEXT:    v_cndmask_b32_e64 v1, v2, 0, vcc_lo
+; GCN-NEXT:    global_store_b64 v[3:4], v[0:1], off
+; GCN-NEXT:    s_endpgm
+.entry:
+  %vcc = fcmp uno float %a, 2.0
+  %val1 = select i1 %vcc, float 0.0, float %p
+  %val2 = select i1 %vcc, float 0.0, float %q
+  %ret0 = insertelement <2 x float> poison, float %val1, i32 0
+  %ret1 = insertelement <2 x float> %ret0, float %val2, i32 1
+  store <2 x float> %ret1, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_cs void @test_f32_oge(float %a, float %p, float %q, ptr addrspace(1) %out) {
+; GCN-LABEL: test_f32_oge:
+; GCN:       ; %bb.0: ; %.entry
+; GCN-NEXT:    v_cmp_ge_f32_e32 vcc_lo, 2.0, v0
+; GCN-NEXT:    v_cndmask_b32_e64 v0, v1, 0, vcc_lo
+; GCN-NEXT:    v_cndmask_b32_e64 v1, v2, 0, vcc_lo
+; GCN-NEXT:    global_store_b64 v[3:4], v[0:1], off
+; GCN-NEXT:    s_endpgm
+.entry:
+  %vcc = fcmp oge float 2.0, %a
+  %val1 = select i1 %vcc, float 0.0, float %p
+  %val2 = select i1 %vcc, float 0.0, float %q
+  %ret0 = insertelement <2 x float> poison, float %val1, i32 0
+  %ret1 = insertelement <2 x float> %ret0, float %val2, i32 1
+  store <2 x float> %ret1, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_cs void @test_f32_ole(float %a, float %p, float %q, ptr addrspace(1) %out) {
+; GCN-LABEL: test_f32_ole:
+; GCN:       ; %bb.0: ; %.entry
+; GCN-NEXT:    v_cmp_le_f32_e32 vcc_lo, 2.0, v0
+; GCN-NEXT:    v_cndmask_b32_e64 v0, v1, 0, vcc_lo
+; GCN-NEXT:    v_cndmask_b32_e64 v1, v2, 0, vcc_lo
+; GCN-NEXT:    global_store_b64 v[3:4], v[0:1], off
+; GCN-NEXT:    s_endpgm
+.entry:
+  %vcc = fcmp ole float 2.0, %a
+  %val1 = select i1 %vcc, float 0.0, float %p
+  %val2 = select i1 %vcc, float 0.0, float %q
+  %ret0 = insertelement <2 x float> poison, float %val1, i32 0
+  %ret1 = insertelement <2 x float> %ret0, float %val2, i32 1
+  store <2 x float> %ret1, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_cs void @test_f32_ogt(float %a, float %p, float %q, ptr addrspace(1) %out) {
+; GCN-LABEL: test_f32_ogt:
+; GCN:       ; %bb.0: ; %.entry
+; GCN-NEXT:    v_cmp_gt_f32_e32 vcc_lo, 2.0, v0
+; GCN-NEXT:    v_cndmask_b32_e64 v0, v1, 0, vcc_lo
+; GCN-NEXT:    v_cndmask_b32_e64 v1, v2, 0, vcc_lo
+; GCN-NEXT:    global_store_b64 v[3:4], v[0:1], off
+; GCN-NEXT:    s_endpgm
+.entry:
+  %vcc = fcmp ogt float 2.0, %a
+  %val1 = select i1 %vcc, float 0.0, float %p
+  %val2 = select i1 %vcc, float 0.0, float %q
+  %ret0 = insertelement <2 x float> poison, float %val1, i32 0
+  %ret1 = insertelement <2 x float> %ret0, float %val2, i32 1
+  store <2 x float> %ret1, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_cs void @test_f32_olt(float %a, float %p, float %q, ptr addrspace(1) %out) {
+; GCN-LABEL: test_f32_olt:
+; GCN:       ; %bb.0: ; %.entry
+; GCN-NEXT:    v_cmp_lt_f32_e32 vcc_lo, 2.0, v0
+; GCN-NEXT:    v_cndmask_b32_e64 v0, v1, 0, vcc_lo
+; GCN-NEXT:    v_cndmask_b32_e64 v1, v2, 0, vcc_lo
+; GCN-NEXT:    global_store_b64 v[3:4], v[0:1], off
+; GCN-NEXT:    s_endpgm
+.entry:
+  %vcc = fcmp olt float 2.0, %a
+  %val1 = select i1 %vcc, float 0.0, float %p
+  %val2 = select i1 %vcc, float 0.0, float %q
+  %ret0 = insertelement <2 x float> poison, float %val1, i32 0
+  %ret1 = insertelement <2 x float> %ret0, float %val2, i32 1
+  store <2 x float> %ret1, ptr addrspace(1) %out
+  ret void
+}
+
+;tests for float64
+define amdgpu_cs void @test_f64_oeq(double %a, double %p, double %q, ptr addrspace(1) %out) {
+; GCN-LABEL: test_f64_oeq:
+; GCN:       ; %bb.0: ; %.entry
+; GCN-NEXT:    v_cmp_eq_f64_e32 vcc_lo, 2.0, v[0:1]
+; GCN-NEXT:    v_cndmask_b32_e64 v1, v3, 0, vcc_lo
+; GCN-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GCN-NEXT:    v_cndmask_b32_e64 v3, v5, 0, vcc_lo
+; GCN-NEXT:    v_cndmask_b32_e64 v2, v4, 0, vcc_lo
+; GCN-NEXT:    global_store_b128 v[6:7], v[0:3], off
+; GCN-NEXT:    s_endpgm
+.entry:
+  %vcc = fcmp oeq double 2.0, %a
+  %val1 = select i1 %vcc, double 0.0, double %p
+  %val2 = select i1 %vcc, double 0.0, double %q
+  %ret0 = insertelement <2 x double> poison, double %val1, i32 0
+  %ret1 = insertelement <2 x double> %ret0, double %val2, i32 1
+  store <2 x double> %ret1, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_cs void @test_f64_one(double %a, double %p, double %q, ptr addrspace(1) %out) {
+; GCN-LABEL: test_f64_one:
+; GCN:       ; %bb.0: ; %.entry
+; GCN-NEXT:    v_cmp_lg_f64_e32 vcc_lo, 2.0, v[0:1]
+; GCN-NEXT:    v_cndmask_b32_e64 v1, v3, 0, vcc_lo
+; GCN-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GCN-NEXT:    v_cndmask_b32_e64 v3, v5, 0, vcc_lo
+; GCN-NEXT:    v_cndmask_b32_e64 v2, v4, 0, vcc_lo
+; GCN-NEXT:    global_store_b128 v[6:7], v[0:3], off
+; GCN-NEXT:    s_endpgm
+.entry:
+  %vcc = fcmp one double 2.0, %a
+  %val1 = select i1 %vcc, double 0.0, double %p
+  %val2 = select i1 %vcc, double 0.0, double %q
+  %ret0 = insertelement <2 x double> poison, double %val1, i32 0
+  %ret1 = insertelement <2 x double> %ret0, double %val2, i32 1
+  store <2 x double> %ret1, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_cs void @test_f64_oge(double %a, double %p, double %q, ptr addrspace(1) %out) {
+; GCN-LABEL: test_f64_oge:
+; GCN:       ; %bb.0: ; %.entry
+; GCN-NEXT:    v_cmp_ge_f64_e32 vcc_lo, 2.0, v[0:1]
+; GCN-NEXT:    v_cndmask_b32_e64 v1, v3, 0, vcc_lo
+; GCN-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GCN-NEXT:    v_cndmask_b32_e64 v3, v5, 0, vcc_lo
+; GCN-NEXT:    v_cndmask_b32_e64 v2, v4, 0, vcc_lo
+; GCN-NEXT:    global_store_b128 v[6:7], v[0:3], off
+; GCN-NEXT:    s_endpgm
+.entry:
+  %vcc = fcmp oge double 2.0, %a
+  %val1 = select i1 %vcc, double 0.0, double %p
+  %val2 = select i1 %vcc, double 0.0, double %q
+  %ret0 = insertelement <2 x double> poison, double %val1, i32 0
+  %ret1 = insertelement <2 x double> %ret0, double %val2, i32 1
+  store <2 x double> %ret1, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_cs void @test_f64_ole(double %a, double %p, double %q, ptr addrspace(1) %out) {
+; GCN-LABEL: test_f64_ole:
+; GCN:       ; %bb.0: ; %.entry
+; GCN-NEXT:    v_cmp_le_f64_e32 vcc_lo, 2.0, v[0:1]
+; GCN-NEXT:    v_cndmask_b32_e64 v1, v3, 0, vcc_lo
+; GCN-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GCN-NEXT:    v_cndmask_b32_e64 v3, v5, 0, vcc_lo
+; GCN-NEXT:    v_cndmask_b32_e64 v2, v4, 0, vcc_lo
+; GCN-NEXT:    global_store_b128 v[6:7], v[0:3], off
+; GCN-NEXT:    s_endpgm
+.entry:
+  %vcc = fcmp ole double 2.0, %a
+  %val1 = select i1 %vcc, double 0.0, double %p
+  %val2 = select i1 %vcc, double 0.0, double %q
+  %ret0 = insertelement <2 x double> poison, double %val1, i32 0
+  %ret1 = insertelement <2 x double> %ret0, double %val2, i32 1
+  store <2 x double> %ret1, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_cs void @test_f64_ogt(double %a, double %p, double %q, ptr addrspace(1) %out) {
+; GCN-LABEL: test_f64_ogt:
+; GCN:       ; %bb.0: ; %.entry
+; GCN-NEXT:    v_cmp_gt_f64_e32 vcc_lo, 2.0, v[0:1]
+; GCN-NEXT:    v_cndmask_b32_e64 v1, v3, 0, vcc_lo
+; GCN-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GCN-NEXT:    v_cndmask_b32_e64 v3, v5, 0, vcc_lo
+; GCN-NEXT:    v_cndmask_b32_e64 v2, v4, 0, vcc_lo
+; GCN-NEXT:    global_store_b128 v[6:7], v[0:3], off
+; GCN-NEXT:    s_endpgm
+.entry:
+  %vcc = fcmp ogt double 2.0, %a
+  %val1 = select i1 %vcc, double 0.0, double %p
+  %val2 = select i1 %vcc, double 0.0, double %q
+  %ret0 = insertelement <2 x double> poison, double %val1, i32 0
+  %ret1 = insertelement <2 x double> %ret0, double %val2, i32 1
+  store <2 x double> %ret1, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_cs void @test_f64_olt(double %a, double %p, double %q, ptr addrspace(1) %out) {
+; GCN-LABEL: test_f64_olt:
+; GCN:       ; %bb.0: ; %.entry
+; GCN-NEXT:    v_cmp_lt_f64_e32 vcc_lo, 2.0, v[0:1]
+; GCN-NEXT:    v_cndmask_b32_e64 v1, v3, 0, vcc_lo
+; GCN-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GCN-NEXT:    v_cndmask_b32_e64 v3, v5, 0, vcc_lo
+; GCN-NEXT:    v_cndmask_b32_e64 v2, v4, 0, vcc_lo
+; GCN-NEXT:    global_store_b128 v[6:7], v[0:3], off
+; GCN-NEXT:    s_endpgm
+.entry:
+  %vcc = fcmp olt double 2.0, %a
+  %val1 = select i1 %vcc, double 0.0, double %p
+  %val2 = select i1 %vcc, double 0.0, double %q
+  %ret0 = insertelement <2 x double> poison, double %val1, i32 0
+  %ret1 = insertelement <2 x double> %ret0, double %val2, i32 1
+  store <2 x double> %ret1, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_cs void @test_f64_ord(double %a, double %p, double %q, ptr addrspace(1) %out) {
+; GCN-LABEL: test_f64_ord:
+; GCN:       ; %bb.0: ; %.entry
+; GCN-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GCN-NEXT:    v_cndmask_b32_e64 v1, v3, 0, vcc_lo
+; GCN-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GCN-NEXT:    v_cndmask_b32_e64 v3, v5, 0, vcc_lo
+; GCN-NEXT:    v_cndmask_b32_e64 v2, v4, 0, vcc_lo
+; GCN-NEXT:    global_store_b128 v[6:7], v[0:3], off
+; GCN-NEXT:    s_endpgm
+.entry:
+  %vcc = fcmp ord double 2.0, %a
+  %val1 = select i1 %vcc, double 0.0, double %p
+  %val2 = select i1 %vcc, double 0.0, double %q
+  %ret0 = insertelement <2 x double> poison, double %val1, i32 0
+  %ret1 = insertelement <2 x double> %ret0, double %val2, i32 1
+  store <2 x double> %ret1, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_cs void @test_f64_uno(double %a, double %p, double %q, ptr addrspace(1) %out) {
+; GCN-LABEL: test_f64_uno:
+; GCN:       ; %bb.0: ; %.entry
+; GCN-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GCN-NEXT:    v_cndmask_b32_e64 v1, v3, 0, vcc_lo
+; GCN-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GCN-NEXT:    v_cndmask_b32_e64 v3, v5, 0, vcc_lo
+; GCN-NEXT:    v_cndmask_b32_e64 v2, v4, 0, vcc_lo
+; GCN-NEXT:    global_store_b128 v[6:7], v[0:3], off
+; GCN-NEXT:    s_endpgm
+.entry:
+  %vcc = fcmp uno double 2.0, %a
+  %val1 = select i1 %vcc, double 0.0, double %p
+  %val2 = select i1 %vcc, double 0.0, double %q
+  %ret0 = insertelement <2 x double> poison, double %val1, i32 0
+  %ret1 = insertelement <2 x double> %ret0, double %val2, i32 1
+  store <2 x double> %ret1, ptr addrspace(1) %out
+  ret void
+}

>From d2c375298be110377e8262ff92702c6076a4744e Mon Sep 17 00:00:00 2001
From: Ana Mihajlovic <Ana.Mihajlovic at amd.com>
Date: Mon, 2 Jun 2025 12:06:29 +0200
Subject: [PATCH 2/6] [AMDGPU] Swap select operands to allow later v_cndmask
 shrinking into vop2

---
 llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp |   30 +-
 .../CodeGen/AMDGPU/extract_vector_dynelt.ll   |   93 +-
 llvm/test/CodeGen/AMDGPU/fmaximum3.ll         |  204 +-
 llvm/test/CodeGen/AMDGPU/fminimum3.ll         |  204 +-
 llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll |   24 +-
 .../CodeGen/AMDGPU/insert_vector_dynelt.ll    |   48 +-
 llvm/test/CodeGen/AMDGPU/llvm.maximum.f64.ll  | 1678 ++++++++---------
 llvm/test/CodeGen/AMDGPU/llvm.minimum.f64.ll  | 1678 ++++++++---------
 llvm/test/CodeGen/AMDGPU/llvm.round.f64.ll    |    5 +-
 llvm/test/CodeGen/AMDGPU/shrink-cndmask.ll    |  188 +-
 llvm/test/CodeGen/AMDGPU/uaddsat.ll           |   29 +-
 llvm/test/CodeGen/AMDGPU/usubsat.ll           |   29 +-
 llvm/test/CodeGen/AMDGPU/v_cndmask.ll         |   30 +-
 .../CodeGen/AMDGPU/vector-reduce-fmaximum.ll  |  840 ++++-----
 .../CodeGen/AMDGPU/vector-reduce-fminimum.ll  |  840 ++++-----
 15 files changed, 2954 insertions(+), 2966 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 5f41bd7d8a617..1a5d2232213ec 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -4710,6 +4710,11 @@ AMDGPUTargetLowering::foldFreeOpFromSelect(TargetLowering::DAGCombinerInfo &DCI,
   return SDValue();
 }
 
+bool isFnegOrFabs(SDValue &V) {
+  unsigned Opcode = V.getOpcode();
+  return Opcode == ISD::FNEG || Opcode == ISD::FABS;
+}
+
 SDValue AMDGPUTargetLowering::performSelectCombine(SDNode *N,
                                                    DAGCombinerInfo &DCI) const {
   if (SDValue Folded = foldFreeOpFromSelect(DCI, SDValue(N, 0)))
@@ -4727,7 +4732,30 @@ SDValue AMDGPUTargetLowering::performSelectCombine(SDNode *N,
   SDValue True = N->getOperand(1);
   SDValue False = N->getOperand(2);
 
-  if (Cond.hasOneUse()) { // TODO: Look for multiple select uses.
+  int ShouldSwap = 0;
+  for (auto it = Cond->use_begin(); it != Cond->use_end(); it++) {
+    auto User = it->getUser();
+
+    if (User->getOpcode() != ISD::SELECT) {
+      ShouldSwap = 0;
+      break;
+    }
+
+    auto Op1 = User->getOperand(1);
+    auto Op2 = User->getOperand(2);
+
+    // If the operand is defined by fneg or fabs, the instruction will have
+    // source modifiers and therefore can't be shrunk to VOP2.
+    if (isFnegOrFabs(Op1) || isFnegOrFabs(Op2))
+      continue;
+
+    if (!Op1->isDivergent() && Op2->isDivergent())
+      ShouldSwap++;
+    else if (Op1->isDivergent() && !Op2->isDivergent())
+      ShouldSwap--;
+  }
+
+  if (Cond->hasOneUse() || ShouldSwap > 0) {
     SelectionDAG &DAG = DCI.DAG;
     if (DAG.isConstantValueOfAnyType(True) &&
         !DAG.isConstantValueOfAnyType(False)) {
diff --git a/llvm/test/CodeGen/AMDGPU/extract_vector_dynelt.ll b/llvm/test/CodeGen/AMDGPU/extract_vector_dynelt.ll
index c69b0cce3d208..9ddf3e9340435 100644
--- a/llvm/test/CodeGen/AMDGPU/extract_vector_dynelt.ll
+++ b/llvm/test/CodeGen/AMDGPU/extract_vector_dynelt.ll
@@ -1097,71 +1097,72 @@ define double @double16_extelt_vec(i32 %sel) {
 ; GCN-LABEL: double16_extelt_vec:
 ; GCN:       ; %bb.0: ; %entry
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    v_mov_b32_e32 v3, 0x3ff19999
-; GCN-NEXT:    v_mov_b32_e32 v4, 0x4000cccc
-; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
-; GCN-NEXT:    v_cmp_eq_u32_e64 s[4:5], 2, v0
+; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v0
+; GCN-NEXT:    v_cmp_eq_u32_e64 s[4:5], 1, v0
 ; GCN-NEXT:    v_mov_b32_e32 v1, 0x9999999a
 ; GCN-NEXT:    v_mov_b32_e32 v2, 0xcccccccd
-; GCN-NEXT:    v_cndmask_b32_e32 v3, v3, v4, vcc
-; GCN-NEXT:    v_mov_b32_e32 v4, 0x4008cccc
-; GCN-NEXT:    s_or_b64 vcc, s[4:5], vcc
+; GCN-NEXT:    v_mov_b32_e32 v3, 0x3ff19999
+; GCN-NEXT:    v_mov_b32_e32 v4, 0x4000cccc
+; GCN-NEXT:    s_or_b64 vcc, vcc, s[4:5]
 ; GCN-NEXT:    v_cndmask_b32_e64 v3, v3, v4, s[4:5]
 ; GCN-NEXT:    v_cndmask_b32_e32 v2, v1, v2, vcc
+; GCN-NEXT:    v_mov_b32_e32 v4, 0x4008cccc
+; GCN-NEXT:    v_cmp_ne_u32_e32 vcc, 2, v0
+; GCN-NEXT:    v_cndmask_b32_e32 v3, v4, v3, vcc
 ; GCN-NEXT:    v_mov_b32_e32 v4, 0x40106666
-; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v0
-; GCN-NEXT:    v_cndmask_b32_e32 v3, v3, v4, vcc
+; GCN-NEXT:    v_cmp_ne_u32_e32 vcc, 3, v0
+; GCN-NEXT:    v_cndmask_b32_e32 v3, v4, v3, vcc
 ; GCN-NEXT:    v_mov_b32_e32 v4, 0x40146666
-; GCN-NEXT:    v_cmp_eq_u32_e64 s[4:5], 4, v0
-; GCN-NEXT:    v_cndmask_b32_e64 v3, v3, v4, s[4:5]
-; GCN-NEXT:    s_or_b64 s[4:5], s[4:5], vcc
+; GCN-NEXT:    v_cmp_ne_u32_e64 s[4:5], 4, v0
+; GCN-NEXT:    v_cndmask_b32_e64 v3, v4, v3, s[4:5]
+; GCN-NEXT:    s_and_b64 s[4:5], s[4:5], vcc
 ; GCN-NEXT:    v_mov_b32_e32 v4, 0x40186666
-; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v0
-; GCN-NEXT:    v_cndmask_b32_e32 v3, v3, v4, vcc
-; GCN-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
+; GCN-NEXT:    v_cmp_ne_u32_e32 vcc, 5, v0
+; GCN-NEXT:    v_cndmask_b32_e32 v3, v4, v3, vcc
+; GCN-NEXT:    s_and_b64 s[4:5], vcc, s[4:5]
 ; GCN-NEXT:    v_mov_b32_e32 v5, 0x401c6666
-; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v0
+; GCN-NEXT:    v_cmp_ne_u32_e32 vcc, 6, v0
 ; GCN-NEXT:    v_mov_b32_e32 v4, 0x66666666
-; GCN-NEXT:    v_cndmask_b32_e32 v3, v3, v5, vcc
-; GCN-NEXT:    s_or_b64 vcc, vcc, s[4:5]
-; GCN-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
+; GCN-NEXT:    v_cndmask_b32_e32 v3, v5, v3, vcc
+; GCN-NEXT:    s_and_b64 vcc, vcc, s[4:5]
+; GCN-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
 ; GCN-NEXT:    v_mov_b32_e32 v4, 0x40203333
-; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 7, v0
-; GCN-NEXT:    v_cndmask_b32_e32 v3, v3, v4, vcc
+; GCN-NEXT:    v_cmp_ne_u32_e32 vcc, 7, v0
+; GCN-NEXT:    v_cndmask_b32_e32 v3, v4, v3, vcc
 ; GCN-NEXT:    v_mov_b32_e32 v4, 0x40223333
-; GCN-NEXT:    v_cmp_eq_u32_e64 s[4:5], 8, v0
-; GCN-NEXT:    v_cndmask_b32_e64 v3, v3, v4, s[4:5]
-; GCN-NEXT:    s_or_b64 s[4:5], s[4:5], vcc
+; GCN-NEXT:    v_cmp_ne_u32_e64 s[4:5], 8, v0
+; GCN-NEXT:    v_cndmask_b32_e64 v3, v4, v3, s[4:5]
+; GCN-NEXT:    s_and_b64 s[4:5], s[4:5], vcc
 ; GCN-NEXT:    v_mov_b32_e32 v4, 0x40243333
-; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 9, v0
-; GCN-NEXT:    v_cndmask_b32_e32 v3, v3, v4, vcc
-; GCN-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
+; GCN-NEXT:    v_cmp_ne_u32_e32 vcc, 9, v0
+; GCN-NEXT:    v_cndmask_b32_e32 v3, v4, v3, vcc
+; GCN-NEXT:    s_and_b64 s[4:5], vcc, s[4:5]
 ; GCN-NEXT:    v_mov_b32_e32 v4, 0x40263333
-; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 10, v0
-; GCN-NEXT:    v_cndmask_b32_e32 v3, v3, v4, vcc
-; GCN-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
+; GCN-NEXT:    v_cmp_ne_u32_e32 vcc, 10, v0
+; GCN-NEXT:    v_cndmask_b32_e32 v3, v4, v3, vcc
+; GCN-NEXT:    s_and_b64 s[4:5], vcc, s[4:5]
 ; GCN-NEXT:    v_mov_b32_e32 v4, 0x40283333
-; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 11, v0
-; GCN-NEXT:    v_cndmask_b32_e32 v3, v3, v4, vcc
-; GCN-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
+; GCN-NEXT:    v_cmp_ne_u32_e32 vcc, 11, v0
+; GCN-NEXT:    v_cndmask_b32_e32 v3, v4, v3, vcc
+; GCN-NEXT:    s_and_b64 s[4:5], vcc, s[4:5]
 ; GCN-NEXT:    v_mov_b32_e32 v4, 0x402a3333
-; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 12, v0
-; GCN-NEXT:    v_cndmask_b32_e32 v3, v3, v4, vcc
-; GCN-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
+; GCN-NEXT:    v_cmp_ne_u32_e32 vcc, 12, v0
+; GCN-NEXT:    v_cndmask_b32_e32 v3, v4, v3, vcc
+; GCN-NEXT:    s_and_b64 s[4:5], vcc, s[4:5]
 ; GCN-NEXT:    v_mov_b32_e32 v4, 0x402c3333
-; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 13, v0
-; GCN-NEXT:    v_cndmask_b32_e32 v3, v3, v4, vcc
-; GCN-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
+; GCN-NEXT:    v_cmp_ne_u32_e32 vcc, 13, v0
+; GCN-NEXT:    v_cndmask_b32_e32 v3, v4, v3, vcc
+; GCN-NEXT:    s_and_b64 s[4:5], vcc, s[4:5]
 ; GCN-NEXT:    v_mov_b32_e32 v5, 0x402e3333
-; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 14, v0
+; GCN-NEXT:    v_cmp_ne_u32_e32 vcc, 14, v0
 ; GCN-NEXT:    v_mov_b32_e32 v4, 0x33333333
-; GCN-NEXT:    v_cndmask_b32_e32 v3, v3, v5, vcc
-; GCN-NEXT:    s_or_b64 vcc, vcc, s[4:5]
-; GCN-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
-; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 15, v0
-; GCN-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
+; GCN-NEXT:    v_cndmask_b32_e32 v3, v5, v3, vcc
+; GCN-NEXT:    s_and_b64 vcc, vcc, s[4:5]
+; GCN-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
+; GCN-NEXT:    v_cmp_ne_u32_e32 vcc, 15, v0
+; GCN-NEXT:    v_cndmask_b32_e32 v0, v1, v2, vcc
 ; GCN-NEXT:    v_mov_b32_e32 v1, 0x40301999
-; GCN-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
+; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
 entry:
   %ext = extractelement <16 x double> <double 1.1, double 2.1, double 3.1, double 4.1, double 5.1, double 6.1, double 7.1, double 8.1, double 9.1, double 10.1, double 11.1, double 12.1, double 13.1, double 14.1, double 15.1, double 16.1>, i32 %sel
diff --git a/llvm/test/CodeGen/AMDGPU/fmaximum3.ll b/llvm/test/CodeGen/AMDGPU/fmaximum3.ll
index 53d940e1e6c1a..8a17a759ac334 100644
--- a/llvm/test/CodeGen/AMDGPU/fmaximum3.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmaximum3.ll
@@ -3167,15 +3167,15 @@ define double @v_fmaximum3_f64(double %a, double %b, double %c) {
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    v_max_f64 v[6:7], v[0:1], v[2:3]
 ; GFX9-NEXT:    v_mov_b32_e32 v8, 0x7ff80000
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX9-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[2:3]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v7, v8, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v6, 0, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v8, v7, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v6, vcc
 ; GFX9-NEXT:    v_max_f64 v[2:3], v[0:1], v[4:5]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX9-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[4:5]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v8, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v8, v3, vcc
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
   %max0 = call double @llvm.maximum.f64(double %a, double %b)
   %max1 = call double @llvm.maximum.f64(double %max0, double %c)
@@ -3200,15 +3200,15 @@ define double @v_fmaximum3_f64_commute(double %a, double %b, double %c) {
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    v_max_f64 v[6:7], v[0:1], v[2:3]
 ; GFX9-NEXT:    v_mov_b32_e32 v8, 0x7ff80000
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX9-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[2:3]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v7, v8, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v6, 0, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v8, v7, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v6, vcc
 ; GFX9-NEXT:    v_max_f64 v[2:3], v[4:5], v[0:1]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[0:1]
+; GFX9-NEXT:    v_cmp_o_f64_e32 vcc, v[4:5], v[0:1]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v8, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v8, v3, vcc
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
   %max0 = call double @llvm.maximum.f64(double %a, double %b)
   %max1 = call double @llvm.maximum.f64(double %c, double %max0)
@@ -3274,15 +3274,15 @@ define double @v_fmaximum3_f64_fabs0(double %a, double %b, double %c) {
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    v_max_f64 v[6:7], |v[0:1]|, v[2:3]
 ; GFX9-NEXT:    v_mov_b32_e32 v8, 0x7ff80000
-; GFX9-NEXT:    v_cmp_u_f64_e64 vcc, |v[0:1]|, v[2:3]
+; GFX9-NEXT:    v_cmp_o_f64_e64 vcc, |v[0:1]|, v[2:3]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v7, v8, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v6, 0, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v8, v7, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v6, vcc
 ; GFX9-NEXT:    v_max_f64 v[2:3], v[0:1], v[4:5]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX9-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[4:5]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v8, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v8, v3, vcc
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
   %a.fabs = call double @llvm.fabs.f64(double %a)
   %max0 = call double @llvm.maximum.f64(double %a.fabs, double %b)
@@ -3308,15 +3308,15 @@ define double @v_fmaximum3_f64_fabs1(double %a, double %b, double %c) {
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    v_max_f64 v[6:7], v[0:1], |v[2:3]|
 ; GFX9-NEXT:    v_mov_b32_e32 v8, 0x7ff80000
-; GFX9-NEXT:    v_cmp_u_f64_e64 vcc, v[0:1], |v[2:3]|
+; GFX9-NEXT:    v_cmp_o_f64_e64 vcc, v[0:1], |v[2:3]|
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v7, v8, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v6, 0, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v8, v7, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v6, vcc
 ; GFX9-NEXT:    v_max_f64 v[2:3], v[0:1], v[4:5]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX9-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[4:5]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v8, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v8, v3, vcc
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
   %b.fabs = call double @llvm.fabs.f64(double %b)
   %max0 = call double @llvm.maximum.f64(double %a, double %b.fabs)
@@ -3342,15 +3342,15 @@ define double @v_fmaximum3_f64_fabs2(double %a, double %b, double %c) {
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    v_max_f64 v[6:7], v[0:1], v[2:3]
 ; GFX9-NEXT:    v_mov_b32_e32 v8, 0x7ff80000
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX9-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[2:3]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v7, v8, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v6, 0, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v8, v7, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v6, vcc
 ; GFX9-NEXT:    v_max_f64 v[2:3], v[0:1], |v[4:5]|
-; GFX9-NEXT:    v_cmp_u_f64_e64 vcc, v[0:1], |v[4:5]|
+; GFX9-NEXT:    v_cmp_o_f64_e64 vcc, v[0:1], |v[4:5]|
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v8, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v8, v3, vcc
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
   %c.fabs = call double @llvm.fabs.f64(double %c)
   %max0 = call double @llvm.maximum.f64(double %a, double %b)
@@ -3376,15 +3376,15 @@ define double @v_fmaximum3_f64_fabs_all(double %a, double %b, double %c) {
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    v_max_f64 v[6:7], |v[0:1]|, |v[2:3]|
 ; GFX9-NEXT:    v_mov_b32_e32 v8, 0x7ff80000
-; GFX9-NEXT:    v_cmp_u_f64_e64 vcc, |v[0:1]|, |v[2:3]|
+; GFX9-NEXT:    v_cmp_o_f64_e64 vcc, |v[0:1]|, |v[2:3]|
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v7, v8, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v6, 0, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v8, v7, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v6, vcc
 ; GFX9-NEXT:    v_max_f64 v[2:3], v[0:1], |v[4:5]|
-; GFX9-NEXT:    v_cmp_u_f64_e64 vcc, v[0:1], |v[4:5]|
+; GFX9-NEXT:    v_cmp_o_f64_e64 vcc, v[0:1], |v[4:5]|
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v8, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v8, v3, vcc
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
   %a.fabs = call double @llvm.fabs.f64(double %a)
   %b.fabs = call double @llvm.fabs.f64(double %b)
@@ -3412,15 +3412,15 @@ define double @v_fmaximum3_f64_fneg_all(double %a, double %b, double %c) {
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    v_max_f64 v[6:7], -v[0:1], -v[2:3]
 ; GFX9-NEXT:    v_mov_b32_e32 v8, 0x7ff80000
-; GFX9-NEXT:    v_cmp_u_f64_e64 vcc, -v[0:1], -v[2:3]
+; GFX9-NEXT:    v_cmp_o_f64_e64 vcc, -v[0:1], -v[2:3]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v7, v8, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v6, 0, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v8, v7, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v6, vcc
 ; GFX9-NEXT:    v_max_f64 v[2:3], v[0:1], -v[4:5]
-; GFX9-NEXT:    v_cmp_u_f64_e64 vcc, v[0:1], -v[4:5]
+; GFX9-NEXT:    v_cmp_o_f64_e64 vcc, v[0:1], -v[4:5]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v8, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v8, v3, vcc
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
   %a.fneg = fneg double %a
   %b.fneg = fneg double %b
@@ -3448,15 +3448,15 @@ define double @v_fmaximum3_f64_fneg_fabs_all(double %a, double %b, double %c) {
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    v_max_f64 v[6:7], -|v[0:1]|, -|v[2:3]|
 ; GFX9-NEXT:    v_mov_b32_e32 v8, 0x7ff80000
-; GFX9-NEXT:    v_cmp_u_f64_e64 vcc, -|v[0:1]|, -|v[2:3]|
+; GFX9-NEXT:    v_cmp_o_f64_e64 vcc, -|v[0:1]|, -|v[2:3]|
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v7, v8, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v6, 0, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v8, v7, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v6, vcc
 ; GFX9-NEXT:    v_max_f64 v[2:3], v[0:1], -|v[4:5]|
-; GFX9-NEXT:    v_cmp_u_f64_e64 vcc, v[0:1], -|v[4:5]|
+; GFX9-NEXT:    v_cmp_o_f64_e64 vcc, v[0:1], -|v[4:5]|
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v8, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v8, v3, vcc
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
   %a.fabs = call double @llvm.fabs.f64(double %a)
   %b.fabs = call double @llvm.fabs.f64(double %b)
@@ -3487,15 +3487,15 @@ define double @v_fmaximum3_f64_fneg0(double %a, double %b, double %c) {
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    v_max_f64 v[6:7], -v[0:1], v[2:3]
 ; GFX9-NEXT:    v_mov_b32_e32 v8, 0x7ff80000
-; GFX9-NEXT:    v_cmp_u_f64_e64 vcc, -v[0:1], v[2:3]
+; GFX9-NEXT:    v_cmp_o_f64_e64 vcc, -v[0:1], v[2:3]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v7, v8, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v6, 0, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v8, v7, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v6, vcc
 ; GFX9-NEXT:    v_max_f64 v[2:3], v[0:1], v[4:5]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX9-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[4:5]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v8, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v8, v3, vcc
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
   %a.fneg = fneg double %a
   %max0 = call double @llvm.maximum.f64(double %a.fneg, double %b)
@@ -3521,15 +3521,15 @@ define double @v_fmaximum3_f64_fneg1(double %a, double %b, double %c) {
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    v_max_f64 v[6:7], v[0:1], -v[2:3]
 ; GFX9-NEXT:    v_mov_b32_e32 v8, 0x7ff80000
-; GFX9-NEXT:    v_cmp_u_f64_e64 vcc, v[0:1], -v[2:3]
+; GFX9-NEXT:    v_cmp_o_f64_e64 vcc, v[0:1], -v[2:3]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v7, v8, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v6, 0, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v8, v7, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v6, vcc
 ; GFX9-NEXT:    v_max_f64 v[2:3], v[0:1], v[4:5]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX9-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[4:5]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v8, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v8, v3, vcc
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
   %b.fneg = fneg double %b
   %max0 = call double @llvm.maximum.f64(double %a, double %b.fneg)
@@ -3555,15 +3555,15 @@ define double @v_fmaximum3_f64_fneg2(double %a, double %b, double %c) {
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    v_max_f64 v[6:7], v[0:1], v[2:3]
 ; GFX9-NEXT:    v_mov_b32_e32 v8, 0x7ff80000
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX9-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[2:3]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v7, v8, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v6, 0, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v8, v7, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v6, vcc
 ; GFX9-NEXT:    v_max_f64 v[2:3], v[0:1], -v[4:5]
-; GFX9-NEXT:    v_cmp_u_f64_e64 vcc, v[0:1], -v[4:5]
+; GFX9-NEXT:    v_cmp_o_f64_e64 vcc, v[0:1], -v[4:5]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v8, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v8, v3, vcc
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
   %c.fneg = fneg double %c
   %max0 = call double @llvm.maximum.f64(double %a, double %b)
@@ -3591,15 +3591,15 @@ define double @v_fmaximum3_f64_const0(double %b, double %c) {
 ; GFX9-NEXT:    s_mov_b32 s1, 0x40200000
 ; GFX9-NEXT:    v_max_f64 v[4:5], v[0:1], s[0:1]
 ; GFX9-NEXT:    v_mov_b32_e32 v6, 0x7ff80000
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[0:1]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v5, v6, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v6, v5, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc
 ; GFX9-NEXT:    v_max_f64 v[4:5], v[0:1], v[2:3]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX9-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[2:3]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v5, v6, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v6, v5, vcc
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
   %max0 = call double @llvm.maximum.f64(double 8.0, double %b)
   %max1 = call double @llvm.maximum.f64(double %max0, double %c)
@@ -3624,16 +3624,16 @@ define double @v_fmaximum3_f64__const2(double %a, double %b) {
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    v_max_f64 v[4:5], v[0:1], v[2:3]
 ; GFX9-NEXT:    v_mov_b32_e32 v6, 0x7ff80000
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX9-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[2:3]
 ; GFX9-NEXT:    s_mov_b32 s0, 0
 ; GFX9-NEXT:    s_mov_b32 s1, 0x40200000
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v5, v6, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v6, v5, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc
 ; GFX9-NEXT:    v_max_f64 v[2:3], v[0:1], s[0:1]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[0:1]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v6, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
   %max0 = call double @llvm.maximum.f64(double %a, double %b)
   %max1 = call double @llvm.maximum.f64(double %max0, double 8.0)
@@ -3658,15 +3658,15 @@ define double @v_fmaximum3_f64_inlineimm0(double %b, double %c) {
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    v_max_f64 v[4:5], v[0:1], 4.0
 ; GFX9-NEXT:    v_mov_b32_e32 v6, 0x7ff80000
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[0:1]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v5, v6, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v6, v5, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc
 ; GFX9-NEXT:    v_max_f64 v[4:5], v[0:1], v[2:3]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX9-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[2:3]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v5, v6, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v6, v5, vcc
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
   %max0 = call double @llvm.maximum.f64(double 4.0, double %b)
   %max1 = call double @llvm.maximum.f64(double %max0, double %c)
@@ -3691,15 +3691,15 @@ define double @v_fmaximum3_f64__inlineimm(double %a, double %b) {
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    v_max_f64 v[4:5], v[0:1], v[2:3]
 ; GFX9-NEXT:    v_mov_b32_e32 v6, 0x7ff80000
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX9-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[2:3]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v5, v6, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v6, v5, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc
 ; GFX9-NEXT:    v_max_f64 v[2:3], v[0:1], 4.0
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[0:1]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v6, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
   %max0 = call double @llvm.maximum.f64(double %a, double %b)
   %max1 = call double @llvm.maximum.f64(double %max0, double 4.0)
@@ -3726,16 +3726,16 @@ define double @v_fmaximum3_f64_const1_const2(double %a) {
 ; GFX9-NEXT:    s_mov_b32 s1, 0x40200000
 ; GFX9-NEXT:    v_max_f64 v[2:3], v[0:1], s[0:1]
 ; GFX9-NEXT:    v_mov_b32_e32 v4, 0x7ff80000
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[0:1]
 ; GFX9-NEXT:    s_mov_b32 s0, 0
 ; GFX9-NEXT:    s_mov_b32 s1, 0x40300000
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v4, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v4, v3, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX9-NEXT:    v_max_f64 v[2:3], v[0:1], s[0:1]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[0:1]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v4, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v4, v3, vcc
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
   %max0 = call double @llvm.maximum.f64(double %a, double 8.0)
   %max1 = call double @llvm.maximum.f64(double %max0, double 16.0)
@@ -4003,15 +4003,15 @@ define <2 x double> @v_no_fmaximum3_f64__multi_use(double %a, double %b, double
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    v_max_f64 v[6:7], v[0:1], v[2:3]
 ; GFX9-NEXT:    v_mov_b32_e32 v8, 0x7ff80000
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX9-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[2:3]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v7, v8, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v6, 0, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v8, v7, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v6, vcc
 ; GFX9-NEXT:    v_max_f64 v[2:3], v[0:1], v[4:5]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX9-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[4:5]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e64 v2, v2, 0, vcc
-; GFX9-NEXT:    v_cndmask_b32_e32 v3, v3, v8, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v8, v3, vcc
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
   %max0 = call double @llvm.maximum.f64(double %a, double %b)
   %max1 = call double @llvm.maximum.f64(double %max0, double %c)
diff --git a/llvm/test/CodeGen/AMDGPU/fminimum3.ll b/llvm/test/CodeGen/AMDGPU/fminimum3.ll
index d1d0c0dcdb7e0..58d89d4076376 100644
--- a/llvm/test/CodeGen/AMDGPU/fminimum3.ll
+++ b/llvm/test/CodeGen/AMDGPU/fminimum3.ll
@@ -3167,15 +3167,15 @@ define double @v_fminimum3_f64(double %a, double %b, double %c) {
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    v_min_f64 v[6:7], v[0:1], v[2:3]
 ; GFX9-NEXT:    v_mov_b32_e32 v8, 0x7ff80000
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX9-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[2:3]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v7, v8, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v6, 0, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v8, v7, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v6, vcc
 ; GFX9-NEXT:    v_min_f64 v[2:3], v[0:1], v[4:5]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX9-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[4:5]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v8, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v8, v3, vcc
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
   %max0 = call double @llvm.minimum.f64(double %a, double %b)
   %max1 = call double @llvm.minimum.f64(double %max0, double %c)
@@ -3200,15 +3200,15 @@ define double @v_fminimum3_f64_commute(double %a, double %b, double %c) {
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    v_min_f64 v[6:7], v[0:1], v[2:3]
 ; GFX9-NEXT:    v_mov_b32_e32 v8, 0x7ff80000
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX9-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[2:3]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v7, v8, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v6, 0, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v8, v7, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v6, vcc
 ; GFX9-NEXT:    v_min_f64 v[2:3], v[4:5], v[0:1]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[0:1]
+; GFX9-NEXT:    v_cmp_o_f64_e32 vcc, v[4:5], v[0:1]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v8, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v8, v3, vcc
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
   %max0 = call double @llvm.minimum.f64(double %a, double %b)
   %max1 = call double @llvm.minimum.f64(double %c, double %max0)
@@ -3274,15 +3274,15 @@ define double @v_fminimum3_f64_fabs0(double %a, double %b, double %c) {
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    v_min_f64 v[6:7], |v[0:1]|, v[2:3]
 ; GFX9-NEXT:    v_mov_b32_e32 v8, 0x7ff80000
-; GFX9-NEXT:    v_cmp_u_f64_e64 vcc, |v[0:1]|, v[2:3]
+; GFX9-NEXT:    v_cmp_o_f64_e64 vcc, |v[0:1]|, v[2:3]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v7, v8, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v6, 0, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v8, v7, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v6, vcc
 ; GFX9-NEXT:    v_min_f64 v[2:3], v[0:1], v[4:5]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX9-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[4:5]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v8, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v8, v3, vcc
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
   %a.fabs = call double @llvm.fabs.f64(double %a)
   %max0 = call double @llvm.minimum.f64(double %a.fabs, double %b)
@@ -3308,15 +3308,15 @@ define double @v_fminimum3_f64_fabs1(double %a, double %b, double %c) {
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    v_min_f64 v[6:7], v[0:1], |v[2:3]|
 ; GFX9-NEXT:    v_mov_b32_e32 v8, 0x7ff80000
-; GFX9-NEXT:    v_cmp_u_f64_e64 vcc, v[0:1], |v[2:3]|
+; GFX9-NEXT:    v_cmp_o_f64_e64 vcc, v[0:1], |v[2:3]|
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v7, v8, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v6, 0, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v8, v7, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v6, vcc
 ; GFX9-NEXT:    v_min_f64 v[2:3], v[0:1], v[4:5]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX9-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[4:5]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v8, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v8, v3, vcc
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
   %b.fabs = call double @llvm.fabs.f64(double %b)
   %max0 = call double @llvm.minimum.f64(double %a, double %b.fabs)
@@ -3342,15 +3342,15 @@ define double @v_fminimum3_f64_fabs2(double %a, double %b, double %c) {
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    v_min_f64 v[6:7], v[0:1], v[2:3]
 ; GFX9-NEXT:    v_mov_b32_e32 v8, 0x7ff80000
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX9-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[2:3]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v7, v8, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v6, 0, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v8, v7, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v6, vcc
 ; GFX9-NEXT:    v_min_f64 v[2:3], v[0:1], |v[4:5]|
-; GFX9-NEXT:    v_cmp_u_f64_e64 vcc, v[0:1], |v[4:5]|
+; GFX9-NEXT:    v_cmp_o_f64_e64 vcc, v[0:1], |v[4:5]|
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v8, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v8, v3, vcc
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
   %c.fabs = call double @llvm.fabs.f64(double %c)
   %max0 = call double @llvm.minimum.f64(double %a, double %b)
@@ -3376,15 +3376,15 @@ define double @v_fminimum3_f64_fabs_all(double %a, double %b, double %c) {
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    v_min_f64 v[6:7], |v[0:1]|, |v[2:3]|
 ; GFX9-NEXT:    v_mov_b32_e32 v8, 0x7ff80000
-; GFX9-NEXT:    v_cmp_u_f64_e64 vcc, |v[0:1]|, |v[2:3]|
+; GFX9-NEXT:    v_cmp_o_f64_e64 vcc, |v[0:1]|, |v[2:3]|
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v7, v8, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v6, 0, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v8, v7, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v6, vcc
 ; GFX9-NEXT:    v_min_f64 v[2:3], v[0:1], |v[4:5]|
-; GFX9-NEXT:    v_cmp_u_f64_e64 vcc, v[0:1], |v[4:5]|
+; GFX9-NEXT:    v_cmp_o_f64_e64 vcc, v[0:1], |v[4:5]|
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v8, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v8, v3, vcc
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
   %a.fabs = call double @llvm.fabs.f64(double %a)
   %b.fabs = call double @llvm.fabs.f64(double %b)
@@ -3412,15 +3412,15 @@ define double @v_fminimum3_f64_fneg_all(double %a, double %b, double %c) {
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    v_min_f64 v[6:7], -v[0:1], -v[2:3]
 ; GFX9-NEXT:    v_mov_b32_e32 v8, 0x7ff80000
-; GFX9-NEXT:    v_cmp_u_f64_e64 vcc, -v[0:1], -v[2:3]
+; GFX9-NEXT:    v_cmp_o_f64_e64 vcc, -v[0:1], -v[2:3]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v7, v8, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v6, 0, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v8, v7, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v6, vcc
 ; GFX9-NEXT:    v_min_f64 v[2:3], v[0:1], -v[4:5]
-; GFX9-NEXT:    v_cmp_u_f64_e64 vcc, v[0:1], -v[4:5]
+; GFX9-NEXT:    v_cmp_o_f64_e64 vcc, v[0:1], -v[4:5]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v8, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v8, v3, vcc
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
   %a.fneg = fneg double %a
   %b.fneg = fneg double %b
@@ -3448,15 +3448,15 @@ define double @v_fminimum3_f64_fneg_fabs_all(double %a, double %b, double %c) {
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    v_min_f64 v[6:7], -|v[0:1]|, -|v[2:3]|
 ; GFX9-NEXT:    v_mov_b32_e32 v8, 0x7ff80000
-; GFX9-NEXT:    v_cmp_u_f64_e64 vcc, -|v[0:1]|, -|v[2:3]|
+; GFX9-NEXT:    v_cmp_o_f64_e64 vcc, -|v[0:1]|, -|v[2:3]|
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v7, v8, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v6, 0, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v8, v7, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v6, vcc
 ; GFX9-NEXT:    v_min_f64 v[2:3], v[0:1], -|v[4:5]|
-; GFX9-NEXT:    v_cmp_u_f64_e64 vcc, v[0:1], -|v[4:5]|
+; GFX9-NEXT:    v_cmp_o_f64_e64 vcc, v[0:1], -|v[4:5]|
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v8, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v8, v3, vcc
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
   %a.fabs = call double @llvm.fabs.f64(double %a)
   %b.fabs = call double @llvm.fabs.f64(double %b)
@@ -3487,15 +3487,15 @@ define double @v_fminimum3_f64_fneg0(double %a, double %b, double %c) {
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    v_min_f64 v[6:7], -v[0:1], v[2:3]
 ; GFX9-NEXT:    v_mov_b32_e32 v8, 0x7ff80000
-; GFX9-NEXT:    v_cmp_u_f64_e64 vcc, -v[0:1], v[2:3]
+; GFX9-NEXT:    v_cmp_o_f64_e64 vcc, -v[0:1], v[2:3]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v7, v8, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v6, 0, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v8, v7, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v6, vcc
 ; GFX9-NEXT:    v_min_f64 v[2:3], v[0:1], v[4:5]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX9-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[4:5]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v8, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v8, v3, vcc
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
   %a.fneg = fneg double %a
   %max0 = call double @llvm.minimum.f64(double %a.fneg, double %b)
@@ -3521,15 +3521,15 @@ define double @v_fminimum3_f64_fneg1(double %a, double %b, double %c) {
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    v_min_f64 v[6:7], v[0:1], -v[2:3]
 ; GFX9-NEXT:    v_mov_b32_e32 v8, 0x7ff80000
-; GFX9-NEXT:    v_cmp_u_f64_e64 vcc, v[0:1], -v[2:3]
+; GFX9-NEXT:    v_cmp_o_f64_e64 vcc, v[0:1], -v[2:3]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v7, v8, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v6, 0, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v8, v7, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v6, vcc
 ; GFX9-NEXT:    v_min_f64 v[2:3], v[0:1], v[4:5]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX9-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[4:5]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v8, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v8, v3, vcc
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
   %b.fneg = fneg double %b
   %max0 = call double @llvm.minimum.f64(double %a, double %b.fneg)
@@ -3555,15 +3555,15 @@ define double @v_fminimum3_f64_fneg2(double %a, double %b, double %c) {
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    v_min_f64 v[6:7], v[0:1], v[2:3]
 ; GFX9-NEXT:    v_mov_b32_e32 v8, 0x7ff80000
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX9-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[2:3]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v7, v8, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v6, 0, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v8, v7, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v6, vcc
 ; GFX9-NEXT:    v_min_f64 v[2:3], v[0:1], -v[4:5]
-; GFX9-NEXT:    v_cmp_u_f64_e64 vcc, v[0:1], -v[4:5]
+; GFX9-NEXT:    v_cmp_o_f64_e64 vcc, v[0:1], -v[4:5]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v8, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v8, v3, vcc
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
   %c.fneg = fneg double %c
   %max0 = call double @llvm.minimum.f64(double %a, double %b)
@@ -3591,15 +3591,15 @@ define double @v_fminimum3_f64_const0(double %b, double %c) {
 ; GFX9-NEXT:    s_mov_b32 s1, 0x40200000
 ; GFX9-NEXT:    v_min_f64 v[4:5], v[0:1], s[0:1]
 ; GFX9-NEXT:    v_mov_b32_e32 v6, 0x7ff80000
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[0:1]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v5, v6, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v6, v5, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc
 ; GFX9-NEXT:    v_min_f64 v[4:5], v[0:1], v[2:3]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX9-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[2:3]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v5, v6, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v6, v5, vcc
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
   %max0 = call double @llvm.minimum.f64(double 8.0, double %b)
   %max1 = call double @llvm.minimum.f64(double %max0, double %c)
@@ -3624,16 +3624,16 @@ define double @v_fminimum3_f64__const2(double %a, double %b) {
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    v_min_f64 v[4:5], v[0:1], v[2:3]
 ; GFX9-NEXT:    v_mov_b32_e32 v6, 0x7ff80000
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX9-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[2:3]
 ; GFX9-NEXT:    s_mov_b32 s0, 0
 ; GFX9-NEXT:    s_mov_b32 s1, 0x40200000
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v5, v6, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v6, v5, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc
 ; GFX9-NEXT:    v_min_f64 v[2:3], v[0:1], s[0:1]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[0:1]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v6, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
   %max0 = call double @llvm.minimum.f64(double %a, double %b)
   %max1 = call double @llvm.minimum.f64(double %max0, double 8.0)
@@ -3658,15 +3658,15 @@ define double @v_fminimum3_f64_inlineimm0(double %b, double %c) {
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    v_min_f64 v[4:5], v[0:1], 4.0
 ; GFX9-NEXT:    v_mov_b32_e32 v6, 0x7ff80000
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[0:1]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v5, v6, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v6, v5, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc
 ; GFX9-NEXT:    v_min_f64 v[4:5], v[0:1], v[2:3]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX9-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[2:3]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v5, v6, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v6, v5, vcc
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
   %max0 = call double @llvm.minimum.f64(double 4.0, double %b)
   %max1 = call double @llvm.minimum.f64(double %max0, double %c)
@@ -3691,15 +3691,15 @@ define double @v_fminimum3_f64__inlineimm(double %a, double %b) {
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    v_min_f64 v[4:5], v[0:1], v[2:3]
 ; GFX9-NEXT:    v_mov_b32_e32 v6, 0x7ff80000
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX9-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[2:3]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v5, v6, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v6, v5, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc
 ; GFX9-NEXT:    v_min_f64 v[2:3], v[0:1], 4.0
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[0:1]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v6, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
   %max0 = call double @llvm.minimum.f64(double %a, double %b)
   %max1 = call double @llvm.minimum.f64(double %max0, double 4.0)
@@ -3726,16 +3726,16 @@ define double @v_fminimum3_f64_const1_const2(double %a) {
 ; GFX9-NEXT:    s_mov_b32 s1, 0x40200000
 ; GFX9-NEXT:    v_min_f64 v[2:3], v[0:1], s[0:1]
 ; GFX9-NEXT:    v_mov_b32_e32 v4, 0x7ff80000
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[0:1]
 ; GFX9-NEXT:    s_mov_b32 s0, 0
 ; GFX9-NEXT:    s_mov_b32 s1, 0x40300000
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v4, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v4, v3, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX9-NEXT:    v_min_f64 v[2:3], v[0:1], s[0:1]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; GFX9-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[0:1]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v4, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v4, v3, vcc
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
   %max0 = call double @llvm.minimum.f64(double %a, double 8.0)
   %max1 = call double @llvm.minimum.f64(double %max0, double 16.0)
@@ -4003,15 +4003,15 @@ define <2 x double> @v_no_fminimum3_f64__multi_use(double %a, double %b, double
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    v_min_f64 v[6:7], v[0:1], v[2:3]
 ; GFX9-NEXT:    v_mov_b32_e32 v8, 0x7ff80000
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX9-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[2:3]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v7, v8, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v6, 0, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v8, v7, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v6, vcc
 ; GFX9-NEXT:    v_min_f64 v[2:3], v[0:1], v[4:5]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX9-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[4:5]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e64 v2, v2, 0, vcc
-; GFX9-NEXT:    v_cndmask_b32_e32 v3, v3, v8, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v8, v3, vcc
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
   %max0 = call double @llvm.minimum.f64(double %a, double %b)
   %max1 = call double @llvm.minimum.f64(double %max0, double %c)
diff --git a/llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll b/llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll
index 46da9d33639b6..feed8c4d6b745 100644
--- a/llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll
+++ b/llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll
@@ -1624,20 +1624,20 @@ define double @v_fneg_inv2pi_minimum_f64(double %a) #0 {
 ; SI-NEXT:    s_mov_b32 s4, 0x6dc9c882
 ; SI-NEXT:    s_mov_b32 s5, 0xbfc45f30
 ; SI-NEXT:    v_max_f64 v[2:3], -v[0:1], s[4:5]
-; SI-NEXT:    v_cmp_u_f64_e64 vcc, -v[0:1], -v[0:1]
+; SI-NEXT:    v_cmp_o_f64_e64 vcc, -v[0:1], -v[0:1]
 ; SI-NEXT:    v_mov_b32_e32 v1, 0x7ff80000
-; SI-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; SI-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
+; SI-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
+; SI-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
 ; SI-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; VI-LABEL: v_fneg_inv2pi_minimum_f64:
 ; VI:       ; %bb.0:
 ; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; VI-NEXT:    v_min_f64 v[2:3], v[0:1], 0.15915494309189532
-; VI-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; VI-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[0:1]
 ; VI-NEXT:    v_mov_b32_e32 v1, 0xfff80000
-; VI-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; VI-NEXT:    v_cndmask_b32_e64 v1, -v3, v1, vcc
+; VI-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
+; VI-NEXT:    v_cndmask_b32_e64 v1, v1, -v3, vcc
 ; VI-NEXT:    s_setpc_b64 s[30:31]
   %min = call double @llvm.minimum.f64(double 0x3fc45f306dc9c882, double %a)
   %fneg = fneg double %min
@@ -1651,20 +1651,20 @@ define double @v_fneg_neg_inv2pi_minimum_f64(double %a) #0 {
 ; SI-NEXT:    s_mov_b32 s4, 0x6dc9c882
 ; SI-NEXT:    s_mov_b32 s5, 0x3fc45f30
 ; SI-NEXT:    v_max_f64 v[2:3], -v[0:1], s[4:5]
-; SI-NEXT:    v_cmp_u_f64_e64 vcc, -v[0:1], -v[0:1]
+; SI-NEXT:    v_cmp_o_f64_e64 vcc, -v[0:1], -v[0:1]
 ; SI-NEXT:    v_mov_b32_e32 v1, 0x7ff80000
-; SI-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; SI-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
+; SI-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
+; SI-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
 ; SI-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; VI-LABEL: v_fneg_neg_inv2pi_minimum_f64:
 ; VI:       ; %bb.0:
 ; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; VI-NEXT:    v_max_f64 v[2:3], -v[0:1], 0.15915494309189532
-; VI-NEXT:    v_cmp_u_f64_e64 vcc, -v[0:1], -v[0:1]
+; VI-NEXT:    v_cmp_o_f64_e64 vcc, -v[0:1], -v[0:1]
 ; VI-NEXT:    v_mov_b32_e32 v1, 0x7ff80000
-; VI-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; VI-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
+; VI-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
+; VI-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
 ; VI-NEXT:    s_setpc_b64 s[30:31]
   %min = call double @llvm.minimum.f64(double 0xbfc45f306dc9c882, double %a)
   %fneg = fneg double %min
diff --git a/llvm/test/CodeGen/AMDGPU/insert_vector_dynelt.ll b/llvm/test/CodeGen/AMDGPU/insert_vector_dynelt.ll
index 6925a98f643b9..2e037335ce37a 100644
--- a/llvm/test/CodeGen/AMDGPU/insert_vector_dynelt.ll
+++ b/llvm/test/CodeGen/AMDGPU/insert_vector_dynelt.ll
@@ -1919,31 +1919,31 @@ define <8 x double> @double8_inselt_vec(<8 x double> %vec, i32 %sel) {
 ; GCN-LABEL: double8_inselt_vec:
 ; GCN:       ; %bb.0: ; %entry
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v16
+; GCN-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v16
 ; GCN-NEXT:    v_mov_b32_e32 v17, 0x3ff00000
-; GCN-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
-; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v17, vcc
-; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v16
-; GCN-NEXT:    v_cndmask_b32_e64 v2, v2, 0, vcc
-; GCN-NEXT:    v_cndmask_b32_e32 v3, v3, v17, vcc
-; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v16
-; GCN-NEXT:    v_cndmask_b32_e64 v4, v4, 0, vcc
-; GCN-NEXT:    v_cndmask_b32_e32 v5, v5, v17, vcc
-; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v16
-; GCN-NEXT:    v_cndmask_b32_e64 v6, v6, 0, vcc
-; GCN-NEXT:    v_cndmask_b32_e32 v7, v7, v17, vcc
-; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v16
-; GCN-NEXT:    v_cndmask_b32_e64 v8, v8, 0, vcc
-; GCN-NEXT:    v_cndmask_b32_e32 v9, v9, v17, vcc
-; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v16
-; GCN-NEXT:    v_cndmask_b32_e64 v10, v10, 0, vcc
-; GCN-NEXT:    v_cndmask_b32_e32 v11, v11, v17, vcc
-; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v16
-; GCN-NEXT:    v_cndmask_b32_e64 v12, v12, 0, vcc
-; GCN-NEXT:    v_cndmask_b32_e32 v13, v13, v17, vcc
-; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 7, v16
-; GCN-NEXT:    v_cndmask_b32_e64 v14, v14, 0, vcc
-; GCN-NEXT:    v_cndmask_b32_e32 v15, v15, v17, vcc
+; GCN-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
+; GCN-NEXT:    v_cndmask_b32_e32 v1, v17, v1, vcc
+; GCN-NEXT:    v_cmp_ne_u32_e32 vcc, 1, v16
+; GCN-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
+; GCN-NEXT:    v_cndmask_b32_e32 v3, v17, v3, vcc
+; GCN-NEXT:    v_cmp_ne_u32_e32 vcc, 2, v16
+; GCN-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
+; GCN-NEXT:    v_cndmask_b32_e32 v5, v17, v5, vcc
+; GCN-NEXT:    v_cmp_ne_u32_e32 vcc, 3, v16
+; GCN-NEXT:    v_cndmask_b32_e32 v6, 0, v6, vcc
+; GCN-NEXT:    v_cndmask_b32_e32 v7, v17, v7, vcc
+; GCN-NEXT:    v_cmp_ne_u32_e32 vcc, 4, v16
+; GCN-NEXT:    v_cndmask_b32_e32 v8, 0, v8, vcc
+; GCN-NEXT:    v_cndmask_b32_e32 v9, v17, v9, vcc
+; GCN-NEXT:    v_cmp_ne_u32_e32 vcc, 5, v16
+; GCN-NEXT:    v_cndmask_b32_e32 v10, 0, v10, vcc
+; GCN-NEXT:    v_cndmask_b32_e32 v11, v17, v11, vcc
+; GCN-NEXT:    v_cmp_ne_u32_e32 vcc, 6, v16
+; GCN-NEXT:    v_cndmask_b32_e32 v12, 0, v12, vcc
+; GCN-NEXT:    v_cndmask_b32_e32 v13, v17, v13, vcc
+; GCN-NEXT:    v_cmp_ne_u32_e32 vcc, 7, v16
+; GCN-NEXT:    v_cndmask_b32_e32 v14, 0, v14, vcc
+; GCN-NEXT:    v_cndmask_b32_e32 v15, v17, v15, vcc
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
 entry:
   %v = insertelement <8 x double> %vec, double 1.000000e+00, i32 %sel
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.maximum.f64.ll b/llvm/test/CodeGen/AMDGPU/llvm.maximum.f64.ll
index f971080e02c5b..6add49178abbc 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.maximum.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.maximum.f64.ll
@@ -13,60 +13,60 @@ define double @v_maximum_f64(double %src0, double %src1) {
 ; GFX7:       ; %bb.0:
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX7-NEXT:    v_max_f64 v[4:5], v[0:1], v[2:3]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX7-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[2:3]
 ; GFX7-NEXT:    v_mov_b32_e32 v1, 0x7ff80000
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_maximum_f64:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_max_f64 v[4:5], v[0:1], v[2:3]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX8-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[2:3]
 ; GFX8-NEXT:    v_mov_b32_e32 v1, 0x7ff80000
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX900-LABEL: v_maximum_f64:
 ; GFX900:       ; %bb.0:
 ; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX900-NEXT:    v_max_f64 v[4:5], v[0:1], v[2:3]
-; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX900-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[2:3]
 ; GFX900-NEXT:    v_mov_b32_e32 v1, 0x7ff80000
-; GFX900-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX900-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
 ; GFX900-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_maximum_f64:
 ; GFX950:       ; %bb.0:
 ; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX950-NEXT:    v_max_f64 v[4:5], v[0:1], v[2:3]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX950-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[2:3]
 ; GFX950-NEXT:    v_mov_b32_e32 v1, 0x7ff80000
 ; GFX950-NEXT:    s_nop 0
-; GFX950-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
 ; GFX950-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: v_maximum_f64:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    v_max_f64 v[4:5], v[0:1], v[2:3]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[2:3]
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v5, 0x7ff80000, vcc_lo
+; GFX10-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[2:3]
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v5, vcc_lo
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: v_maximum_f64:
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NEXT:    v_max_f64 v[4:5], v[0:1], v[2:3]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[2:3]
+; GFX11-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[2:3]
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v5, 0x7ff80000, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v5, vcc_lo
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: v_maximum_f64:
@@ -131,60 +131,60 @@ define double @v_maximum_f64__nsz(double %src0, double %src1) {
 ; GFX7:       ; %bb.0:
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX7-NEXT:    v_max_f64 v[4:5], v[0:1], v[2:3]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX7-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[2:3]
 ; GFX7-NEXT:    v_mov_b32_e32 v1, 0x7ff80000
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_maximum_f64__nsz:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_max_f64 v[4:5], v[0:1], v[2:3]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX8-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[2:3]
 ; GFX8-NEXT:    v_mov_b32_e32 v1, 0x7ff80000
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX900-LABEL: v_maximum_f64__nsz:
 ; GFX900:       ; %bb.0:
 ; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX900-NEXT:    v_max_f64 v[4:5], v[0:1], v[2:3]
-; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX900-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[2:3]
 ; GFX900-NEXT:    v_mov_b32_e32 v1, 0x7ff80000
-; GFX900-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX900-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
 ; GFX900-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_maximum_f64__nsz:
 ; GFX950:       ; %bb.0:
 ; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX950-NEXT:    v_max_f64 v[4:5], v[0:1], v[2:3]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX950-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[2:3]
 ; GFX950-NEXT:    v_mov_b32_e32 v1, 0x7ff80000
 ; GFX950-NEXT:    s_nop 0
-; GFX950-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
 ; GFX950-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: v_maximum_f64__nsz:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    v_max_f64 v[4:5], v[0:1], v[2:3]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[2:3]
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v5, 0x7ff80000, vcc_lo
+; GFX10-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[2:3]
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v5, vcc_lo
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: v_maximum_f64__nsz:
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NEXT:    v_max_f64 v[4:5], v[0:1], v[2:3]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[2:3]
+; GFX11-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[2:3]
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v5, 0x7ff80000, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v5, vcc_lo
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: v_maximum_f64__nsz:
@@ -250,10 +250,10 @@ define double @v_maximum_f64__nnan_src0(double %arg0, double %src1) {
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX7-NEXT:    v_add_f64 v[0:1], v[0:1], 1.0
 ; GFX7-NEXT:    v_max_f64 v[4:5], v[0:1], v[2:3]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX7-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[2:3]
 ; GFX7-NEXT:    v_mov_b32_e32 v1, 0x7ff80000
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_maximum_f64__nnan_src0:
@@ -261,10 +261,10 @@ define double @v_maximum_f64__nnan_src0(double %arg0, double %src1) {
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_add_f64 v[0:1], v[0:1], 1.0
 ; GFX8-NEXT:    v_max_f64 v[4:5], v[0:1], v[2:3]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX8-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[2:3]
 ; GFX8-NEXT:    v_mov_b32_e32 v1, 0x7ff80000
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX900-LABEL: v_maximum_f64__nnan_src0:
@@ -272,10 +272,10 @@ define double @v_maximum_f64__nnan_src0(double %arg0, double %src1) {
 ; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX900-NEXT:    v_add_f64 v[0:1], v[0:1], 1.0
 ; GFX900-NEXT:    v_max_f64 v[4:5], v[0:1], v[2:3]
-; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX900-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[2:3]
 ; GFX900-NEXT:    v_mov_b32_e32 v1, 0x7ff80000
-; GFX900-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX900-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
 ; GFX900-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_maximum_f64__nnan_src0:
@@ -283,11 +283,11 @@ define double @v_maximum_f64__nnan_src0(double %arg0, double %src1) {
 ; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX950-NEXT:    v_add_f64 v[0:1], v[0:1], 1.0
 ; GFX950-NEXT:    v_max_f64 v[4:5], v[0:1], v[2:3]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX950-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[2:3]
 ; GFX950-NEXT:    v_mov_b32_e32 v1, 0x7ff80000
 ; GFX950-NEXT:    s_nop 0
-; GFX950-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
 ; GFX950-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: v_maximum_f64__nnan_src0:
@@ -295,9 +295,9 @@ define double @v_maximum_f64__nnan_src0(double %arg0, double %src1) {
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    v_add_f64 v[0:1], v[0:1], 1.0
 ; GFX10-NEXT:    v_max_f64 v[4:5], v[0:1], v[2:3]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[2:3]
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v5, 0x7ff80000, vcc_lo
+; GFX10-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[2:3]
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v5, vcc_lo
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: v_maximum_f64__nnan_src0:
@@ -306,10 +306,10 @@ define double @v_maximum_f64__nnan_src0(double %arg0, double %src1) {
 ; GFX11-NEXT:    v_add_f64 v[0:1], v[0:1], 1.0
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
 ; GFX11-NEXT:    v_max_f64 v[4:5], v[0:1], v[2:3]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[2:3]
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc_lo
+; GFX11-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[2:3]
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc_lo
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3)
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v5, 0x7ff80000, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v5, vcc_lo
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: v_maximum_f64__nnan_src0:
@@ -334,10 +334,10 @@ define double @v_maximum_f64__nnan_src1(double %src0, double %arg1) {
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX7-NEXT:    v_add_f64 v[2:3], v[2:3], 1.0
 ; GFX7-NEXT:    v_max_f64 v[4:5], v[0:1], v[2:3]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX7-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[2:3]
 ; GFX7-NEXT:    v_mov_b32_e32 v1, 0x7ff80000
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_maximum_f64__nnan_src1:
@@ -345,10 +345,10 @@ define double @v_maximum_f64__nnan_src1(double %src0, double %arg1) {
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_add_f64 v[2:3], v[2:3], 1.0
 ; GFX8-NEXT:    v_max_f64 v[4:5], v[0:1], v[2:3]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX8-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[2:3]
 ; GFX8-NEXT:    v_mov_b32_e32 v1, 0x7ff80000
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX900-LABEL: v_maximum_f64__nnan_src1:
@@ -356,10 +356,10 @@ define double @v_maximum_f64__nnan_src1(double %src0, double %arg1) {
 ; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX900-NEXT:    v_add_f64 v[2:3], v[2:3], 1.0
 ; GFX900-NEXT:    v_max_f64 v[4:5], v[0:1], v[2:3]
-; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX900-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[2:3]
 ; GFX900-NEXT:    v_mov_b32_e32 v1, 0x7ff80000
-; GFX900-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX900-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
 ; GFX900-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_maximum_f64__nnan_src1:
@@ -367,11 +367,11 @@ define double @v_maximum_f64__nnan_src1(double %src0, double %arg1) {
 ; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX950-NEXT:    v_add_f64 v[2:3], v[2:3], 1.0
 ; GFX950-NEXT:    v_max_f64 v[4:5], v[0:1], v[2:3]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX950-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[2:3]
 ; GFX950-NEXT:    v_mov_b32_e32 v1, 0x7ff80000
 ; GFX950-NEXT:    s_nop 0
-; GFX950-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
 ; GFX950-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: v_maximum_f64__nnan_src1:
@@ -379,9 +379,9 @@ define double @v_maximum_f64__nnan_src1(double %src0, double %arg1) {
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    v_add_f64 v[2:3], v[2:3], 1.0
 ; GFX10-NEXT:    v_max_f64 v[4:5], v[0:1], v[2:3]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[2:3]
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v5, 0x7ff80000, vcc_lo
+; GFX10-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[2:3]
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v5, vcc_lo
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: v_maximum_f64__nnan_src1:
@@ -390,10 +390,10 @@ define double @v_maximum_f64__nnan_src1(double %src0, double %arg1) {
 ; GFX11-NEXT:    v_add_f64 v[2:3], v[2:3], 1.0
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
 ; GFX11-NEXT:    v_max_f64 v[4:5], v[0:1], v[2:3]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[2:3]
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc_lo
+; GFX11-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[2:3]
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc_lo
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3)
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v5, 0x7ff80000, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v5, vcc_lo
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: v_maximum_f64__nnan_src1:
@@ -520,85 +520,85 @@ define <2 x double> @v_maximum_v2f64(<2 x double> %src0, <2 x double> %src1) {
 ; GFX7:       ; %bb.0:
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX7-NEXT:    v_max_f64 v[8:9], v[0:1], v[4:5]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX7-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[4:5]
 ; GFX7-NEXT:    v_max_f64 v[4:5], v[2:3], v[6:7]
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[6:7]
+; GFX7-NEXT:    v_cmp_o_f64_e64 s[4:5], v[2:3], v[6:7]
 ; GFX7-NEXT:    v_mov_b32_e32 v3, 0x7ff80000
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v8, 0, vcc
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v9, v3, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v2, v4, 0, s[4:5]
-; GFX7-NEXT:    v_cndmask_b32_e64 v3, v5, v3, s[4:5]
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, 0, v8, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v3, v9, vcc
+; GFX7-NEXT:    v_cndmask_b32_e64 v2, 0, v4, s[4:5]
+; GFX7-NEXT:    v_cndmask_b32_e64 v3, v3, v5, s[4:5]
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_maximum_v2f64:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_max_f64 v[8:9], v[0:1], v[4:5]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX8-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[4:5]
 ; GFX8-NEXT:    v_max_f64 v[4:5], v[2:3], v[6:7]
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[6:7]
+; GFX8-NEXT:    v_cmp_o_f64_e64 s[4:5], v[2:3], v[6:7]
 ; GFX8-NEXT:    v_mov_b32_e32 v3, 0x7ff80000
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v8, 0, vcc
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v9, v3, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v2, v4, 0, s[4:5]
-; GFX8-NEXT:    v_cndmask_b32_e64 v3, v5, v3, s[4:5]
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, 0, v8, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v9, vcc
+; GFX8-NEXT:    v_cndmask_b32_e64 v2, 0, v4, s[4:5]
+; GFX8-NEXT:    v_cndmask_b32_e64 v3, v3, v5, s[4:5]
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX900-LABEL: v_maximum_v2f64:
 ; GFX900:       ; %bb.0:
 ; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX900-NEXT:    v_max_f64 v[8:9], v[0:1], v[4:5]
-; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX900-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[4:5]
 ; GFX900-NEXT:    v_max_f64 v[4:5], v[2:3], v[6:7]
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[6:7]
+; GFX900-NEXT:    v_cmp_o_f64_e64 s[4:5], v[2:3], v[6:7]
 ; GFX900-NEXT:    v_mov_b32_e32 v3, 0x7ff80000
-; GFX900-NEXT:    v_cndmask_b32_e64 v0, v8, 0, vcc
-; GFX900-NEXT:    v_cndmask_b32_e32 v1, v9, v3, vcc
-; GFX900-NEXT:    v_cndmask_b32_e64 v2, v4, 0, s[4:5]
-; GFX900-NEXT:    v_cndmask_b32_e64 v3, v5, v3, s[4:5]
+; GFX900-NEXT:    v_cndmask_b32_e32 v0, 0, v8, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v1, v3, v9, vcc
+; GFX900-NEXT:    v_cndmask_b32_e64 v2, 0, v4, s[4:5]
+; GFX900-NEXT:    v_cndmask_b32_e64 v3, v3, v5, s[4:5]
 ; GFX900-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_maximum_v2f64:
 ; GFX950:       ; %bb.0:
 ; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX950-NEXT:    v_max_f64 v[8:9], v[0:1], v[4:5]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX950-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[4:5]
 ; GFX950-NEXT:    v_max_f64 v[4:5], v[2:3], v[6:7]
 ; GFX950-NEXT:    s_nop 0
-; GFX950-NEXT:    v_cndmask_b32_e64 v0, v8, 0, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v0, 0, v8, vcc
 ; GFX950-NEXT:    v_mov_b32_e32 v8, 0x7ff80000
-; GFX950-NEXT:    v_cndmask_b32_e32 v1, v9, v8, vcc
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[6:7]
+; GFX950-NEXT:    v_cndmask_b32_e32 v1, v8, v9, vcc
+; GFX950-NEXT:    v_cmp_o_f64_e32 vcc, v[2:3], v[6:7]
 ; GFX950-NEXT:    s_nop 1
-; GFX950-NEXT:    v_cndmask_b32_e64 v2, v4, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v3, v5, v8, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v2, 0, v4, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v3, v8, v5, vcc
 ; GFX950-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: v_maximum_v2f64:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    v_max_f64 v[8:9], v[0:1], v[4:5]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[4:5]
+; GFX10-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[4:5]
 ; GFX10-NEXT:    v_max_f64 v[4:5], v[2:3], v[6:7]
-; GFX10-NEXT:    v_cmp_u_f64_e64 s4, v[2:3], v[6:7]
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v8, 0, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v9, 0x7ff80000, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v2, v4, 0, s4
-; GFX10-NEXT:    v_cndmask_b32_e64 v3, v5, 0x7ff80000, s4
+; GFX10-NEXT:    v_cmp_o_f64_e64 s4, v[2:3], v[6:7]
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0, v8, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v9, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e64 v2, 0, v4, s4
+; GFX10-NEXT:    v_cndmask_b32_e64 v3, 0x7ff80000, v5, s4
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: v_maximum_v2f64:
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NEXT:    v_max_f64 v[8:9], v[0:1], v[4:5]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[4:5]
+; GFX11-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[4:5]
 ; GFX11-NEXT:    v_max_f64 v[4:5], v[2:3], v[6:7]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s0, v[2:3], v[6:7]
+; GFX11-NEXT:    v_cmp_o_f64_e64 s0, v[2:3], v[6:7]
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v8, 0, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v9, 0x7ff80000, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v2, v4, 0, s0
-; GFX11-NEXT:    v_cndmask_b32_e64 v3, v5, 0x7ff80000, s0
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0, v8, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v9, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e64 v2, 0, v4, s0
+; GFX11-NEXT:    v_cndmask_b32_e64 v3, 0x7ff80000, v5, s0
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: v_maximum_v2f64:
@@ -670,85 +670,85 @@ define <2 x double> @v_maximum_v2f64__nsz(<2 x double> %src0, <2 x double> %src1
 ; GFX7:       ; %bb.0:
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX7-NEXT:    v_max_f64 v[8:9], v[0:1], v[4:5]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX7-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[4:5]
 ; GFX7-NEXT:    v_max_f64 v[4:5], v[2:3], v[6:7]
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[6:7]
+; GFX7-NEXT:    v_cmp_o_f64_e64 s[4:5], v[2:3], v[6:7]
 ; GFX7-NEXT:    v_mov_b32_e32 v3, 0x7ff80000
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v8, 0, vcc
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v9, v3, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v2, v4, 0, s[4:5]
-; GFX7-NEXT:    v_cndmask_b32_e64 v3, v5, v3, s[4:5]
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, 0, v8, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v3, v9, vcc
+; GFX7-NEXT:    v_cndmask_b32_e64 v2, 0, v4, s[4:5]
+; GFX7-NEXT:    v_cndmask_b32_e64 v3, v3, v5, s[4:5]
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_maximum_v2f64__nsz:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_max_f64 v[8:9], v[0:1], v[4:5]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX8-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[4:5]
 ; GFX8-NEXT:    v_max_f64 v[4:5], v[2:3], v[6:7]
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[6:7]
+; GFX8-NEXT:    v_cmp_o_f64_e64 s[4:5], v[2:3], v[6:7]
 ; GFX8-NEXT:    v_mov_b32_e32 v3, 0x7ff80000
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v8, 0, vcc
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v9, v3, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v2, v4, 0, s[4:5]
-; GFX8-NEXT:    v_cndmask_b32_e64 v3, v5, v3, s[4:5]
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, 0, v8, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v9, vcc
+; GFX8-NEXT:    v_cndmask_b32_e64 v2, 0, v4, s[4:5]
+; GFX8-NEXT:    v_cndmask_b32_e64 v3, v3, v5, s[4:5]
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX900-LABEL: v_maximum_v2f64__nsz:
 ; GFX900:       ; %bb.0:
 ; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX900-NEXT:    v_max_f64 v[8:9], v[0:1], v[4:5]
-; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX900-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[4:5]
 ; GFX900-NEXT:    v_max_f64 v[4:5], v[2:3], v[6:7]
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[6:7]
+; GFX900-NEXT:    v_cmp_o_f64_e64 s[4:5], v[2:3], v[6:7]
 ; GFX900-NEXT:    v_mov_b32_e32 v3, 0x7ff80000
-; GFX900-NEXT:    v_cndmask_b32_e64 v0, v8, 0, vcc
-; GFX900-NEXT:    v_cndmask_b32_e32 v1, v9, v3, vcc
-; GFX900-NEXT:    v_cndmask_b32_e64 v2, v4, 0, s[4:5]
-; GFX900-NEXT:    v_cndmask_b32_e64 v3, v5, v3, s[4:5]
+; GFX900-NEXT:    v_cndmask_b32_e32 v0, 0, v8, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v1, v3, v9, vcc
+; GFX900-NEXT:    v_cndmask_b32_e64 v2, 0, v4, s[4:5]
+; GFX900-NEXT:    v_cndmask_b32_e64 v3, v3, v5, s[4:5]
 ; GFX900-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_maximum_v2f64__nsz:
 ; GFX950:       ; %bb.0:
 ; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX950-NEXT:    v_max_f64 v[8:9], v[0:1], v[4:5]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX950-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[4:5]
 ; GFX950-NEXT:    v_max_f64 v[4:5], v[2:3], v[6:7]
 ; GFX950-NEXT:    s_nop 0
-; GFX950-NEXT:    v_cndmask_b32_e64 v0, v8, 0, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v0, 0, v8, vcc
 ; GFX950-NEXT:    v_mov_b32_e32 v8, 0x7ff80000
-; GFX950-NEXT:    v_cndmask_b32_e32 v1, v9, v8, vcc
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[6:7]
+; GFX950-NEXT:    v_cndmask_b32_e32 v1, v8, v9, vcc
+; GFX950-NEXT:    v_cmp_o_f64_e32 vcc, v[2:3], v[6:7]
 ; GFX950-NEXT:    s_nop 1
-; GFX950-NEXT:    v_cndmask_b32_e64 v2, v4, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v3, v5, v8, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v2, 0, v4, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v3, v8, v5, vcc
 ; GFX950-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: v_maximum_v2f64__nsz:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    v_max_f64 v[8:9], v[0:1], v[4:5]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[4:5]
+; GFX10-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[4:5]
 ; GFX10-NEXT:    v_max_f64 v[4:5], v[2:3], v[6:7]
-; GFX10-NEXT:    v_cmp_u_f64_e64 s4, v[2:3], v[6:7]
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v8, 0, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v9, 0x7ff80000, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v2, v4, 0, s4
-; GFX10-NEXT:    v_cndmask_b32_e64 v3, v5, 0x7ff80000, s4
+; GFX10-NEXT:    v_cmp_o_f64_e64 s4, v[2:3], v[6:7]
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0, v8, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v9, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e64 v2, 0, v4, s4
+; GFX10-NEXT:    v_cndmask_b32_e64 v3, 0x7ff80000, v5, s4
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: v_maximum_v2f64__nsz:
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NEXT:    v_max_f64 v[8:9], v[0:1], v[4:5]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[4:5]
+; GFX11-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[4:5]
 ; GFX11-NEXT:    v_max_f64 v[4:5], v[2:3], v[6:7]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s0, v[2:3], v[6:7]
+; GFX11-NEXT:    v_cmp_o_f64_e64 s0, v[2:3], v[6:7]
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v8, 0, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v9, 0x7ff80000, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v2, v4, 0, s0
-; GFX11-NEXT:    v_cndmask_b32_e64 v3, v5, 0x7ff80000, s0
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0, v8, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v9, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e64 v2, 0, v4, s0
+; GFX11-NEXT:    v_cndmask_b32_e64 v3, 0x7ff80000, v5, s0
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: v_maximum_v2f64__nsz:
@@ -955,109 +955,109 @@ define <3 x double> @v_maximum_v3f64(<3 x double> %src0, <3 x double> %src1) {
 ; GFX7:       ; %bb.0:
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX7-NEXT:    v_max_f64 v[12:13], v[0:1], v[6:7]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
+; GFX7-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[6:7]
 ; GFX7-NEXT:    v_max_f64 v[6:7], v[2:3], v[8:9]
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[8:9]
+; GFX7-NEXT:    v_cmp_o_f64_e64 s[4:5], v[2:3], v[8:9]
 ; GFX7-NEXT:    v_max_f64 v[8:9], v[4:5], v[10:11]
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[6:7], v[4:5], v[10:11]
+; GFX7-NEXT:    v_cmp_o_f64_e64 s[6:7], v[4:5], v[10:11]
 ; GFX7-NEXT:    v_mov_b32_e32 v5, 0x7ff80000
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v12, 0, vcc
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v13, v5, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v2, v6, 0, s[4:5]
-; GFX7-NEXT:    v_cndmask_b32_e64 v3, v7, v5, s[4:5]
-; GFX7-NEXT:    v_cndmask_b32_e64 v4, v8, 0, s[6:7]
-; GFX7-NEXT:    v_cndmask_b32_e64 v5, v9, v5, s[6:7]
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, 0, v12, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v5, v13, vcc
+; GFX7-NEXT:    v_cndmask_b32_e64 v2, 0, v6, s[4:5]
+; GFX7-NEXT:    v_cndmask_b32_e64 v3, v5, v7, s[4:5]
+; GFX7-NEXT:    v_cndmask_b32_e64 v4, 0, v8, s[6:7]
+; GFX7-NEXT:    v_cndmask_b32_e64 v5, v5, v9, s[6:7]
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_maximum_v3f64:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_max_f64 v[12:13], v[0:1], v[6:7]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
+; GFX8-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[6:7]
 ; GFX8-NEXT:    v_max_f64 v[6:7], v[2:3], v[8:9]
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[8:9]
+; GFX8-NEXT:    v_cmp_o_f64_e64 s[4:5], v[2:3], v[8:9]
 ; GFX8-NEXT:    v_max_f64 v[8:9], v[4:5], v[10:11]
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[6:7], v[4:5], v[10:11]
+; GFX8-NEXT:    v_cmp_o_f64_e64 s[6:7], v[4:5], v[10:11]
 ; GFX8-NEXT:    v_mov_b32_e32 v5, 0x7ff80000
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v12, 0, vcc
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v13, v5, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v2, v6, 0, s[4:5]
-; GFX8-NEXT:    v_cndmask_b32_e64 v3, v7, v5, s[4:5]
-; GFX8-NEXT:    v_cndmask_b32_e64 v4, v8, 0, s[6:7]
-; GFX8-NEXT:    v_cndmask_b32_e64 v5, v9, v5, s[6:7]
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, 0, v12, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v5, v13, vcc
+; GFX8-NEXT:    v_cndmask_b32_e64 v2, 0, v6, s[4:5]
+; GFX8-NEXT:    v_cndmask_b32_e64 v3, v5, v7, s[4:5]
+; GFX8-NEXT:    v_cndmask_b32_e64 v4, 0, v8, s[6:7]
+; GFX8-NEXT:    v_cndmask_b32_e64 v5, v5, v9, s[6:7]
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX900-LABEL: v_maximum_v3f64:
 ; GFX900:       ; %bb.0:
 ; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX900-NEXT:    v_max_f64 v[12:13], v[0:1], v[6:7]
-; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
+; GFX900-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[6:7]
 ; GFX900-NEXT:    v_max_f64 v[6:7], v[2:3], v[8:9]
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[8:9]
+; GFX900-NEXT:    v_cmp_o_f64_e64 s[4:5], v[2:3], v[8:9]
 ; GFX900-NEXT:    v_max_f64 v[8:9], v[4:5], v[10:11]
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[6:7], v[4:5], v[10:11]
+; GFX900-NEXT:    v_cmp_o_f64_e64 s[6:7], v[4:5], v[10:11]
 ; GFX900-NEXT:    v_mov_b32_e32 v5, 0x7ff80000
-; GFX900-NEXT:    v_cndmask_b32_e64 v0, v12, 0, vcc
-; GFX900-NEXT:    v_cndmask_b32_e32 v1, v13, v5, vcc
-; GFX900-NEXT:    v_cndmask_b32_e64 v2, v6, 0, s[4:5]
-; GFX900-NEXT:    v_cndmask_b32_e64 v3, v7, v5, s[4:5]
-; GFX900-NEXT:    v_cndmask_b32_e64 v4, v8, 0, s[6:7]
-; GFX900-NEXT:    v_cndmask_b32_e64 v5, v9, v5, s[6:7]
+; GFX900-NEXT:    v_cndmask_b32_e32 v0, 0, v12, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v1, v5, v13, vcc
+; GFX900-NEXT:    v_cndmask_b32_e64 v2, 0, v6, s[4:5]
+; GFX900-NEXT:    v_cndmask_b32_e64 v3, v5, v7, s[4:5]
+; GFX900-NEXT:    v_cndmask_b32_e64 v4, 0, v8, s[6:7]
+; GFX900-NEXT:    v_cndmask_b32_e64 v5, v5, v9, s[6:7]
 ; GFX900-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_maximum_v3f64:
 ; GFX950:       ; %bb.0:
 ; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX950-NEXT:    v_max_f64 v[12:13], v[0:1], v[6:7]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
+; GFX950-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[6:7]
 ; GFX950-NEXT:    v_max_f64 v[6:7], v[2:3], v[8:9]
 ; GFX950-NEXT:    s_nop 0
-; GFX950-NEXT:    v_cndmask_b32_e64 v0, v12, 0, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v0, 0, v12, vcc
 ; GFX950-NEXT:    v_mov_b32_e32 v12, 0x7ff80000
-; GFX950-NEXT:    v_cndmask_b32_e32 v1, v13, v12, vcc
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[8:9]
+; GFX950-NEXT:    v_cndmask_b32_e32 v1, v12, v13, vcc
+; GFX950-NEXT:    v_cmp_o_f64_e32 vcc, v[2:3], v[8:9]
 ; GFX950-NEXT:    s_nop 1
-; GFX950-NEXT:    v_cndmask_b32_e64 v2, v6, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v3, v7, v12, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v2, 0, v6, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v3, v12, v7, vcc
 ; GFX950-NEXT:    v_max_f64 v[6:7], v[4:5], v[10:11]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[10:11]
+; GFX950-NEXT:    v_cmp_o_f64_e32 vcc, v[4:5], v[10:11]
 ; GFX950-NEXT:    s_nop 1
-; GFX950-NEXT:    v_cndmask_b32_e64 v4, v6, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v5, v7, v12, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v4, 0, v6, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v5, v12, v7, vcc
 ; GFX950-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: v_maximum_v3f64:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    v_max_f64 v[12:13], v[0:1], v[6:7]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[6:7]
+; GFX10-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[6:7]
 ; GFX10-NEXT:    v_max_f64 v[6:7], v[2:3], v[8:9]
-; GFX10-NEXT:    v_cmp_u_f64_e64 s4, v[2:3], v[8:9]
+; GFX10-NEXT:    v_cmp_o_f64_e64 s4, v[2:3], v[8:9]
 ; GFX10-NEXT:    v_max_f64 v[8:9], v[4:5], v[10:11]
-; GFX10-NEXT:    v_cmp_u_f64_e64 s5, v[4:5], v[10:11]
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v12, 0, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v13, 0x7ff80000, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v2, v6, 0, s4
-; GFX10-NEXT:    v_cndmask_b32_e64 v3, v7, 0x7ff80000, s4
-; GFX10-NEXT:    v_cndmask_b32_e64 v4, v8, 0, s5
-; GFX10-NEXT:    v_cndmask_b32_e64 v5, v9, 0x7ff80000, s5
+; GFX10-NEXT:    v_cmp_o_f64_e64 s5, v[4:5], v[10:11]
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0, v12, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v13, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e64 v2, 0, v6, s4
+; GFX10-NEXT:    v_cndmask_b32_e64 v3, 0x7ff80000, v7, s4
+; GFX10-NEXT:    v_cndmask_b32_e64 v4, 0, v8, s5
+; GFX10-NEXT:    v_cndmask_b32_e64 v5, 0x7ff80000, v9, s5
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: v_maximum_v3f64:
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NEXT:    v_max_f64 v[12:13], v[0:1], v[6:7]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[6:7]
+; GFX11-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[6:7]
 ; GFX11-NEXT:    v_max_f64 v[6:7], v[2:3], v[8:9]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s0, v[2:3], v[8:9]
+; GFX11-NEXT:    v_cmp_o_f64_e64 s0, v[2:3], v[8:9]
 ; GFX11-NEXT:    v_max_f64 v[8:9], v[4:5], v[10:11]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s1, v[4:5], v[10:11]
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v12, 0, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v13, 0x7ff80000, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v2, v6, 0, s0
-; GFX11-NEXT:    v_cndmask_b32_e64 v3, v7, 0x7ff80000, s0
-; GFX11-NEXT:    v_cndmask_b32_e64 v4, v8, 0, s1
-; GFX11-NEXT:    v_cndmask_b32_e64 v5, v9, 0x7ff80000, s1
+; GFX11-NEXT:    v_cmp_o_f64_e64 s1, v[4:5], v[10:11]
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0, v12, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v13, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e64 v2, 0, v6, s0
+; GFX11-NEXT:    v_cndmask_b32_e64 v3, 0x7ff80000, v7, s0
+; GFX11-NEXT:    v_cndmask_b32_e64 v4, 0, v8, s1
+; GFX11-NEXT:    v_cndmask_b32_e64 v5, 0x7ff80000, v9, s1
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: v_maximum_v3f64:
@@ -1136,109 +1136,109 @@ define <3 x double> @v_maximum_v3f64__nsz(<3 x double> %src0, <3 x double> %src1
 ; GFX7:       ; %bb.0:
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX7-NEXT:    v_max_f64 v[12:13], v[0:1], v[6:7]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
+; GFX7-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[6:7]
 ; GFX7-NEXT:    v_max_f64 v[6:7], v[2:3], v[8:9]
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[8:9]
+; GFX7-NEXT:    v_cmp_o_f64_e64 s[4:5], v[2:3], v[8:9]
 ; GFX7-NEXT:    v_max_f64 v[8:9], v[4:5], v[10:11]
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[6:7], v[4:5], v[10:11]
+; GFX7-NEXT:    v_cmp_o_f64_e64 s[6:7], v[4:5], v[10:11]
 ; GFX7-NEXT:    v_mov_b32_e32 v5, 0x7ff80000
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v12, 0, vcc
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v13, v5, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v2, v6, 0, s[4:5]
-; GFX7-NEXT:    v_cndmask_b32_e64 v3, v7, v5, s[4:5]
-; GFX7-NEXT:    v_cndmask_b32_e64 v4, v8, 0, s[6:7]
-; GFX7-NEXT:    v_cndmask_b32_e64 v5, v9, v5, s[6:7]
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, 0, v12, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v5, v13, vcc
+; GFX7-NEXT:    v_cndmask_b32_e64 v2, 0, v6, s[4:5]
+; GFX7-NEXT:    v_cndmask_b32_e64 v3, v5, v7, s[4:5]
+; GFX7-NEXT:    v_cndmask_b32_e64 v4, 0, v8, s[6:7]
+; GFX7-NEXT:    v_cndmask_b32_e64 v5, v5, v9, s[6:7]
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_maximum_v3f64__nsz:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_max_f64 v[12:13], v[0:1], v[6:7]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
+; GFX8-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[6:7]
 ; GFX8-NEXT:    v_max_f64 v[6:7], v[2:3], v[8:9]
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[8:9]
+; GFX8-NEXT:    v_cmp_o_f64_e64 s[4:5], v[2:3], v[8:9]
 ; GFX8-NEXT:    v_max_f64 v[8:9], v[4:5], v[10:11]
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[6:7], v[4:5], v[10:11]
+; GFX8-NEXT:    v_cmp_o_f64_e64 s[6:7], v[4:5], v[10:11]
 ; GFX8-NEXT:    v_mov_b32_e32 v5, 0x7ff80000
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v12, 0, vcc
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v13, v5, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v2, v6, 0, s[4:5]
-; GFX8-NEXT:    v_cndmask_b32_e64 v3, v7, v5, s[4:5]
-; GFX8-NEXT:    v_cndmask_b32_e64 v4, v8, 0, s[6:7]
-; GFX8-NEXT:    v_cndmask_b32_e64 v5, v9, v5, s[6:7]
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, 0, v12, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v5, v13, vcc
+; GFX8-NEXT:    v_cndmask_b32_e64 v2, 0, v6, s[4:5]
+; GFX8-NEXT:    v_cndmask_b32_e64 v3, v5, v7, s[4:5]
+; GFX8-NEXT:    v_cndmask_b32_e64 v4, 0, v8, s[6:7]
+; GFX8-NEXT:    v_cndmask_b32_e64 v5, v5, v9, s[6:7]
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX900-LABEL: v_maximum_v3f64__nsz:
 ; GFX900:       ; %bb.0:
 ; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX900-NEXT:    v_max_f64 v[12:13], v[0:1], v[6:7]
-; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
+; GFX900-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[6:7]
 ; GFX900-NEXT:    v_max_f64 v[6:7], v[2:3], v[8:9]
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[8:9]
+; GFX900-NEXT:    v_cmp_o_f64_e64 s[4:5], v[2:3], v[8:9]
 ; GFX900-NEXT:    v_max_f64 v[8:9], v[4:5], v[10:11]
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[6:7], v[4:5], v[10:11]
+; GFX900-NEXT:    v_cmp_o_f64_e64 s[6:7], v[4:5], v[10:11]
 ; GFX900-NEXT:    v_mov_b32_e32 v5, 0x7ff80000
-; GFX900-NEXT:    v_cndmask_b32_e64 v0, v12, 0, vcc
-; GFX900-NEXT:    v_cndmask_b32_e32 v1, v13, v5, vcc
-; GFX900-NEXT:    v_cndmask_b32_e64 v2, v6, 0, s[4:5]
-; GFX900-NEXT:    v_cndmask_b32_e64 v3, v7, v5, s[4:5]
-; GFX900-NEXT:    v_cndmask_b32_e64 v4, v8, 0, s[6:7]
-; GFX900-NEXT:    v_cndmask_b32_e64 v5, v9, v5, s[6:7]
+; GFX900-NEXT:    v_cndmask_b32_e32 v0, 0, v12, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v1, v5, v13, vcc
+; GFX900-NEXT:    v_cndmask_b32_e64 v2, 0, v6, s[4:5]
+; GFX900-NEXT:    v_cndmask_b32_e64 v3, v5, v7, s[4:5]
+; GFX900-NEXT:    v_cndmask_b32_e64 v4, 0, v8, s[6:7]
+; GFX900-NEXT:    v_cndmask_b32_e64 v5, v5, v9, s[6:7]
 ; GFX900-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_maximum_v3f64__nsz:
 ; GFX950:       ; %bb.0:
 ; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX950-NEXT:    v_max_f64 v[12:13], v[0:1], v[6:7]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
+; GFX950-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[6:7]
 ; GFX950-NEXT:    v_max_f64 v[6:7], v[2:3], v[8:9]
 ; GFX950-NEXT:    s_nop 0
-; GFX950-NEXT:    v_cndmask_b32_e64 v0, v12, 0, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v0, 0, v12, vcc
 ; GFX950-NEXT:    v_mov_b32_e32 v12, 0x7ff80000
-; GFX950-NEXT:    v_cndmask_b32_e32 v1, v13, v12, vcc
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[8:9]
+; GFX950-NEXT:    v_cndmask_b32_e32 v1, v12, v13, vcc
+; GFX950-NEXT:    v_cmp_o_f64_e32 vcc, v[2:3], v[8:9]
 ; GFX950-NEXT:    s_nop 1
-; GFX950-NEXT:    v_cndmask_b32_e64 v2, v6, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v3, v7, v12, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v2, 0, v6, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v3, v12, v7, vcc
 ; GFX950-NEXT:    v_max_f64 v[6:7], v[4:5], v[10:11]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[10:11]
+; GFX950-NEXT:    v_cmp_o_f64_e32 vcc, v[4:5], v[10:11]
 ; GFX950-NEXT:    s_nop 1
-; GFX950-NEXT:    v_cndmask_b32_e64 v4, v6, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v5, v7, v12, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v4, 0, v6, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v5, v12, v7, vcc
 ; GFX950-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: v_maximum_v3f64__nsz:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    v_max_f64 v[12:13], v[0:1], v[6:7]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[6:7]
+; GFX10-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[6:7]
 ; GFX10-NEXT:    v_max_f64 v[6:7], v[2:3], v[8:9]
-; GFX10-NEXT:    v_cmp_u_f64_e64 s4, v[2:3], v[8:9]
+; GFX10-NEXT:    v_cmp_o_f64_e64 s4, v[2:3], v[8:9]
 ; GFX10-NEXT:    v_max_f64 v[8:9], v[4:5], v[10:11]
-; GFX10-NEXT:    v_cmp_u_f64_e64 s5, v[4:5], v[10:11]
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v12, 0, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v13, 0x7ff80000, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v2, v6, 0, s4
-; GFX10-NEXT:    v_cndmask_b32_e64 v3, v7, 0x7ff80000, s4
-; GFX10-NEXT:    v_cndmask_b32_e64 v4, v8, 0, s5
-; GFX10-NEXT:    v_cndmask_b32_e64 v5, v9, 0x7ff80000, s5
+; GFX10-NEXT:    v_cmp_o_f64_e64 s5, v[4:5], v[10:11]
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0, v12, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v13, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e64 v2, 0, v6, s4
+; GFX10-NEXT:    v_cndmask_b32_e64 v3, 0x7ff80000, v7, s4
+; GFX10-NEXT:    v_cndmask_b32_e64 v4, 0, v8, s5
+; GFX10-NEXT:    v_cndmask_b32_e64 v5, 0x7ff80000, v9, s5
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: v_maximum_v3f64__nsz:
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NEXT:    v_max_f64 v[12:13], v[0:1], v[6:7]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[6:7]
+; GFX11-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[6:7]
 ; GFX11-NEXT:    v_max_f64 v[6:7], v[2:3], v[8:9]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s0, v[2:3], v[8:9]
+; GFX11-NEXT:    v_cmp_o_f64_e64 s0, v[2:3], v[8:9]
 ; GFX11-NEXT:    v_max_f64 v[8:9], v[4:5], v[10:11]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s1, v[4:5], v[10:11]
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v12, 0, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v13, 0x7ff80000, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v2, v6, 0, s0
-; GFX11-NEXT:    v_cndmask_b32_e64 v3, v7, 0x7ff80000, s0
-; GFX11-NEXT:    v_cndmask_b32_e64 v4, v8, 0, s1
-; GFX11-NEXT:    v_cndmask_b32_e64 v5, v9, 0x7ff80000, s1
+; GFX11-NEXT:    v_cmp_o_f64_e64 s1, v[4:5], v[10:11]
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0, v12, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v13, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e64 v2, 0, v6, s0
+; GFX11-NEXT:    v_cndmask_b32_e64 v3, 0x7ff80000, v7, s0
+; GFX11-NEXT:    v_cndmask_b32_e64 v4, 0, v8, s1
+; GFX11-NEXT:    v_cndmask_b32_e64 v5, 0x7ff80000, v9, s1
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: v_maximum_v3f64__nsz:
@@ -1317,134 +1317,134 @@ define <4 x double> @v_maximum_v4f64(<4 x double> %src0, <4 x double> %src1) {
 ; GFX7:       ; %bb.0:
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX7-NEXT:    v_max_f64 v[16:17], v[0:1], v[8:9]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
+; GFX7-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[8:9]
 ; GFX7-NEXT:    v_max_f64 v[8:9], v[2:3], v[10:11]
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[10:11]
+; GFX7-NEXT:    v_cmp_o_f64_e64 s[4:5], v[2:3], v[10:11]
 ; GFX7-NEXT:    v_max_f64 v[10:11], v[4:5], v[12:13]
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[6:7], v[4:5], v[12:13]
+; GFX7-NEXT:    v_cmp_o_f64_e64 s[6:7], v[4:5], v[12:13]
 ; GFX7-NEXT:    v_max_f64 v[12:13], v[6:7], v[14:15]
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[8:9], v[6:7], v[14:15]
+; GFX7-NEXT:    v_cmp_o_f64_e64 s[8:9], v[6:7], v[14:15]
 ; GFX7-NEXT:    v_mov_b32_e32 v7, 0x7ff80000
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v16, 0, vcc
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v17, v7, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v2, v8, 0, s[4:5]
-; GFX7-NEXT:    v_cndmask_b32_e64 v3, v9, v7, s[4:5]
-; GFX7-NEXT:    v_cndmask_b32_e64 v4, v10, 0, s[6:7]
-; GFX7-NEXT:    v_cndmask_b32_e64 v5, v11, v7, s[6:7]
-; GFX7-NEXT:    v_cndmask_b32_e64 v6, v12, 0, s[8:9]
-; GFX7-NEXT:    v_cndmask_b32_e64 v7, v13, v7, s[8:9]
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, 0, v16, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v7, v17, vcc
+; GFX7-NEXT:    v_cndmask_b32_e64 v2, 0, v8, s[4:5]
+; GFX7-NEXT:    v_cndmask_b32_e64 v3, v7, v9, s[4:5]
+; GFX7-NEXT:    v_cndmask_b32_e64 v4, 0, v10, s[6:7]
+; GFX7-NEXT:    v_cndmask_b32_e64 v5, v7, v11, s[6:7]
+; GFX7-NEXT:    v_cndmask_b32_e64 v6, 0, v12, s[8:9]
+; GFX7-NEXT:    v_cndmask_b32_e64 v7, v7, v13, s[8:9]
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_maximum_v4f64:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_max_f64 v[16:17], v[0:1], v[8:9]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
+; GFX8-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[8:9]
 ; GFX8-NEXT:    v_max_f64 v[8:9], v[2:3], v[10:11]
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[10:11]
+; GFX8-NEXT:    v_cmp_o_f64_e64 s[4:5], v[2:3], v[10:11]
 ; GFX8-NEXT:    v_max_f64 v[10:11], v[4:5], v[12:13]
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[6:7], v[4:5], v[12:13]
+; GFX8-NEXT:    v_cmp_o_f64_e64 s[6:7], v[4:5], v[12:13]
 ; GFX8-NEXT:    v_max_f64 v[12:13], v[6:7], v[14:15]
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[8:9], v[6:7], v[14:15]
+; GFX8-NEXT:    v_cmp_o_f64_e64 s[8:9], v[6:7], v[14:15]
 ; GFX8-NEXT:    v_mov_b32_e32 v7, 0x7ff80000
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v16, 0, vcc
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v17, v7, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v2, v8, 0, s[4:5]
-; GFX8-NEXT:    v_cndmask_b32_e64 v3, v9, v7, s[4:5]
-; GFX8-NEXT:    v_cndmask_b32_e64 v4, v10, 0, s[6:7]
-; GFX8-NEXT:    v_cndmask_b32_e64 v5, v11, v7, s[6:7]
-; GFX8-NEXT:    v_cndmask_b32_e64 v6, v12, 0, s[8:9]
-; GFX8-NEXT:    v_cndmask_b32_e64 v7, v13, v7, s[8:9]
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, 0, v16, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v7, v17, vcc
+; GFX8-NEXT:    v_cndmask_b32_e64 v2, 0, v8, s[4:5]
+; GFX8-NEXT:    v_cndmask_b32_e64 v3, v7, v9, s[4:5]
+; GFX8-NEXT:    v_cndmask_b32_e64 v4, 0, v10, s[6:7]
+; GFX8-NEXT:    v_cndmask_b32_e64 v5, v7, v11, s[6:7]
+; GFX8-NEXT:    v_cndmask_b32_e64 v6, 0, v12, s[8:9]
+; GFX8-NEXT:    v_cndmask_b32_e64 v7, v7, v13, s[8:9]
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX900-LABEL: v_maximum_v4f64:
 ; GFX900:       ; %bb.0:
 ; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX900-NEXT:    v_max_f64 v[16:17], v[0:1], v[8:9]
-; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
+; GFX900-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[8:9]
 ; GFX900-NEXT:    v_max_f64 v[8:9], v[2:3], v[10:11]
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[10:11]
+; GFX900-NEXT:    v_cmp_o_f64_e64 s[4:5], v[2:3], v[10:11]
 ; GFX900-NEXT:    v_max_f64 v[10:11], v[4:5], v[12:13]
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[6:7], v[4:5], v[12:13]
+; GFX900-NEXT:    v_cmp_o_f64_e64 s[6:7], v[4:5], v[12:13]
 ; GFX900-NEXT:    v_max_f64 v[12:13], v[6:7], v[14:15]
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[8:9], v[6:7], v[14:15]
+; GFX900-NEXT:    v_cmp_o_f64_e64 s[8:9], v[6:7], v[14:15]
 ; GFX900-NEXT:    v_mov_b32_e32 v7, 0x7ff80000
-; GFX900-NEXT:    v_cndmask_b32_e64 v0, v16, 0, vcc
-; GFX900-NEXT:    v_cndmask_b32_e32 v1, v17, v7, vcc
-; GFX900-NEXT:    v_cndmask_b32_e64 v2, v8, 0, s[4:5]
-; GFX900-NEXT:    v_cndmask_b32_e64 v3, v9, v7, s[4:5]
-; GFX900-NEXT:    v_cndmask_b32_e64 v4, v10, 0, s[6:7]
-; GFX900-NEXT:    v_cndmask_b32_e64 v5, v11, v7, s[6:7]
-; GFX900-NEXT:    v_cndmask_b32_e64 v6, v12, 0, s[8:9]
-; GFX900-NEXT:    v_cndmask_b32_e64 v7, v13, v7, s[8:9]
+; GFX900-NEXT:    v_cndmask_b32_e32 v0, 0, v16, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v1, v7, v17, vcc
+; GFX900-NEXT:    v_cndmask_b32_e64 v2, 0, v8, s[4:5]
+; GFX900-NEXT:    v_cndmask_b32_e64 v3, v7, v9, s[4:5]
+; GFX900-NEXT:    v_cndmask_b32_e64 v4, 0, v10, s[6:7]
+; GFX900-NEXT:    v_cndmask_b32_e64 v5, v7, v11, s[6:7]
+; GFX900-NEXT:    v_cndmask_b32_e64 v6, 0, v12, s[8:9]
+; GFX900-NEXT:    v_cndmask_b32_e64 v7, v7, v13, s[8:9]
 ; GFX900-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_maximum_v4f64:
 ; GFX950:       ; %bb.0:
 ; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX950-NEXT:    v_max_f64 v[16:17], v[0:1], v[8:9]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
+; GFX950-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[8:9]
 ; GFX950-NEXT:    v_max_f64 v[8:9], v[2:3], v[10:11]
 ; GFX950-NEXT:    s_nop 0
-; GFX950-NEXT:    v_cndmask_b32_e64 v0, v16, 0, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v0, 0, v16, vcc
 ; GFX950-NEXT:    v_mov_b32_e32 v16, 0x7ff80000
-; GFX950-NEXT:    v_cndmask_b32_e32 v1, v17, v16, vcc
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[10:11]
+; GFX950-NEXT:    v_cndmask_b32_e32 v1, v16, v17, vcc
+; GFX950-NEXT:    v_cmp_o_f64_e32 vcc, v[2:3], v[10:11]
 ; GFX950-NEXT:    s_nop 1
-; GFX950-NEXT:    v_cndmask_b32_e64 v2, v8, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v3, v9, v16, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v2, 0, v8, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v3, v16, v9, vcc
 ; GFX950-NEXT:    v_max_f64 v[8:9], v[4:5], v[12:13]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[12:13]
+; GFX950-NEXT:    v_cmp_o_f64_e32 vcc, v[4:5], v[12:13]
 ; GFX950-NEXT:    s_nop 1
-; GFX950-NEXT:    v_cndmask_b32_e64 v4, v8, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v5, v9, v16, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v4, 0, v8, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v5, v16, v9, vcc
 ; GFX950-NEXT:    v_max_f64 v[8:9], v[6:7], v[14:15]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[6:7], v[14:15]
+; GFX950-NEXT:    v_cmp_o_f64_e32 vcc, v[6:7], v[14:15]
 ; GFX950-NEXT:    s_nop 1
-; GFX950-NEXT:    v_cndmask_b32_e64 v6, v8, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v7, v9, v16, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v6, 0, v8, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v7, v16, v9, vcc
 ; GFX950-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: v_maximum_v4f64:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    v_max_f64 v[16:17], v[0:1], v[8:9]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[8:9]
+; GFX10-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[8:9]
 ; GFX10-NEXT:    v_max_f64 v[8:9], v[2:3], v[10:11]
-; GFX10-NEXT:    v_cmp_u_f64_e64 s4, v[2:3], v[10:11]
+; GFX10-NEXT:    v_cmp_o_f64_e64 s4, v[2:3], v[10:11]
 ; GFX10-NEXT:    v_max_f64 v[10:11], v[4:5], v[12:13]
-; GFX10-NEXT:    v_cmp_u_f64_e64 s5, v[4:5], v[12:13]
+; GFX10-NEXT:    v_cmp_o_f64_e64 s5, v[4:5], v[12:13]
 ; GFX10-NEXT:    v_max_f64 v[12:13], v[6:7], v[14:15]
-; GFX10-NEXT:    v_cmp_u_f64_e64 s6, v[6:7], v[14:15]
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v16, 0, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v17, 0x7ff80000, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v2, v8, 0, s4
-; GFX10-NEXT:    v_cndmask_b32_e64 v3, v9, 0x7ff80000, s4
-; GFX10-NEXT:    v_cndmask_b32_e64 v4, v10, 0, s5
-; GFX10-NEXT:    v_cndmask_b32_e64 v5, v11, 0x7ff80000, s5
-; GFX10-NEXT:    v_cndmask_b32_e64 v6, v12, 0, s6
-; GFX10-NEXT:    v_cndmask_b32_e64 v7, v13, 0x7ff80000, s6
+; GFX10-NEXT:    v_cmp_o_f64_e64 s6, v[6:7], v[14:15]
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0, v16, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v17, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e64 v2, 0, v8, s4
+; GFX10-NEXT:    v_cndmask_b32_e64 v3, 0x7ff80000, v9, s4
+; GFX10-NEXT:    v_cndmask_b32_e64 v4, 0, v10, s5
+; GFX10-NEXT:    v_cndmask_b32_e64 v5, 0x7ff80000, v11, s5
+; GFX10-NEXT:    v_cndmask_b32_e64 v6, 0, v12, s6
+; GFX10-NEXT:    v_cndmask_b32_e64 v7, 0x7ff80000, v13, s6
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: v_maximum_v4f64:
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NEXT:    v_max_f64 v[16:17], v[0:1], v[8:9]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[8:9]
+; GFX11-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[8:9]
 ; GFX11-NEXT:    v_max_f64 v[8:9], v[2:3], v[10:11]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s0, v[2:3], v[10:11]
+; GFX11-NEXT:    v_cmp_o_f64_e64 s0, v[2:3], v[10:11]
 ; GFX11-NEXT:    v_max_f64 v[10:11], v[4:5], v[12:13]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s1, v[4:5], v[12:13]
+; GFX11-NEXT:    v_cmp_o_f64_e64 s1, v[4:5], v[12:13]
 ; GFX11-NEXT:    v_max_f64 v[12:13], v[6:7], v[14:15]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s2, v[6:7], v[14:15]
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v16, 0, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v17, 0x7ff80000, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v2, v8, 0, s0
-; GFX11-NEXT:    v_cndmask_b32_e64 v3, v9, 0x7ff80000, s0
-; GFX11-NEXT:    v_cndmask_b32_e64 v4, v10, 0, s1
-; GFX11-NEXT:    v_cndmask_b32_e64 v5, v11, 0x7ff80000, s1
-; GFX11-NEXT:    v_cndmask_b32_e64 v6, v12, 0, s2
-; GFX11-NEXT:    v_cndmask_b32_e64 v7, v13, 0x7ff80000, s2
+; GFX11-NEXT:    v_cmp_o_f64_e64 s2, v[6:7], v[14:15]
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0, v16, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v17, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e64 v2, 0, v8, s0
+; GFX11-NEXT:    v_cndmask_b32_e64 v3, 0x7ff80000, v9, s0
+; GFX11-NEXT:    v_cndmask_b32_e64 v4, 0, v10, s1
+; GFX11-NEXT:    v_cndmask_b32_e64 v5, 0x7ff80000, v11, s1
+; GFX11-NEXT:    v_cndmask_b32_e64 v6, 0, v12, s2
+; GFX11-NEXT:    v_cndmask_b32_e64 v7, 0x7ff80000, v13, s2
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: v_maximum_v4f64:
@@ -1530,134 +1530,134 @@ define <4 x double> @v_maximum_v4f64__nsz(<4 x double> %src0, <4 x double> %src1
 ; GFX7:       ; %bb.0:
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX7-NEXT:    v_max_f64 v[16:17], v[0:1], v[8:9]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
+; GFX7-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[8:9]
 ; GFX7-NEXT:    v_max_f64 v[8:9], v[2:3], v[10:11]
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[10:11]
+; GFX7-NEXT:    v_cmp_o_f64_e64 s[4:5], v[2:3], v[10:11]
 ; GFX7-NEXT:    v_max_f64 v[10:11], v[4:5], v[12:13]
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[6:7], v[4:5], v[12:13]
+; GFX7-NEXT:    v_cmp_o_f64_e64 s[6:7], v[4:5], v[12:13]
 ; GFX7-NEXT:    v_max_f64 v[12:13], v[6:7], v[14:15]
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[8:9], v[6:7], v[14:15]
+; GFX7-NEXT:    v_cmp_o_f64_e64 s[8:9], v[6:7], v[14:15]
 ; GFX7-NEXT:    v_mov_b32_e32 v7, 0x7ff80000
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v16, 0, vcc
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v17, v7, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v2, v8, 0, s[4:5]
-; GFX7-NEXT:    v_cndmask_b32_e64 v3, v9, v7, s[4:5]
-; GFX7-NEXT:    v_cndmask_b32_e64 v4, v10, 0, s[6:7]
-; GFX7-NEXT:    v_cndmask_b32_e64 v5, v11, v7, s[6:7]
-; GFX7-NEXT:    v_cndmask_b32_e64 v6, v12, 0, s[8:9]
-; GFX7-NEXT:    v_cndmask_b32_e64 v7, v13, v7, s[8:9]
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, 0, v16, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v7, v17, vcc
+; GFX7-NEXT:    v_cndmask_b32_e64 v2, 0, v8, s[4:5]
+; GFX7-NEXT:    v_cndmask_b32_e64 v3, v7, v9, s[4:5]
+; GFX7-NEXT:    v_cndmask_b32_e64 v4, 0, v10, s[6:7]
+; GFX7-NEXT:    v_cndmask_b32_e64 v5, v7, v11, s[6:7]
+; GFX7-NEXT:    v_cndmask_b32_e64 v6, 0, v12, s[8:9]
+; GFX7-NEXT:    v_cndmask_b32_e64 v7, v7, v13, s[8:9]
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_maximum_v4f64__nsz:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_max_f64 v[16:17], v[0:1], v[8:9]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
+; GFX8-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[8:9]
 ; GFX8-NEXT:    v_max_f64 v[8:9], v[2:3], v[10:11]
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[10:11]
+; GFX8-NEXT:    v_cmp_o_f64_e64 s[4:5], v[2:3], v[10:11]
 ; GFX8-NEXT:    v_max_f64 v[10:11], v[4:5], v[12:13]
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[6:7], v[4:5], v[12:13]
+; GFX8-NEXT:    v_cmp_o_f64_e64 s[6:7], v[4:5], v[12:13]
 ; GFX8-NEXT:    v_max_f64 v[12:13], v[6:7], v[14:15]
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[8:9], v[6:7], v[14:15]
+; GFX8-NEXT:    v_cmp_o_f64_e64 s[8:9], v[6:7], v[14:15]
 ; GFX8-NEXT:    v_mov_b32_e32 v7, 0x7ff80000
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v16, 0, vcc
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v17, v7, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v2, v8, 0, s[4:5]
-; GFX8-NEXT:    v_cndmask_b32_e64 v3, v9, v7, s[4:5]
-; GFX8-NEXT:    v_cndmask_b32_e64 v4, v10, 0, s[6:7]
-; GFX8-NEXT:    v_cndmask_b32_e64 v5, v11, v7, s[6:7]
-; GFX8-NEXT:    v_cndmask_b32_e64 v6, v12, 0, s[8:9]
-; GFX8-NEXT:    v_cndmask_b32_e64 v7, v13, v7, s[8:9]
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, 0, v16, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v7, v17, vcc
+; GFX8-NEXT:    v_cndmask_b32_e64 v2, 0, v8, s[4:5]
+; GFX8-NEXT:    v_cndmask_b32_e64 v3, v7, v9, s[4:5]
+; GFX8-NEXT:    v_cndmask_b32_e64 v4, 0, v10, s[6:7]
+; GFX8-NEXT:    v_cndmask_b32_e64 v5, v7, v11, s[6:7]
+; GFX8-NEXT:    v_cndmask_b32_e64 v6, 0, v12, s[8:9]
+; GFX8-NEXT:    v_cndmask_b32_e64 v7, v7, v13, s[8:9]
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX900-LABEL: v_maximum_v4f64__nsz:
 ; GFX900:       ; %bb.0:
 ; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX900-NEXT:    v_max_f64 v[16:17], v[0:1], v[8:9]
-; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
+; GFX900-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[8:9]
 ; GFX900-NEXT:    v_max_f64 v[8:9], v[2:3], v[10:11]
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[10:11]
+; GFX900-NEXT:    v_cmp_o_f64_e64 s[4:5], v[2:3], v[10:11]
 ; GFX900-NEXT:    v_max_f64 v[10:11], v[4:5], v[12:13]
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[6:7], v[4:5], v[12:13]
+; GFX900-NEXT:    v_cmp_o_f64_e64 s[6:7], v[4:5], v[12:13]
 ; GFX900-NEXT:    v_max_f64 v[12:13], v[6:7], v[14:15]
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[8:9], v[6:7], v[14:15]
+; GFX900-NEXT:    v_cmp_o_f64_e64 s[8:9], v[6:7], v[14:15]
 ; GFX900-NEXT:    v_mov_b32_e32 v7, 0x7ff80000
-; GFX900-NEXT:    v_cndmask_b32_e64 v0, v16, 0, vcc
-; GFX900-NEXT:    v_cndmask_b32_e32 v1, v17, v7, vcc
-; GFX900-NEXT:    v_cndmask_b32_e64 v2, v8, 0, s[4:5]
-; GFX900-NEXT:    v_cndmask_b32_e64 v3, v9, v7, s[4:5]
-; GFX900-NEXT:    v_cndmask_b32_e64 v4, v10, 0, s[6:7]
-; GFX900-NEXT:    v_cndmask_b32_e64 v5, v11, v7, s[6:7]
-; GFX900-NEXT:    v_cndmask_b32_e64 v6, v12, 0, s[8:9]
-; GFX900-NEXT:    v_cndmask_b32_e64 v7, v13, v7, s[8:9]
+; GFX900-NEXT:    v_cndmask_b32_e32 v0, 0, v16, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v1, v7, v17, vcc
+; GFX900-NEXT:    v_cndmask_b32_e64 v2, 0, v8, s[4:5]
+; GFX900-NEXT:    v_cndmask_b32_e64 v3, v7, v9, s[4:5]
+; GFX900-NEXT:    v_cndmask_b32_e64 v4, 0, v10, s[6:7]
+; GFX900-NEXT:    v_cndmask_b32_e64 v5, v7, v11, s[6:7]
+; GFX900-NEXT:    v_cndmask_b32_e64 v6, 0, v12, s[8:9]
+; GFX900-NEXT:    v_cndmask_b32_e64 v7, v7, v13, s[8:9]
 ; GFX900-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_maximum_v4f64__nsz:
 ; GFX950:       ; %bb.0:
 ; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX950-NEXT:    v_max_f64 v[16:17], v[0:1], v[8:9]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
+; GFX950-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[8:9]
 ; GFX950-NEXT:    v_max_f64 v[8:9], v[2:3], v[10:11]
 ; GFX950-NEXT:    s_nop 0
-; GFX950-NEXT:    v_cndmask_b32_e64 v0, v16, 0, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v0, 0, v16, vcc
 ; GFX950-NEXT:    v_mov_b32_e32 v16, 0x7ff80000
-; GFX950-NEXT:    v_cndmask_b32_e32 v1, v17, v16, vcc
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[10:11]
+; GFX950-NEXT:    v_cndmask_b32_e32 v1, v16, v17, vcc
+; GFX950-NEXT:    v_cmp_o_f64_e32 vcc, v[2:3], v[10:11]
 ; GFX950-NEXT:    s_nop 1
-; GFX950-NEXT:    v_cndmask_b32_e64 v2, v8, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v3, v9, v16, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v2, 0, v8, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v3, v16, v9, vcc
 ; GFX950-NEXT:    v_max_f64 v[8:9], v[4:5], v[12:13]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[12:13]
+; GFX950-NEXT:    v_cmp_o_f64_e32 vcc, v[4:5], v[12:13]
 ; GFX950-NEXT:    s_nop 1
-; GFX950-NEXT:    v_cndmask_b32_e64 v4, v8, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v5, v9, v16, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v4, 0, v8, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v5, v16, v9, vcc
 ; GFX950-NEXT:    v_max_f64 v[8:9], v[6:7], v[14:15]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[6:7], v[14:15]
+; GFX950-NEXT:    v_cmp_o_f64_e32 vcc, v[6:7], v[14:15]
 ; GFX950-NEXT:    s_nop 1
-; GFX950-NEXT:    v_cndmask_b32_e64 v6, v8, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v7, v9, v16, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v6, 0, v8, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v7, v16, v9, vcc
 ; GFX950-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: v_maximum_v4f64__nsz:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    v_max_f64 v[16:17], v[0:1], v[8:9]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[8:9]
+; GFX10-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[8:9]
 ; GFX10-NEXT:    v_max_f64 v[8:9], v[2:3], v[10:11]
-; GFX10-NEXT:    v_cmp_u_f64_e64 s4, v[2:3], v[10:11]
+; GFX10-NEXT:    v_cmp_o_f64_e64 s4, v[2:3], v[10:11]
 ; GFX10-NEXT:    v_max_f64 v[10:11], v[4:5], v[12:13]
-; GFX10-NEXT:    v_cmp_u_f64_e64 s5, v[4:5], v[12:13]
+; GFX10-NEXT:    v_cmp_o_f64_e64 s5, v[4:5], v[12:13]
 ; GFX10-NEXT:    v_max_f64 v[12:13], v[6:7], v[14:15]
-; GFX10-NEXT:    v_cmp_u_f64_e64 s6, v[6:7], v[14:15]
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v16, 0, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v17, 0x7ff80000, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v2, v8, 0, s4
-; GFX10-NEXT:    v_cndmask_b32_e64 v3, v9, 0x7ff80000, s4
-; GFX10-NEXT:    v_cndmask_b32_e64 v4, v10, 0, s5
-; GFX10-NEXT:    v_cndmask_b32_e64 v5, v11, 0x7ff80000, s5
-; GFX10-NEXT:    v_cndmask_b32_e64 v6, v12, 0, s6
-; GFX10-NEXT:    v_cndmask_b32_e64 v7, v13, 0x7ff80000, s6
+; GFX10-NEXT:    v_cmp_o_f64_e64 s6, v[6:7], v[14:15]
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0, v16, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v17, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e64 v2, 0, v8, s4
+; GFX10-NEXT:    v_cndmask_b32_e64 v3, 0x7ff80000, v9, s4
+; GFX10-NEXT:    v_cndmask_b32_e64 v4, 0, v10, s5
+; GFX10-NEXT:    v_cndmask_b32_e64 v5, 0x7ff80000, v11, s5
+; GFX10-NEXT:    v_cndmask_b32_e64 v6, 0, v12, s6
+; GFX10-NEXT:    v_cndmask_b32_e64 v7, 0x7ff80000, v13, s6
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: v_maximum_v4f64__nsz:
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NEXT:    v_max_f64 v[16:17], v[0:1], v[8:9]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[8:9]
+; GFX11-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[8:9]
 ; GFX11-NEXT:    v_max_f64 v[8:9], v[2:3], v[10:11]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s0, v[2:3], v[10:11]
+; GFX11-NEXT:    v_cmp_o_f64_e64 s0, v[2:3], v[10:11]
 ; GFX11-NEXT:    v_max_f64 v[10:11], v[4:5], v[12:13]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s1, v[4:5], v[12:13]
+; GFX11-NEXT:    v_cmp_o_f64_e64 s1, v[4:5], v[12:13]
 ; GFX11-NEXT:    v_max_f64 v[12:13], v[6:7], v[14:15]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s2, v[6:7], v[14:15]
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v16, 0, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v17, 0x7ff80000, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v2, v8, 0, s0
-; GFX11-NEXT:    v_cndmask_b32_e64 v3, v9, 0x7ff80000, s0
-; GFX11-NEXT:    v_cndmask_b32_e64 v4, v10, 0, s1
-; GFX11-NEXT:    v_cndmask_b32_e64 v5, v11, 0x7ff80000, s1
-; GFX11-NEXT:    v_cndmask_b32_e64 v6, v12, 0, s2
-; GFX11-NEXT:    v_cndmask_b32_e64 v7, v13, 0x7ff80000, s2
+; GFX11-NEXT:    v_cmp_o_f64_e64 s2, v[6:7], v[14:15]
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0, v16, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v17, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e64 v2, 0, v8, s0
+; GFX11-NEXT:    v_cndmask_b32_e64 v3, 0x7ff80000, v9, s0
+; GFX11-NEXT:    v_cndmask_b32_e64 v4, 0, v10, s1
+; GFX11-NEXT:    v_cndmask_b32_e64 v5, 0x7ff80000, v11, s1
+; GFX11-NEXT:    v_cndmask_b32_e64 v6, 0, v12, s2
+; GFX11-NEXT:    v_cndmask_b32_e64 v7, 0x7ff80000, v13, s2
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: v_maximum_v4f64__nsz:
@@ -1744,39 +1744,39 @@ define <8 x double> @v_maximum_v8f64(<8 x double> %src0, <8 x double> %src1) {
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX7-NEXT:    buffer_load_dword v31, off, s[0:3], s32
 ; GFX7-NEXT:    v_max_f64 v[32:33], v[0:1], v[16:17]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[16:17]
+; GFX7-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[16:17]
 ; GFX7-NEXT:    v_max_f64 v[16:17], v[2:3], v[18:19]
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[18:19]
+; GFX7-NEXT:    v_cmp_o_f64_e64 s[4:5], v[2:3], v[18:19]
 ; GFX7-NEXT:    v_mov_b32_e32 v34, 0x7ff80000
 ; GFX7-NEXT:    v_max_f64 v[18:19], v[4:5], v[20:21]
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[6:7], v[4:5], v[20:21]
+; GFX7-NEXT:    v_cmp_o_f64_e64 s[6:7], v[4:5], v[20:21]
 ; GFX7-NEXT:    v_max_f64 v[20:21], v[6:7], v[22:23]
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[8:9], v[6:7], v[22:23]
+; GFX7-NEXT:    v_cmp_o_f64_e64 s[8:9], v[6:7], v[22:23]
 ; GFX7-NEXT:    v_max_f64 v[22:23], v[8:9], v[24:25]
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[10:11], v[8:9], v[24:25]
+; GFX7-NEXT:    v_cmp_o_f64_e64 s[10:11], v[8:9], v[24:25]
 ; GFX7-NEXT:    v_max_f64 v[24:25], v[10:11], v[26:27]
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[12:13], v[10:11], v[26:27]
+; GFX7-NEXT:    v_cmp_o_f64_e64 s[12:13], v[10:11], v[26:27]
 ; GFX7-NEXT:    v_max_f64 v[26:27], v[12:13], v[28:29]
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[14:15], v[12:13], v[28:29]
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v32, 0, vcc
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v33, v34, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v2, v16, 0, s[4:5]
-; GFX7-NEXT:    v_cndmask_b32_e64 v3, v17, v34, s[4:5]
-; GFX7-NEXT:    v_cndmask_b32_e64 v4, v18, 0, s[6:7]
-; GFX7-NEXT:    v_cndmask_b32_e64 v5, v19, v34, s[6:7]
-; GFX7-NEXT:    v_cndmask_b32_e64 v6, v20, 0, s[8:9]
-; GFX7-NEXT:    v_cndmask_b32_e64 v7, v21, v34, s[8:9]
-; GFX7-NEXT:    v_cndmask_b32_e64 v8, v22, 0, s[10:11]
-; GFX7-NEXT:    v_cndmask_b32_e64 v9, v23, v34, s[10:11]
-; GFX7-NEXT:    v_cndmask_b32_e64 v10, v24, 0, s[12:13]
-; GFX7-NEXT:    v_cndmask_b32_e64 v11, v25, v34, s[12:13]
-; GFX7-NEXT:    v_cndmask_b32_e64 v12, v26, 0, s[14:15]
-; GFX7-NEXT:    v_cndmask_b32_e64 v13, v27, v34, s[14:15]
+; GFX7-NEXT:    v_cmp_o_f64_e64 s[14:15], v[12:13], v[28:29]
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, 0, v32, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v34, v33, vcc
+; GFX7-NEXT:    v_cndmask_b32_e64 v2, 0, v16, s[4:5]
+; GFX7-NEXT:    v_cndmask_b32_e64 v3, v34, v17, s[4:5]
+; GFX7-NEXT:    v_cndmask_b32_e64 v4, 0, v18, s[6:7]
+; GFX7-NEXT:    v_cndmask_b32_e64 v5, v34, v19, s[6:7]
+; GFX7-NEXT:    v_cndmask_b32_e64 v6, 0, v20, s[8:9]
+; GFX7-NEXT:    v_cndmask_b32_e64 v7, v34, v21, s[8:9]
+; GFX7-NEXT:    v_cndmask_b32_e64 v8, 0, v22, s[10:11]
+; GFX7-NEXT:    v_cndmask_b32_e64 v9, v34, v23, s[10:11]
+; GFX7-NEXT:    v_cndmask_b32_e64 v10, 0, v24, s[12:13]
+; GFX7-NEXT:    v_cndmask_b32_e64 v11, v34, v25, s[12:13]
+; GFX7-NEXT:    v_cndmask_b32_e64 v12, 0, v26, s[14:15]
+; GFX7-NEXT:    v_cndmask_b32_e64 v13, v34, v27, s[14:15]
 ; GFX7-NEXT:    s_waitcnt vmcnt(0)
 ; GFX7-NEXT:    v_max_f64 v[16:17], v[14:15], v[30:31]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[14:15], v[30:31]
-; GFX7-NEXT:    v_cndmask_b32_e64 v14, v16, 0, vcc
-; GFX7-NEXT:    v_cndmask_b32_e32 v15, v17, v34, vcc
+; GFX7-NEXT:    v_cmp_o_f64_e32 vcc, v[14:15], v[30:31]
+; GFX7-NEXT:    v_cndmask_b32_e32 v14, 0, v16, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v15, v34, v17, vcc
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_maximum_v8f64:
@@ -1784,39 +1784,39 @@ define <8 x double> @v_maximum_v8f64(<8 x double> %src0, <8 x double> %src1) {
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    buffer_load_dword v31, off, s[0:3], s32
 ; GFX8-NEXT:    v_max_f64 v[32:33], v[0:1], v[16:17]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[16:17]
+; GFX8-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[16:17]
 ; GFX8-NEXT:    v_max_f64 v[16:17], v[2:3], v[18:19]
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[18:19]
+; GFX8-NEXT:    v_cmp_o_f64_e64 s[4:5], v[2:3], v[18:19]
 ; GFX8-NEXT:    v_mov_b32_e32 v34, 0x7ff80000
 ; GFX8-NEXT:    v_max_f64 v[18:19], v[4:5], v[20:21]
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[6:7], v[4:5], v[20:21]
+; GFX8-NEXT:    v_cmp_o_f64_e64 s[6:7], v[4:5], v[20:21]
 ; GFX8-NEXT:    v_max_f64 v[20:21], v[6:7], v[22:23]
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[8:9], v[6:7], v[22:23]
+; GFX8-NEXT:    v_cmp_o_f64_e64 s[8:9], v[6:7], v[22:23]
 ; GFX8-NEXT:    v_max_f64 v[22:23], v[8:9], v[24:25]
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[10:11], v[8:9], v[24:25]
+; GFX8-NEXT:    v_cmp_o_f64_e64 s[10:11], v[8:9], v[24:25]
 ; GFX8-NEXT:    v_max_f64 v[24:25], v[10:11], v[26:27]
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[12:13], v[10:11], v[26:27]
+; GFX8-NEXT:    v_cmp_o_f64_e64 s[12:13], v[10:11], v[26:27]
 ; GFX8-NEXT:    v_max_f64 v[26:27], v[12:13], v[28:29]
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[14:15], v[12:13], v[28:29]
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v32, 0, vcc
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v33, v34, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v2, v16, 0, s[4:5]
-; GFX8-NEXT:    v_cndmask_b32_e64 v3, v17, v34, s[4:5]
-; GFX8-NEXT:    v_cndmask_b32_e64 v4, v18, 0, s[6:7]
-; GFX8-NEXT:    v_cndmask_b32_e64 v5, v19, v34, s[6:7]
-; GFX8-NEXT:    v_cndmask_b32_e64 v6, v20, 0, s[8:9]
-; GFX8-NEXT:    v_cndmask_b32_e64 v7, v21, v34, s[8:9]
-; GFX8-NEXT:    v_cndmask_b32_e64 v8, v22, 0, s[10:11]
-; GFX8-NEXT:    v_cndmask_b32_e64 v9, v23, v34, s[10:11]
-; GFX8-NEXT:    v_cndmask_b32_e64 v10, v24, 0, s[12:13]
-; GFX8-NEXT:    v_cndmask_b32_e64 v11, v25, v34, s[12:13]
-; GFX8-NEXT:    v_cndmask_b32_e64 v12, v26, 0, s[14:15]
-; GFX8-NEXT:    v_cndmask_b32_e64 v13, v27, v34, s[14:15]
+; GFX8-NEXT:    v_cmp_o_f64_e64 s[14:15], v[12:13], v[28:29]
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, 0, v32, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v34, v33, vcc
+; GFX8-NEXT:    v_cndmask_b32_e64 v2, 0, v16, s[4:5]
+; GFX8-NEXT:    v_cndmask_b32_e64 v3, v34, v17, s[4:5]
+; GFX8-NEXT:    v_cndmask_b32_e64 v4, 0, v18, s[6:7]
+; GFX8-NEXT:    v_cndmask_b32_e64 v5, v34, v19, s[6:7]
+; GFX8-NEXT:    v_cndmask_b32_e64 v6, 0, v20, s[8:9]
+; GFX8-NEXT:    v_cndmask_b32_e64 v7, v34, v21, s[8:9]
+; GFX8-NEXT:    v_cndmask_b32_e64 v8, 0, v22, s[10:11]
+; GFX8-NEXT:    v_cndmask_b32_e64 v9, v34, v23, s[10:11]
+; GFX8-NEXT:    v_cndmask_b32_e64 v10, 0, v24, s[12:13]
+; GFX8-NEXT:    v_cndmask_b32_e64 v11, v34, v25, s[12:13]
+; GFX8-NEXT:    v_cndmask_b32_e64 v12, 0, v26, s[14:15]
+; GFX8-NEXT:    v_cndmask_b32_e64 v13, v34, v27, s[14:15]
 ; GFX8-NEXT:    s_waitcnt vmcnt(0)
 ; GFX8-NEXT:    v_max_f64 v[16:17], v[14:15], v[30:31]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[14:15], v[30:31]
-; GFX8-NEXT:    v_cndmask_b32_e64 v14, v16, 0, vcc
-; GFX8-NEXT:    v_cndmask_b32_e32 v15, v17, v34, vcc
+; GFX8-NEXT:    v_cmp_o_f64_e32 vcc, v[14:15], v[30:31]
+; GFX8-NEXT:    v_cndmask_b32_e32 v14, 0, v16, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v15, v34, v17, vcc
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX900-LABEL: v_maximum_v8f64:
@@ -1824,39 +1824,39 @@ define <8 x double> @v_maximum_v8f64(<8 x double> %src0, <8 x double> %src1) {
 ; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX900-NEXT:    buffer_load_dword v31, off, s[0:3], s32
 ; GFX900-NEXT:    v_max_f64 v[32:33], v[0:1], v[16:17]
-; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[16:17]
+; GFX900-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[16:17]
 ; GFX900-NEXT:    v_max_f64 v[16:17], v[2:3], v[18:19]
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[18:19]
+; GFX900-NEXT:    v_cmp_o_f64_e64 s[4:5], v[2:3], v[18:19]
 ; GFX900-NEXT:    v_mov_b32_e32 v34, 0x7ff80000
 ; GFX900-NEXT:    v_max_f64 v[18:19], v[4:5], v[20:21]
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[6:7], v[4:5], v[20:21]
+; GFX900-NEXT:    v_cmp_o_f64_e64 s[6:7], v[4:5], v[20:21]
 ; GFX900-NEXT:    v_max_f64 v[20:21], v[6:7], v[22:23]
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[8:9], v[6:7], v[22:23]
+; GFX900-NEXT:    v_cmp_o_f64_e64 s[8:9], v[6:7], v[22:23]
 ; GFX900-NEXT:    v_max_f64 v[22:23], v[8:9], v[24:25]
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[10:11], v[8:9], v[24:25]
+; GFX900-NEXT:    v_cmp_o_f64_e64 s[10:11], v[8:9], v[24:25]
 ; GFX900-NEXT:    v_max_f64 v[24:25], v[10:11], v[26:27]
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[12:13], v[10:11], v[26:27]
+; GFX900-NEXT:    v_cmp_o_f64_e64 s[12:13], v[10:11], v[26:27]
 ; GFX900-NEXT:    v_max_f64 v[26:27], v[12:13], v[28:29]
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[14:15], v[12:13], v[28:29]
-; GFX900-NEXT:    v_cndmask_b32_e64 v0, v32, 0, vcc
-; GFX900-NEXT:    v_cndmask_b32_e32 v1, v33, v34, vcc
-; GFX900-NEXT:    v_cndmask_b32_e64 v2, v16, 0, s[4:5]
-; GFX900-NEXT:    v_cndmask_b32_e64 v3, v17, v34, s[4:5]
-; GFX900-NEXT:    v_cndmask_b32_e64 v4, v18, 0, s[6:7]
-; GFX900-NEXT:    v_cndmask_b32_e64 v5, v19, v34, s[6:7]
-; GFX900-NEXT:    v_cndmask_b32_e64 v6, v20, 0, s[8:9]
-; GFX900-NEXT:    v_cndmask_b32_e64 v7, v21, v34, s[8:9]
-; GFX900-NEXT:    v_cndmask_b32_e64 v8, v22, 0, s[10:11]
-; GFX900-NEXT:    v_cndmask_b32_e64 v9, v23, v34, s[10:11]
-; GFX900-NEXT:    v_cndmask_b32_e64 v10, v24, 0, s[12:13]
-; GFX900-NEXT:    v_cndmask_b32_e64 v11, v25, v34, s[12:13]
-; GFX900-NEXT:    v_cndmask_b32_e64 v12, v26, 0, s[14:15]
-; GFX900-NEXT:    v_cndmask_b32_e64 v13, v27, v34, s[14:15]
+; GFX900-NEXT:    v_cmp_o_f64_e64 s[14:15], v[12:13], v[28:29]
+; GFX900-NEXT:    v_cndmask_b32_e32 v0, 0, v32, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v1, v34, v33, vcc
+; GFX900-NEXT:    v_cndmask_b32_e64 v2, 0, v16, s[4:5]
+; GFX900-NEXT:    v_cndmask_b32_e64 v3, v34, v17, s[4:5]
+; GFX900-NEXT:    v_cndmask_b32_e64 v4, 0, v18, s[6:7]
+; GFX900-NEXT:    v_cndmask_b32_e64 v5, v34, v19, s[6:7]
+; GFX900-NEXT:    v_cndmask_b32_e64 v6, 0, v20, s[8:9]
+; GFX900-NEXT:    v_cndmask_b32_e64 v7, v34, v21, s[8:9]
+; GFX900-NEXT:    v_cndmask_b32_e64 v8, 0, v22, s[10:11]
+; GFX900-NEXT:    v_cndmask_b32_e64 v9, v34, v23, s[10:11]
+; GFX900-NEXT:    v_cndmask_b32_e64 v10, 0, v24, s[12:13]
+; GFX900-NEXT:    v_cndmask_b32_e64 v11, v34, v25, s[12:13]
+; GFX900-NEXT:    v_cndmask_b32_e64 v12, 0, v26, s[14:15]
+; GFX900-NEXT:    v_cndmask_b32_e64 v13, v34, v27, s[14:15]
 ; GFX900-NEXT:    s_waitcnt vmcnt(0)
 ; GFX900-NEXT:    v_max_f64 v[16:17], v[14:15], v[30:31]
-; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[14:15], v[30:31]
-; GFX900-NEXT:    v_cndmask_b32_e64 v14, v16, 0, vcc
-; GFX900-NEXT:    v_cndmask_b32_e32 v15, v17, v34, vcc
+; GFX900-NEXT:    v_cmp_o_f64_e32 vcc, v[14:15], v[30:31]
+; GFX900-NEXT:    v_cndmask_b32_e32 v14, 0, v16, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v15, v34, v17, vcc
 ; GFX900-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_maximum_v8f64:
@@ -1865,42 +1865,42 @@ define <8 x double> @v_maximum_v8f64(<8 x double> %src0, <8 x double> %src1) {
 ; GFX950-NEXT:    scratch_load_dword v31, off, s32
 ; GFX950-NEXT:    v_mov_b32_e32 v54, 0x7ff80000
 ; GFX950-NEXT:    v_max_f64 v[32:33], v[0:1], v[16:17]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[16:17]
+; GFX950-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[16:17]
 ; GFX950-NEXT:    v_max_f64 v[34:35], v[2:3], v[18:19]
 ; GFX950-NEXT:    v_max_f64 v[36:37], v[4:5], v[20:21]
-; GFX950-NEXT:    v_cndmask_b32_e64 v0, v32, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v1, v33, v54, vcc
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[18:19]
+; GFX950-NEXT:    v_cndmask_b32_e32 v0, 0, v32, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v1, v54, v33, vcc
+; GFX950-NEXT:    v_cmp_o_f64_e32 vcc, v[2:3], v[18:19]
 ; GFX950-NEXT:    v_max_f64 v[38:39], v[6:7], v[22:23]
 ; GFX950-NEXT:    v_max_f64 v[48:49], v[8:9], v[24:25]
-; GFX950-NEXT:    v_cndmask_b32_e64 v2, v34, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v3, v35, v54, vcc
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[20:21]
+; GFX950-NEXT:    v_cndmask_b32_e32 v2, 0, v34, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v3, v54, v35, vcc
+; GFX950-NEXT:    v_cmp_o_f64_e32 vcc, v[4:5], v[20:21]
 ; GFX950-NEXT:    v_max_f64 v[50:51], v[10:11], v[26:27]
 ; GFX950-NEXT:    v_max_f64 v[52:53], v[12:13], v[28:29]
-; GFX950-NEXT:    v_cndmask_b32_e64 v4, v36, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v5, v37, v54, vcc
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[6:7], v[22:23]
+; GFX950-NEXT:    v_cndmask_b32_e32 v4, 0, v36, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v5, v54, v37, vcc
+; GFX950-NEXT:    v_cmp_o_f64_e32 vcc, v[6:7], v[22:23]
 ; GFX950-NEXT:    s_waitcnt vmcnt(0)
 ; GFX950-NEXT:    v_max_f64 v[16:17], v[14:15], v[30:31]
-; GFX950-NEXT:    v_cndmask_b32_e64 v6, v38, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v7, v39, v54, vcc
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[8:9], v[24:25]
+; GFX950-NEXT:    v_cndmask_b32_e32 v6, 0, v38, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v7, v54, v39, vcc
+; GFX950-NEXT:    v_cmp_o_f64_e32 vcc, v[8:9], v[24:25]
 ; GFX950-NEXT:    s_nop 1
-; GFX950-NEXT:    v_cndmask_b32_e64 v8, v48, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v9, v49, v54, vcc
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[10:11], v[26:27]
+; GFX950-NEXT:    v_cndmask_b32_e32 v8, 0, v48, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v9, v54, v49, vcc
+; GFX950-NEXT:    v_cmp_o_f64_e32 vcc, v[10:11], v[26:27]
 ; GFX950-NEXT:    s_nop 1
-; GFX950-NEXT:    v_cndmask_b32_e64 v10, v50, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v11, v51, v54, vcc
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[12:13], v[28:29]
+; GFX950-NEXT:    v_cndmask_b32_e32 v10, 0, v50, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v11, v54, v51, vcc
+; GFX950-NEXT:    v_cmp_o_f64_e32 vcc, v[12:13], v[28:29]
 ; GFX950-NEXT:    s_nop 1
-; GFX950-NEXT:    v_cndmask_b32_e64 v12, v52, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v13, v53, v54, vcc
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[14:15], v[30:31]
+; GFX950-NEXT:    v_cndmask_b32_e32 v12, 0, v52, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v13, v54, v53, vcc
+; GFX950-NEXT:    v_cmp_o_f64_e32 vcc, v[14:15], v[30:31]
 ; GFX950-NEXT:    s_nop 1
-; GFX950-NEXT:    v_cndmask_b32_e64 v14, v16, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v15, v17, v54, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v14, 0, v16, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v15, v54, v17, vcc
 ; GFX950-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: v_maximum_v8f64:
@@ -1908,38 +1908,38 @@ define <8 x double> @v_maximum_v8f64(<8 x double> %src0, <8 x double> %src1) {
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    buffer_load_dword v31, off, s[0:3], s32
 ; GFX10-NEXT:    v_max_f64 v[32:33], v[0:1], v[16:17]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[16:17]
+; GFX10-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[16:17]
 ; GFX10-NEXT:    v_max_f64 v[16:17], v[2:3], v[18:19]
-; GFX10-NEXT:    v_cmp_u_f64_e64 s4, v[2:3], v[18:19]
+; GFX10-NEXT:    v_cmp_o_f64_e64 s4, v[2:3], v[18:19]
 ; GFX10-NEXT:    v_max_f64 v[18:19], v[4:5], v[20:21]
-; GFX10-NEXT:    v_cmp_u_f64_e64 s5, v[4:5], v[20:21]
+; GFX10-NEXT:    v_cmp_o_f64_e64 s5, v[4:5], v[20:21]
 ; GFX10-NEXT:    v_max_f64 v[20:21], v[6:7], v[22:23]
-; GFX10-NEXT:    v_cmp_u_f64_e64 s6, v[6:7], v[22:23]
+; GFX10-NEXT:    v_cmp_o_f64_e64 s6, v[6:7], v[22:23]
 ; GFX10-NEXT:    v_max_f64 v[22:23], v[8:9], v[24:25]
-; GFX10-NEXT:    v_cmp_u_f64_e64 s7, v[8:9], v[24:25]
+; GFX10-NEXT:    v_cmp_o_f64_e64 s7, v[8:9], v[24:25]
 ; GFX10-NEXT:    v_max_f64 v[24:25], v[10:11], v[26:27]
-; GFX10-NEXT:    v_cmp_u_f64_e64 s8, v[10:11], v[26:27]
+; GFX10-NEXT:    v_cmp_o_f64_e64 s8, v[10:11], v[26:27]
 ; GFX10-NEXT:    v_max_f64 v[26:27], v[12:13], v[28:29]
-; GFX10-NEXT:    v_cmp_u_f64_e64 s9, v[12:13], v[28:29]
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v32, 0, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v33, 0x7ff80000, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v2, v16, 0, s4
-; GFX10-NEXT:    v_cndmask_b32_e64 v3, v17, 0x7ff80000, s4
-; GFX10-NEXT:    v_cndmask_b32_e64 v4, v18, 0, s5
-; GFX10-NEXT:    v_cndmask_b32_e64 v5, v19, 0x7ff80000, s5
-; GFX10-NEXT:    v_cndmask_b32_e64 v6, v20, 0, s6
-; GFX10-NEXT:    v_cndmask_b32_e64 v7, v21, 0x7ff80000, s6
-; GFX10-NEXT:    v_cndmask_b32_e64 v8, v22, 0, s7
-; GFX10-NEXT:    v_cndmask_b32_e64 v9, v23, 0x7ff80000, s7
-; GFX10-NEXT:    v_cndmask_b32_e64 v10, v24, 0, s8
-; GFX10-NEXT:    v_cndmask_b32_e64 v11, v25, 0x7ff80000, s8
-; GFX10-NEXT:    v_cndmask_b32_e64 v12, v26, 0, s9
-; GFX10-NEXT:    v_cndmask_b32_e64 v13, v27, 0x7ff80000, s9
+; GFX10-NEXT:    v_cmp_o_f64_e64 s9, v[12:13], v[28:29]
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0, v32, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v33, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e64 v2, 0, v16, s4
+; GFX10-NEXT:    v_cndmask_b32_e64 v3, 0x7ff80000, v17, s4
+; GFX10-NEXT:    v_cndmask_b32_e64 v4, 0, v18, s5
+; GFX10-NEXT:    v_cndmask_b32_e64 v5, 0x7ff80000, v19, s5
+; GFX10-NEXT:    v_cndmask_b32_e64 v6, 0, v20, s6
+; GFX10-NEXT:    v_cndmask_b32_e64 v7, 0x7ff80000, v21, s6
+; GFX10-NEXT:    v_cndmask_b32_e64 v8, 0, v22, s7
+; GFX10-NEXT:    v_cndmask_b32_e64 v9, 0x7ff80000, v23, s7
+; GFX10-NEXT:    v_cndmask_b32_e64 v10, 0, v24, s8
+; GFX10-NEXT:    v_cndmask_b32_e64 v11, 0x7ff80000, v25, s8
+; GFX10-NEXT:    v_cndmask_b32_e64 v12, 0, v26, s9
+; GFX10-NEXT:    v_cndmask_b32_e64 v13, 0x7ff80000, v27, s9
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    v_max_f64 v[28:29], v[14:15], v[30:31]
-; GFX10-NEXT:    v_cmp_u_f64_e64 s10, v[14:15], v[30:31]
-; GFX10-NEXT:    v_cndmask_b32_e64 v14, v28, 0, s10
-; GFX10-NEXT:    v_cndmask_b32_e64 v15, v29, 0x7ff80000, s10
+; GFX10-NEXT:    v_cmp_o_f64_e64 s10, v[14:15], v[30:31]
+; GFX10-NEXT:    v_cndmask_b32_e64 v14, 0, v28, s10
+; GFX10-NEXT:    v_cndmask_b32_e64 v15, 0x7ff80000, v29, s10
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: v_maximum_v8f64:
@@ -1947,39 +1947,39 @@ define <8 x double> @v_maximum_v8f64(<8 x double> %src0, <8 x double> %src1) {
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NEXT:    scratch_load_b32 v31, off, s32
 ; GFX11-NEXT:    v_max_f64 v[32:33], v[0:1], v[16:17]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[16:17]
+; GFX11-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[16:17]
 ; GFX11-NEXT:    v_max_f64 v[16:17], v[2:3], v[18:19]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s0, v[2:3], v[18:19]
+; GFX11-NEXT:    v_cmp_o_f64_e64 s0, v[2:3], v[18:19]
 ; GFX11-NEXT:    v_max_f64 v[18:19], v[4:5], v[20:21]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s1, v[4:5], v[20:21]
+; GFX11-NEXT:    v_cmp_o_f64_e64 s1, v[4:5], v[20:21]
 ; GFX11-NEXT:    v_max_f64 v[20:21], v[6:7], v[22:23]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s2, v[6:7], v[22:23]
+; GFX11-NEXT:    v_cmp_o_f64_e64 s2, v[6:7], v[22:23]
 ; GFX11-NEXT:    v_max_f64 v[22:23], v[8:9], v[24:25]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s3, v[8:9], v[24:25]
+; GFX11-NEXT:    v_cmp_o_f64_e64 s3, v[8:9], v[24:25]
 ; GFX11-NEXT:    v_max_f64 v[24:25], v[10:11], v[26:27]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s4, v[10:11], v[26:27]
+; GFX11-NEXT:    v_cmp_o_f64_e64 s4, v[10:11], v[26:27]
 ; GFX11-NEXT:    v_max_f64 v[26:27], v[12:13], v[28:29]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s5, v[12:13], v[28:29]
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v32, 0, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v33, 0x7ff80000, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v2, v16, 0, s0
-; GFX11-NEXT:    v_cndmask_b32_e64 v3, v17, 0x7ff80000, s0
-; GFX11-NEXT:    v_cndmask_b32_e64 v4, v18, 0, s1
-; GFX11-NEXT:    v_cndmask_b32_e64 v5, v19, 0x7ff80000, s1
-; GFX11-NEXT:    v_cndmask_b32_e64 v6, v20, 0, s2
-; GFX11-NEXT:    v_cndmask_b32_e64 v7, v21, 0x7ff80000, s2
-; GFX11-NEXT:    v_cndmask_b32_e64 v8, v22, 0, s3
-; GFX11-NEXT:    v_cndmask_b32_e64 v9, v23, 0x7ff80000, s3
-; GFX11-NEXT:    v_cndmask_b32_e64 v10, v24, 0, s4
-; GFX11-NEXT:    v_cndmask_b32_e64 v11, v25, 0x7ff80000, s4
-; GFX11-NEXT:    v_cndmask_b32_e64 v12, v26, 0, s5
-; GFX11-NEXT:    v_cndmask_b32_e64 v13, v27, 0x7ff80000, s5
+; GFX11-NEXT:    v_cmp_o_f64_e64 s5, v[12:13], v[28:29]
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0, v32, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v33, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e64 v2, 0, v16, s0
+; GFX11-NEXT:    v_cndmask_b32_e64 v3, 0x7ff80000, v17, s0
+; GFX11-NEXT:    v_cndmask_b32_e64 v4, 0, v18, s1
+; GFX11-NEXT:    v_cndmask_b32_e64 v5, 0x7ff80000, v19, s1
+; GFX11-NEXT:    v_cndmask_b32_e64 v6, 0, v20, s2
+; GFX11-NEXT:    v_cndmask_b32_e64 v7, 0x7ff80000, v21, s2
+; GFX11-NEXT:    v_cndmask_b32_e64 v8, 0, v22, s3
+; GFX11-NEXT:    v_cndmask_b32_e64 v9, 0x7ff80000, v23, s3
+; GFX11-NEXT:    v_cndmask_b32_e64 v10, 0, v24, s4
+; GFX11-NEXT:    v_cndmask_b32_e64 v11, 0x7ff80000, v25, s4
+; GFX11-NEXT:    v_cndmask_b32_e64 v12, 0, v26, s5
+; GFX11-NEXT:    v_cndmask_b32_e64 v13, 0x7ff80000, v27, s5
 ; GFX11-NEXT:    s_waitcnt vmcnt(0)
 ; GFX11-NEXT:    v_max_f64 v[28:29], v[14:15], v[30:31]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s6, v[14:15], v[30:31]
+; GFX11-NEXT:    v_cmp_o_f64_e64 s6, v[14:15], v[30:31]
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_cndmask_b32_e64 v14, v28, 0, s6
-; GFX11-NEXT:    v_cndmask_b32_e64 v15, v29, 0x7ff80000, s6
+; GFX11-NEXT:    v_cndmask_b32_e64 v14, 0, v28, s6
+; GFX11-NEXT:    v_cndmask_b32_e64 v15, 0x7ff80000, v29, s6
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: v_maximum_v8f64:
@@ -2011,117 +2011,117 @@ define <16 x double> @v_maximum_v16f64(<16 x double> %src0, <16 x double> %src1)
 ; GFX7-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:8
 ; GFX7-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:4
 ; GFX7-NEXT:    s_waitcnt vmcnt(0)
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[31:32]
+; GFX7-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[31:32]
 ; GFX7-NEXT:    v_max_f64 v[0:1], v[0:1], v[31:32]
 ; GFX7-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:16
 ; GFX7-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:12
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
 ; GFX7-NEXT:    s_waitcnt vmcnt(0)
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[31:32]
+; GFX7-NEXT:    v_cmp_o_f64_e64 s[4:5], v[2:3], v[31:32]
 ; GFX7-NEXT:    v_max_f64 v[2:3], v[2:3], v[31:32]
 ; GFX7-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:24
 ; GFX7-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:20
-; GFX7-NEXT:    v_cndmask_b32_e64 v2, v2, 0, s[4:5]
+; GFX7-NEXT:    v_cndmask_b32_e64 v2, 0, v2, s[4:5]
 ; GFX7-NEXT:    s_waitcnt vmcnt(0)
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[6:7], v[4:5], v[31:32]
+; GFX7-NEXT:    v_cmp_o_f64_e64 s[6:7], v[4:5], v[31:32]
 ; GFX7-NEXT:    v_max_f64 v[4:5], v[4:5], v[31:32]
 ; GFX7-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:32
 ; GFX7-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:28
-; GFX7-NEXT:    v_cndmask_b32_e64 v4, v4, 0, s[6:7]
+; GFX7-NEXT:    v_cndmask_b32_e64 v4, 0, v4, s[6:7]
 ; GFX7-NEXT:    s_waitcnt vmcnt(0)
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[8:9], v[6:7], v[31:32]
+; GFX7-NEXT:    v_cmp_o_f64_e64 s[8:9], v[6:7], v[31:32]
 ; GFX7-NEXT:    v_max_f64 v[6:7], v[6:7], v[31:32]
 ; GFX7-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:36
 ; GFX7-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:40
-; GFX7-NEXT:    v_cndmask_b32_e64 v6, v6, 0, s[8:9]
+; GFX7-NEXT:    v_cndmask_b32_e64 v6, 0, v6, s[8:9]
 ; GFX7-NEXT:    s_waitcnt vmcnt(0)
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[10:11], v[8:9], v[31:32]
+; GFX7-NEXT:    v_cmp_o_f64_e64 s[10:11], v[8:9], v[31:32]
 ; GFX7-NEXT:    v_max_f64 v[8:9], v[8:9], v[31:32]
 ; GFX7-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:48
 ; GFX7-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:44
-; GFX7-NEXT:    v_cndmask_b32_e64 v8, v8, 0, s[10:11]
+; GFX7-NEXT:    v_cndmask_b32_e64 v8, 0, v8, s[10:11]
 ; GFX7-NEXT:    s_waitcnt vmcnt(0)
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[12:13], v[10:11], v[31:32]
+; GFX7-NEXT:    v_cmp_o_f64_e64 s[12:13], v[10:11], v[31:32]
 ; GFX7-NEXT:    v_max_f64 v[10:11], v[10:11], v[31:32]
 ; GFX7-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:56
 ; GFX7-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:52
-; GFX7-NEXT:    v_cndmask_b32_e64 v10, v10, 0, s[12:13]
+; GFX7-NEXT:    v_cndmask_b32_e64 v10, 0, v10, s[12:13]
 ; GFX7-NEXT:    s_waitcnt vmcnt(0)
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[14:15], v[12:13], v[31:32]
+; GFX7-NEXT:    v_cmp_o_f64_e64 s[14:15], v[12:13], v[31:32]
 ; GFX7-NEXT:    v_max_f64 v[12:13], v[12:13], v[31:32]
 ; GFX7-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:64
 ; GFX7-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:60
-; GFX7-NEXT:    v_cndmask_b32_e64 v12, v12, 0, s[14:15]
+; GFX7-NEXT:    v_cndmask_b32_e64 v12, 0, v12, s[14:15]
 ; GFX7-NEXT:    s_waitcnt vmcnt(0)
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[16:17], v[14:15], v[31:32]
+; GFX7-NEXT:    v_cmp_o_f64_e64 s[16:17], v[14:15], v[31:32]
 ; GFX7-NEXT:    v_max_f64 v[14:15], v[14:15], v[31:32]
 ; GFX7-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:68
 ; GFX7-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:72
-; GFX7-NEXT:    v_cndmask_b32_e64 v14, v14, 0, s[16:17]
+; GFX7-NEXT:    v_cndmask_b32_e64 v14, 0, v14, s[16:17]
 ; GFX7-NEXT:    s_waitcnt vmcnt(0)
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[18:19], v[16:17], v[31:32]
+; GFX7-NEXT:    v_cmp_o_f64_e64 s[18:19], v[16:17], v[31:32]
 ; GFX7-NEXT:    v_max_f64 v[16:17], v[16:17], v[31:32]
 ; GFX7-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:80
 ; GFX7-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:76
-; GFX7-NEXT:    v_cndmask_b32_e64 v16, v16, 0, s[18:19]
+; GFX7-NEXT:    v_cndmask_b32_e64 v16, 0, v16, s[18:19]
 ; GFX7-NEXT:    s_waitcnt vmcnt(0)
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[20:21], v[18:19], v[31:32]
+; GFX7-NEXT:    v_cmp_o_f64_e64 s[20:21], v[18:19], v[31:32]
 ; GFX7-NEXT:    v_max_f64 v[18:19], v[18:19], v[31:32]
 ; GFX7-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:88
 ; GFX7-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:84
-; GFX7-NEXT:    v_cndmask_b32_e64 v18, v18, 0, s[20:21]
+; GFX7-NEXT:    v_cndmask_b32_e64 v18, 0, v18, s[20:21]
 ; GFX7-NEXT:    s_waitcnt vmcnt(0)
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[22:23], v[20:21], v[31:32]
+; GFX7-NEXT:    v_cmp_o_f64_e64 s[22:23], v[20:21], v[31:32]
 ; GFX7-NEXT:    v_max_f64 v[20:21], v[20:21], v[31:32]
 ; GFX7-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:96
 ; GFX7-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:92
-; GFX7-NEXT:    v_cndmask_b32_e64 v20, v20, 0, s[22:23]
+; GFX7-NEXT:    v_cndmask_b32_e64 v20, 0, v20, s[22:23]
 ; GFX7-NEXT:    s_waitcnt vmcnt(0)
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[24:25], v[22:23], v[31:32]
+; GFX7-NEXT:    v_cmp_o_f64_e64 s[24:25], v[22:23], v[31:32]
 ; GFX7-NEXT:    v_max_f64 v[22:23], v[22:23], v[31:32]
 ; GFX7-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:100
 ; GFX7-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:104
-; GFX7-NEXT:    v_cndmask_b32_e64 v22, v22, 0, s[24:25]
+; GFX7-NEXT:    v_cndmask_b32_e64 v22, 0, v22, s[24:25]
 ; GFX7-NEXT:    s_waitcnt vmcnt(0)
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[26:27], v[24:25], v[31:32]
+; GFX7-NEXT:    v_cmp_o_f64_e64 s[26:27], v[24:25], v[31:32]
 ; GFX7-NEXT:    v_max_f64 v[24:25], v[24:25], v[31:32]
 ; GFX7-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:112
 ; GFX7-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:108
-; GFX7-NEXT:    v_cndmask_b32_e64 v24, v24, 0, s[26:27]
+; GFX7-NEXT:    v_cndmask_b32_e64 v24, 0, v24, s[26:27]
 ; GFX7-NEXT:    s_waitcnt vmcnt(0)
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[28:29], v[26:27], v[31:32]
+; GFX7-NEXT:    v_cmp_o_f64_e64 s[28:29], v[26:27], v[31:32]
 ; GFX7-NEXT:    v_max_f64 v[26:27], v[26:27], v[31:32]
 ; GFX7-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:120
 ; GFX7-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:116
-; GFX7-NEXT:    v_cndmask_b32_e64 v26, v26, 0, s[28:29]
+; GFX7-NEXT:    v_cndmask_b32_e64 v26, 0, v26, s[28:29]
 ; GFX7-NEXT:    s_waitcnt vmcnt(0)
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[40:41], v[28:29], v[31:32]
+; GFX7-NEXT:    v_cmp_o_f64_e64 s[40:41], v[28:29], v[31:32]
 ; GFX7-NEXT:    v_max_f64 v[28:29], v[28:29], v[31:32]
 ; GFX7-NEXT:    buffer_load_dword v31, off, s[0:3], s32
 ; GFX7-NEXT:    buffer_load_dword v33, off, s[0:3], s32 offset:128
 ; GFX7-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:124
-; GFX7-NEXT:    v_cndmask_b32_e64 v28, v28, 0, s[40:41]
+; GFX7-NEXT:    v_cndmask_b32_e64 v28, 0, v28, s[40:41]
 ; GFX7-NEXT:    s_waitcnt vmcnt(0)
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[42:43], v[30:31], v[32:33]
+; GFX7-NEXT:    v_cmp_o_f64_e64 s[42:43], v[30:31], v[32:33]
 ; GFX7-NEXT:    v_max_f64 v[30:31], v[30:31], v[32:33]
 ; GFX7-NEXT:    v_mov_b32_e32 v32, 0x7ff80000
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v32, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v3, v3, v32, s[4:5]
-; GFX7-NEXT:    v_cndmask_b32_e64 v5, v5, v32, s[6:7]
-; GFX7-NEXT:    v_cndmask_b32_e64 v7, v7, v32, s[8:9]
-; GFX7-NEXT:    v_cndmask_b32_e64 v9, v9, v32, s[10:11]
-; GFX7-NEXT:    v_cndmask_b32_e64 v11, v11, v32, s[12:13]
-; GFX7-NEXT:    v_cndmask_b32_e64 v13, v13, v32, s[14:15]
-; GFX7-NEXT:    v_cndmask_b32_e64 v15, v15, v32, s[16:17]
-; GFX7-NEXT:    v_cndmask_b32_e64 v17, v17, v32, s[18:19]
-; GFX7-NEXT:    v_cndmask_b32_e64 v19, v19, v32, s[20:21]
-; GFX7-NEXT:    v_cndmask_b32_e64 v21, v21, v32, s[22:23]
-; GFX7-NEXT:    v_cndmask_b32_e64 v23, v23, v32, s[24:25]
-; GFX7-NEXT:    v_cndmask_b32_e64 v25, v25, v32, s[26:27]
-; GFX7-NEXT:    v_cndmask_b32_e64 v27, v27, v32, s[28:29]
-; GFX7-NEXT:    v_cndmask_b32_e64 v29, v29, v32, s[40:41]
-; GFX7-NEXT:    v_cndmask_b32_e64 v31, v31, v32, s[42:43]
-; GFX7-NEXT:    v_cndmask_b32_e64 v30, v30, 0, s[42:43]
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v32, v1, vcc
+; GFX7-NEXT:    v_cndmask_b32_e64 v3, v32, v3, s[4:5]
+; GFX7-NEXT:    v_cndmask_b32_e64 v5, v32, v5, s[6:7]
+; GFX7-NEXT:    v_cndmask_b32_e64 v7, v32, v7, s[8:9]
+; GFX7-NEXT:    v_cndmask_b32_e64 v9, v32, v9, s[10:11]
+; GFX7-NEXT:    v_cndmask_b32_e64 v11, v32, v11, s[12:13]
+; GFX7-NEXT:    v_cndmask_b32_e64 v13, v32, v13, s[14:15]
+; GFX7-NEXT:    v_cndmask_b32_e64 v15, v32, v15, s[16:17]
+; GFX7-NEXT:    v_cndmask_b32_e64 v17, v32, v17, s[18:19]
+; GFX7-NEXT:    v_cndmask_b32_e64 v19, v32, v19, s[20:21]
+; GFX7-NEXT:    v_cndmask_b32_e64 v21, v32, v21, s[22:23]
+; GFX7-NEXT:    v_cndmask_b32_e64 v23, v32, v23, s[24:25]
+; GFX7-NEXT:    v_cndmask_b32_e64 v25, v32, v25, s[26:27]
+; GFX7-NEXT:    v_cndmask_b32_e64 v27, v32, v27, s[28:29]
+; GFX7-NEXT:    v_cndmask_b32_e64 v29, v32, v29, s[40:41]
+; GFX7-NEXT:    v_cndmask_b32_e64 v31, v32, v31, s[42:43]
+; GFX7-NEXT:    v_cndmask_b32_e64 v30, 0, v30, s[42:43]
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_maximum_v16f64:
@@ -2130,117 +2130,117 @@ define <16 x double> @v_maximum_v16f64(<16 x double> %src0, <16 x double> %src1)
 ; GFX8-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:8
 ; GFX8-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:4
 ; GFX8-NEXT:    s_waitcnt vmcnt(0)
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[31:32]
+; GFX8-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[31:32]
 ; GFX8-NEXT:    v_max_f64 v[0:1], v[0:1], v[31:32]
 ; GFX8-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:16
 ; GFX8-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:12
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
 ; GFX8-NEXT:    s_waitcnt vmcnt(0)
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[31:32]
+; GFX8-NEXT:    v_cmp_o_f64_e64 s[4:5], v[2:3], v[31:32]
 ; GFX8-NEXT:    v_max_f64 v[2:3], v[2:3], v[31:32]
 ; GFX8-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:24
 ; GFX8-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:20
-; GFX8-NEXT:    v_cndmask_b32_e64 v2, v2, 0, s[4:5]
+; GFX8-NEXT:    v_cndmask_b32_e64 v2, 0, v2, s[4:5]
 ; GFX8-NEXT:    s_waitcnt vmcnt(0)
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[6:7], v[4:5], v[31:32]
+; GFX8-NEXT:    v_cmp_o_f64_e64 s[6:7], v[4:5], v[31:32]
 ; GFX8-NEXT:    v_max_f64 v[4:5], v[4:5], v[31:32]
 ; GFX8-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:32
 ; GFX8-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:28
-; GFX8-NEXT:    v_cndmask_b32_e64 v4, v4, 0, s[6:7]
+; GFX8-NEXT:    v_cndmask_b32_e64 v4, 0, v4, s[6:7]
 ; GFX8-NEXT:    s_waitcnt vmcnt(0)
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[8:9], v[6:7], v[31:32]
+; GFX8-NEXT:    v_cmp_o_f64_e64 s[8:9], v[6:7], v[31:32]
 ; GFX8-NEXT:    v_max_f64 v[6:7], v[6:7], v[31:32]
 ; GFX8-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:36
 ; GFX8-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:40
-; GFX8-NEXT:    v_cndmask_b32_e64 v6, v6, 0, s[8:9]
+; GFX8-NEXT:    v_cndmask_b32_e64 v6, 0, v6, s[8:9]
 ; GFX8-NEXT:    s_waitcnt vmcnt(0)
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[10:11], v[8:9], v[31:32]
+; GFX8-NEXT:    v_cmp_o_f64_e64 s[10:11], v[8:9], v[31:32]
 ; GFX8-NEXT:    v_max_f64 v[8:9], v[8:9], v[31:32]
 ; GFX8-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:48
 ; GFX8-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:44
-; GFX8-NEXT:    v_cndmask_b32_e64 v8, v8, 0, s[10:11]
+; GFX8-NEXT:    v_cndmask_b32_e64 v8, 0, v8, s[10:11]
 ; GFX8-NEXT:    s_waitcnt vmcnt(0)
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[12:13], v[10:11], v[31:32]
+; GFX8-NEXT:    v_cmp_o_f64_e64 s[12:13], v[10:11], v[31:32]
 ; GFX8-NEXT:    v_max_f64 v[10:11], v[10:11], v[31:32]
 ; GFX8-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:56
 ; GFX8-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:52
-; GFX8-NEXT:    v_cndmask_b32_e64 v10, v10, 0, s[12:13]
+; GFX8-NEXT:    v_cndmask_b32_e64 v10, 0, v10, s[12:13]
 ; GFX8-NEXT:    s_waitcnt vmcnt(0)
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[14:15], v[12:13], v[31:32]
+; GFX8-NEXT:    v_cmp_o_f64_e64 s[14:15], v[12:13], v[31:32]
 ; GFX8-NEXT:    v_max_f64 v[12:13], v[12:13], v[31:32]
 ; GFX8-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:64
 ; GFX8-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:60
-; GFX8-NEXT:    v_cndmask_b32_e64 v12, v12, 0, s[14:15]
+; GFX8-NEXT:    v_cndmask_b32_e64 v12, 0, v12, s[14:15]
 ; GFX8-NEXT:    s_waitcnt vmcnt(0)
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[16:17], v[14:15], v[31:32]
+; GFX8-NEXT:    v_cmp_o_f64_e64 s[16:17], v[14:15], v[31:32]
 ; GFX8-NEXT:    v_max_f64 v[14:15], v[14:15], v[31:32]
 ; GFX8-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:68
 ; GFX8-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:72
-; GFX8-NEXT:    v_cndmask_b32_e64 v14, v14, 0, s[16:17]
+; GFX8-NEXT:    v_cndmask_b32_e64 v14, 0, v14, s[16:17]
 ; GFX8-NEXT:    s_waitcnt vmcnt(0)
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[18:19], v[16:17], v[31:32]
+; GFX8-NEXT:    v_cmp_o_f64_e64 s[18:19], v[16:17], v[31:32]
 ; GFX8-NEXT:    v_max_f64 v[16:17], v[16:17], v[31:32]
 ; GFX8-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:80
 ; GFX8-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:76
-; GFX8-NEXT:    v_cndmask_b32_e64 v16, v16, 0, s[18:19]
+; GFX8-NEXT:    v_cndmask_b32_e64 v16, 0, v16, s[18:19]
 ; GFX8-NEXT:    s_waitcnt vmcnt(0)
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[20:21], v[18:19], v[31:32]
+; GFX8-NEXT:    v_cmp_o_f64_e64 s[20:21], v[18:19], v[31:32]
 ; GFX8-NEXT:    v_max_f64 v[18:19], v[18:19], v[31:32]
 ; GFX8-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:88
 ; GFX8-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:84
-; GFX8-NEXT:    v_cndmask_b32_e64 v18, v18, 0, s[20:21]
+; GFX8-NEXT:    v_cndmask_b32_e64 v18, 0, v18, s[20:21]
 ; GFX8-NEXT:    s_waitcnt vmcnt(0)
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[22:23], v[20:21], v[31:32]
+; GFX8-NEXT:    v_cmp_o_f64_e64 s[22:23], v[20:21], v[31:32]
 ; GFX8-NEXT:    v_max_f64 v[20:21], v[20:21], v[31:32]
 ; GFX8-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:96
 ; GFX8-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:92
-; GFX8-NEXT:    v_cndmask_b32_e64 v20, v20, 0, s[22:23]
+; GFX8-NEXT:    v_cndmask_b32_e64 v20, 0, v20, s[22:23]
 ; GFX8-NEXT:    s_waitcnt vmcnt(0)
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[24:25], v[22:23], v[31:32]
+; GFX8-NEXT:    v_cmp_o_f64_e64 s[24:25], v[22:23], v[31:32]
 ; GFX8-NEXT:    v_max_f64 v[22:23], v[22:23], v[31:32]
 ; GFX8-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:100
 ; GFX8-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:104
-; GFX8-NEXT:    v_cndmask_b32_e64 v22, v22, 0, s[24:25]
+; GFX8-NEXT:    v_cndmask_b32_e64 v22, 0, v22, s[24:25]
 ; GFX8-NEXT:    s_waitcnt vmcnt(0)
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[26:27], v[24:25], v[31:32]
+; GFX8-NEXT:    v_cmp_o_f64_e64 s[26:27], v[24:25], v[31:32]
 ; GFX8-NEXT:    v_max_f64 v[24:25], v[24:25], v[31:32]
 ; GFX8-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:112
 ; GFX8-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:108
-; GFX8-NEXT:    v_cndmask_b32_e64 v24, v24, 0, s[26:27]
+; GFX8-NEXT:    v_cndmask_b32_e64 v24, 0, v24, s[26:27]
 ; GFX8-NEXT:    s_waitcnt vmcnt(0)
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[28:29], v[26:27], v[31:32]
+; GFX8-NEXT:    v_cmp_o_f64_e64 s[28:29], v[26:27], v[31:32]
 ; GFX8-NEXT:    v_max_f64 v[26:27], v[26:27], v[31:32]
 ; GFX8-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:120
 ; GFX8-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:116
-; GFX8-NEXT:    v_cndmask_b32_e64 v26, v26, 0, s[28:29]
+; GFX8-NEXT:    v_cndmask_b32_e64 v26, 0, v26, s[28:29]
 ; GFX8-NEXT:    s_waitcnt vmcnt(0)
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[40:41], v[28:29], v[31:32]
+; GFX8-NEXT:    v_cmp_o_f64_e64 s[40:41], v[28:29], v[31:32]
 ; GFX8-NEXT:    v_max_f64 v[28:29], v[28:29], v[31:32]
 ; GFX8-NEXT:    buffer_load_dword v31, off, s[0:3], s32
 ; GFX8-NEXT:    buffer_load_dword v33, off, s[0:3], s32 offset:128
 ; GFX8-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:124
-; GFX8-NEXT:    v_cndmask_b32_e64 v28, v28, 0, s[40:41]
+; GFX8-NEXT:    v_cndmask_b32_e64 v28, 0, v28, s[40:41]
 ; GFX8-NEXT:    s_waitcnt vmcnt(0)
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[42:43], v[30:31], v[32:33]
+; GFX8-NEXT:    v_cmp_o_f64_e64 s[42:43], v[30:31], v[32:33]
 ; GFX8-NEXT:    v_max_f64 v[30:31], v[30:31], v[32:33]
 ; GFX8-NEXT:    v_mov_b32_e32 v32, 0x7ff80000
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v32, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v3, v3, v32, s[4:5]
-; GFX8-NEXT:    v_cndmask_b32_e64 v5, v5, v32, s[6:7]
-; GFX8-NEXT:    v_cndmask_b32_e64 v7, v7, v32, s[8:9]
-; GFX8-NEXT:    v_cndmask_b32_e64 v9, v9, v32, s[10:11]
-; GFX8-NEXT:    v_cndmask_b32_e64 v11, v11, v32, s[12:13]
-; GFX8-NEXT:    v_cndmask_b32_e64 v13, v13, v32, s[14:15]
-; GFX8-NEXT:    v_cndmask_b32_e64 v15, v15, v32, s[16:17]
-; GFX8-NEXT:    v_cndmask_b32_e64 v17, v17, v32, s[18:19]
-; GFX8-NEXT:    v_cndmask_b32_e64 v19, v19, v32, s[20:21]
-; GFX8-NEXT:    v_cndmask_b32_e64 v21, v21, v32, s[22:23]
-; GFX8-NEXT:    v_cndmask_b32_e64 v23, v23, v32, s[24:25]
-; GFX8-NEXT:    v_cndmask_b32_e64 v25, v25, v32, s[26:27]
-; GFX8-NEXT:    v_cndmask_b32_e64 v27, v27, v32, s[28:29]
-; GFX8-NEXT:    v_cndmask_b32_e64 v29, v29, v32, s[40:41]
-; GFX8-NEXT:    v_cndmask_b32_e64 v31, v31, v32, s[42:43]
-; GFX8-NEXT:    v_cndmask_b32_e64 v30, v30, 0, s[42:43]
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v32, v1, vcc
+; GFX8-NEXT:    v_cndmask_b32_e64 v3, v32, v3, s[4:5]
+; GFX8-NEXT:    v_cndmask_b32_e64 v5, v32, v5, s[6:7]
+; GFX8-NEXT:    v_cndmask_b32_e64 v7, v32, v7, s[8:9]
+; GFX8-NEXT:    v_cndmask_b32_e64 v9, v32, v9, s[10:11]
+; GFX8-NEXT:    v_cndmask_b32_e64 v11, v32, v11, s[12:13]
+; GFX8-NEXT:    v_cndmask_b32_e64 v13, v32, v13, s[14:15]
+; GFX8-NEXT:    v_cndmask_b32_e64 v15, v32, v15, s[16:17]
+; GFX8-NEXT:    v_cndmask_b32_e64 v17, v32, v17, s[18:19]
+; GFX8-NEXT:    v_cndmask_b32_e64 v19, v32, v19, s[20:21]
+; GFX8-NEXT:    v_cndmask_b32_e64 v21, v32, v21, s[22:23]
+; GFX8-NEXT:    v_cndmask_b32_e64 v23, v32, v23, s[24:25]
+; GFX8-NEXT:    v_cndmask_b32_e64 v25, v32, v25, s[26:27]
+; GFX8-NEXT:    v_cndmask_b32_e64 v27, v32, v27, s[28:29]
+; GFX8-NEXT:    v_cndmask_b32_e64 v29, v32, v29, s[40:41]
+; GFX8-NEXT:    v_cndmask_b32_e64 v31, v32, v31, s[42:43]
+; GFX8-NEXT:    v_cndmask_b32_e64 v30, 0, v30, s[42:43]
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX900-LABEL: v_maximum_v16f64:
@@ -2249,117 +2249,117 @@ define <16 x double> @v_maximum_v16f64(<16 x double> %src0, <16 x double> %src1)
 ; GFX900-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:8
 ; GFX900-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:4
 ; GFX900-NEXT:    s_waitcnt vmcnt(0)
-; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[31:32]
+; GFX900-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[31:32]
 ; GFX900-NEXT:    v_max_f64 v[0:1], v[0:1], v[31:32]
 ; GFX900-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:16
 ; GFX900-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:12
-; GFX900-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
 ; GFX900-NEXT:    s_waitcnt vmcnt(0)
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[31:32]
+; GFX900-NEXT:    v_cmp_o_f64_e64 s[4:5], v[2:3], v[31:32]
 ; GFX900-NEXT:    v_max_f64 v[2:3], v[2:3], v[31:32]
 ; GFX900-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:24
 ; GFX900-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:20
-; GFX900-NEXT:    v_cndmask_b32_e64 v2, v2, 0, s[4:5]
+; GFX900-NEXT:    v_cndmask_b32_e64 v2, 0, v2, s[4:5]
 ; GFX900-NEXT:    s_waitcnt vmcnt(0)
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[6:7], v[4:5], v[31:32]
+; GFX900-NEXT:    v_cmp_o_f64_e64 s[6:7], v[4:5], v[31:32]
 ; GFX900-NEXT:    v_max_f64 v[4:5], v[4:5], v[31:32]
 ; GFX900-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:32
 ; GFX900-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:28
-; GFX900-NEXT:    v_cndmask_b32_e64 v4, v4, 0, s[6:7]
+; GFX900-NEXT:    v_cndmask_b32_e64 v4, 0, v4, s[6:7]
 ; GFX900-NEXT:    s_waitcnt vmcnt(0)
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[8:9], v[6:7], v[31:32]
+; GFX900-NEXT:    v_cmp_o_f64_e64 s[8:9], v[6:7], v[31:32]
 ; GFX900-NEXT:    v_max_f64 v[6:7], v[6:7], v[31:32]
 ; GFX900-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:36
 ; GFX900-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:40
-; GFX900-NEXT:    v_cndmask_b32_e64 v6, v6, 0, s[8:9]
+; GFX900-NEXT:    v_cndmask_b32_e64 v6, 0, v6, s[8:9]
 ; GFX900-NEXT:    s_waitcnt vmcnt(0)
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[10:11], v[8:9], v[31:32]
+; GFX900-NEXT:    v_cmp_o_f64_e64 s[10:11], v[8:9], v[31:32]
 ; GFX900-NEXT:    v_max_f64 v[8:9], v[8:9], v[31:32]
 ; GFX900-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:48
 ; GFX900-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:44
-; GFX900-NEXT:    v_cndmask_b32_e64 v8, v8, 0, s[10:11]
+; GFX900-NEXT:    v_cndmask_b32_e64 v8, 0, v8, s[10:11]
 ; GFX900-NEXT:    s_waitcnt vmcnt(0)
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[12:13], v[10:11], v[31:32]
+; GFX900-NEXT:    v_cmp_o_f64_e64 s[12:13], v[10:11], v[31:32]
 ; GFX900-NEXT:    v_max_f64 v[10:11], v[10:11], v[31:32]
 ; GFX900-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:56
 ; GFX900-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:52
-; GFX900-NEXT:    v_cndmask_b32_e64 v10, v10, 0, s[12:13]
+; GFX900-NEXT:    v_cndmask_b32_e64 v10, 0, v10, s[12:13]
 ; GFX900-NEXT:    s_waitcnt vmcnt(0)
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[14:15], v[12:13], v[31:32]
+; GFX900-NEXT:    v_cmp_o_f64_e64 s[14:15], v[12:13], v[31:32]
 ; GFX900-NEXT:    v_max_f64 v[12:13], v[12:13], v[31:32]
 ; GFX900-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:64
 ; GFX900-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:60
-; GFX900-NEXT:    v_cndmask_b32_e64 v12, v12, 0, s[14:15]
+; GFX900-NEXT:    v_cndmask_b32_e64 v12, 0, v12, s[14:15]
 ; GFX900-NEXT:    s_waitcnt vmcnt(0)
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[16:17], v[14:15], v[31:32]
+; GFX900-NEXT:    v_cmp_o_f64_e64 s[16:17], v[14:15], v[31:32]
 ; GFX900-NEXT:    v_max_f64 v[14:15], v[14:15], v[31:32]
 ; GFX900-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:68
 ; GFX900-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:72
-; GFX900-NEXT:    v_cndmask_b32_e64 v14, v14, 0, s[16:17]
+; GFX900-NEXT:    v_cndmask_b32_e64 v14, 0, v14, s[16:17]
 ; GFX900-NEXT:    s_waitcnt vmcnt(0)
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[18:19], v[16:17], v[31:32]
+; GFX900-NEXT:    v_cmp_o_f64_e64 s[18:19], v[16:17], v[31:32]
 ; GFX900-NEXT:    v_max_f64 v[16:17], v[16:17], v[31:32]
 ; GFX900-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:80
 ; GFX900-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:76
-; GFX900-NEXT:    v_cndmask_b32_e64 v16, v16, 0, s[18:19]
+; GFX900-NEXT:    v_cndmask_b32_e64 v16, 0, v16, s[18:19]
 ; GFX900-NEXT:    s_waitcnt vmcnt(0)
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[20:21], v[18:19], v[31:32]
+; GFX900-NEXT:    v_cmp_o_f64_e64 s[20:21], v[18:19], v[31:32]
 ; GFX900-NEXT:    v_max_f64 v[18:19], v[18:19], v[31:32]
 ; GFX900-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:88
 ; GFX900-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:84
-; GFX900-NEXT:    v_cndmask_b32_e64 v18, v18, 0, s[20:21]
+; GFX900-NEXT:    v_cndmask_b32_e64 v18, 0, v18, s[20:21]
 ; GFX900-NEXT:    s_waitcnt vmcnt(0)
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[22:23], v[20:21], v[31:32]
+; GFX900-NEXT:    v_cmp_o_f64_e64 s[22:23], v[20:21], v[31:32]
 ; GFX900-NEXT:    v_max_f64 v[20:21], v[20:21], v[31:32]
 ; GFX900-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:96
 ; GFX900-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:92
-; GFX900-NEXT:    v_cndmask_b32_e64 v20, v20, 0, s[22:23]
+; GFX900-NEXT:    v_cndmask_b32_e64 v20, 0, v20, s[22:23]
 ; GFX900-NEXT:    s_waitcnt vmcnt(0)
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[24:25], v[22:23], v[31:32]
+; GFX900-NEXT:    v_cmp_o_f64_e64 s[24:25], v[22:23], v[31:32]
 ; GFX900-NEXT:    v_max_f64 v[22:23], v[22:23], v[31:32]
 ; GFX900-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:100
 ; GFX900-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:104
-; GFX900-NEXT:    v_cndmask_b32_e64 v22, v22, 0, s[24:25]
+; GFX900-NEXT:    v_cndmask_b32_e64 v22, 0, v22, s[24:25]
 ; GFX900-NEXT:    s_waitcnt vmcnt(0)
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[26:27], v[24:25], v[31:32]
+; GFX900-NEXT:    v_cmp_o_f64_e64 s[26:27], v[24:25], v[31:32]
 ; GFX900-NEXT:    v_max_f64 v[24:25], v[24:25], v[31:32]
 ; GFX900-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:112
 ; GFX900-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:108
-; GFX900-NEXT:    v_cndmask_b32_e64 v24, v24, 0, s[26:27]
+; GFX900-NEXT:    v_cndmask_b32_e64 v24, 0, v24, s[26:27]
 ; GFX900-NEXT:    s_waitcnt vmcnt(0)
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[28:29], v[26:27], v[31:32]
+; GFX900-NEXT:    v_cmp_o_f64_e64 s[28:29], v[26:27], v[31:32]
 ; GFX900-NEXT:    v_max_f64 v[26:27], v[26:27], v[31:32]
 ; GFX900-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:120
 ; GFX900-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:116
-; GFX900-NEXT:    v_cndmask_b32_e64 v26, v26, 0, s[28:29]
+; GFX900-NEXT:    v_cndmask_b32_e64 v26, 0, v26, s[28:29]
 ; GFX900-NEXT:    s_waitcnt vmcnt(0)
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[40:41], v[28:29], v[31:32]
+; GFX900-NEXT:    v_cmp_o_f64_e64 s[40:41], v[28:29], v[31:32]
 ; GFX900-NEXT:    v_max_f64 v[28:29], v[28:29], v[31:32]
 ; GFX900-NEXT:    buffer_load_dword v31, off, s[0:3], s32
 ; GFX900-NEXT:    buffer_load_dword v33, off, s[0:3], s32 offset:128
 ; GFX900-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:124
-; GFX900-NEXT:    v_cndmask_b32_e64 v28, v28, 0, s[40:41]
+; GFX900-NEXT:    v_cndmask_b32_e64 v28, 0, v28, s[40:41]
 ; GFX900-NEXT:    s_waitcnt vmcnt(0)
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[42:43], v[30:31], v[32:33]
+; GFX900-NEXT:    v_cmp_o_f64_e64 s[42:43], v[30:31], v[32:33]
 ; GFX900-NEXT:    v_max_f64 v[30:31], v[30:31], v[32:33]
 ; GFX900-NEXT:    v_mov_b32_e32 v32, 0x7ff80000
-; GFX900-NEXT:    v_cndmask_b32_e32 v1, v1, v32, vcc
-; GFX900-NEXT:    v_cndmask_b32_e64 v3, v3, v32, s[4:5]
-; GFX900-NEXT:    v_cndmask_b32_e64 v5, v5, v32, s[6:7]
-; GFX900-NEXT:    v_cndmask_b32_e64 v7, v7, v32, s[8:9]
-; GFX900-NEXT:    v_cndmask_b32_e64 v9, v9, v32, s[10:11]
-; GFX900-NEXT:    v_cndmask_b32_e64 v11, v11, v32, s[12:13]
-; GFX900-NEXT:    v_cndmask_b32_e64 v13, v13, v32, s[14:15]
-; GFX900-NEXT:    v_cndmask_b32_e64 v15, v15, v32, s[16:17]
-; GFX900-NEXT:    v_cndmask_b32_e64 v17, v17, v32, s[18:19]
-; GFX900-NEXT:    v_cndmask_b32_e64 v19, v19, v32, s[20:21]
-; GFX900-NEXT:    v_cndmask_b32_e64 v21, v21, v32, s[22:23]
-; GFX900-NEXT:    v_cndmask_b32_e64 v23, v23, v32, s[24:25]
-; GFX900-NEXT:    v_cndmask_b32_e64 v25, v25, v32, s[26:27]
-; GFX900-NEXT:    v_cndmask_b32_e64 v27, v27, v32, s[28:29]
-; GFX900-NEXT:    v_cndmask_b32_e64 v29, v29, v32, s[40:41]
-; GFX900-NEXT:    v_cndmask_b32_e64 v31, v31, v32, s[42:43]
-; GFX900-NEXT:    v_cndmask_b32_e64 v30, v30, 0, s[42:43]
+; GFX900-NEXT:    v_cndmask_b32_e32 v1, v32, v1, vcc
+; GFX900-NEXT:    v_cndmask_b32_e64 v3, v32, v3, s[4:5]
+; GFX900-NEXT:    v_cndmask_b32_e64 v5, v32, v5, s[6:7]
+; GFX900-NEXT:    v_cndmask_b32_e64 v7, v32, v7, s[8:9]
+; GFX900-NEXT:    v_cndmask_b32_e64 v9, v32, v9, s[10:11]
+; GFX900-NEXT:    v_cndmask_b32_e64 v11, v32, v11, s[12:13]
+; GFX900-NEXT:    v_cndmask_b32_e64 v13, v32, v13, s[14:15]
+; GFX900-NEXT:    v_cndmask_b32_e64 v15, v32, v15, s[16:17]
+; GFX900-NEXT:    v_cndmask_b32_e64 v17, v32, v17, s[18:19]
+; GFX900-NEXT:    v_cndmask_b32_e64 v19, v32, v19, s[20:21]
+; GFX900-NEXT:    v_cndmask_b32_e64 v21, v32, v21, s[22:23]
+; GFX900-NEXT:    v_cndmask_b32_e64 v23, v32, v23, s[24:25]
+; GFX900-NEXT:    v_cndmask_b32_e64 v25, v32, v25, s[26:27]
+; GFX900-NEXT:    v_cndmask_b32_e64 v27, v32, v27, s[28:29]
+; GFX900-NEXT:    v_cndmask_b32_e64 v29, v32, v29, s[40:41]
+; GFX900-NEXT:    v_cndmask_b32_e64 v31, v32, v31, s[42:43]
+; GFX900-NEXT:    v_cndmask_b32_e64 v30, 0, v30, s[42:43]
 ; GFX900-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_maximum_v16f64:
@@ -2410,107 +2410,107 @@ define <16 x double> @v_maximum_v16f64(<16 x double> %src0, <16 x double> %src1)
 ; GFX950-NEXT:    v_accvgpr_write_b32 a15, v63 ; Reload Reuse
 ; GFX950-NEXT:    s_waitcnt vmcnt(25)
 ; GFX950-NEXT:    v_max_f64 v[58:59], v[0:1], v[32:33]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[32:33]
+; GFX950-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[32:33]
 ; GFX950-NEXT:    scratch_load_dword v33, off, s32 offset:112
 ; GFX950-NEXT:    scratch_load_dword v32, off, s32 offset:108
 ; GFX950-NEXT:    s_waitcnt vmcnt(25)
 ; GFX950-NEXT:    v_max_f64 v[60:61], v[2:3], v[36:37]
-; GFX950-NEXT:    v_cmp_u_f64_e64 s[0:1], v[2:3], v[36:37]
+; GFX950-NEXT:    v_cmp_o_f64_e64 s[0:1], v[2:3], v[36:37]
 ; GFX950-NEXT:    scratch_load_dword v37, off, s32 offset:120
 ; GFX950-NEXT:    scratch_load_dword v36, off, s32 offset:116
 ; GFX950-NEXT:    s_waitcnt vmcnt(25)
 ; GFX950-NEXT:    v_max_f64 v[62:63], v[4:5], v[38:39]
-; GFX950-NEXT:    v_cmp_u_f64_e64 s[2:3], v[4:5], v[38:39]
+; GFX950-NEXT:    v_cmp_o_f64_e64 s[2:3], v[4:5], v[38:39]
 ; GFX950-NEXT:    scratch_load_dword v39, off, s32 offset:128
 ; GFX950-NEXT:    scratch_load_dword v38, off, s32 offset:124
 ; GFX950-NEXT:    v_mov_b32_e32 v2, 0x7ff80000
 ; GFX950-NEXT:    s_waitcnt vmcnt(25)
 ; GFX950-NEXT:    v_max_f64 v[0:1], v[6:7], v[56:57]
-; GFX950-NEXT:    v_cmp_u_f64_e64 s[4:5], v[6:7], v[56:57]
+; GFX950-NEXT:    v_cmp_o_f64_e64 s[4:5], v[6:7], v[56:57]
 ; GFX950-NEXT:    s_waitcnt vmcnt(23)
 ; GFX950-NEXT:    v_max_f64 v[56:57], v[8:9], v[46:47]
-; GFX950-NEXT:    v_cndmask_b32_e64 v58, v58, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v59, v59, v2, vcc
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[8:9], v[46:47]
-; GFX950-NEXT:    v_cndmask_b32_e64 v6, v0, 0, s[4:5]
-; GFX950-NEXT:    v_cndmask_b32_e64 v7, v1, v2, s[4:5]
-; GFX950-NEXT:    v_cndmask_b32_e64 v8, v56, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v9, v57, v2, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v58, 0, v58, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v59, v2, v59, vcc
+; GFX950-NEXT:    v_cmp_o_f64_e32 vcc, v[8:9], v[46:47]
+; GFX950-NEXT:    v_cndmask_b32_e64 v6, 0, v0, s[4:5]
+; GFX950-NEXT:    v_cndmask_b32_e64 v7, v2, v1, s[4:5]
+; GFX950-NEXT:    v_cndmask_b32_e32 v8, 0, v56, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v9, v2, v57, vcc
 ; GFX950-NEXT:    s_waitcnt vmcnt(21)
 ; GFX950-NEXT:    v_max_f64 v[0:1], v[10:11], v[44:45]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[10:11], v[44:45]
-; GFX950-NEXT:    v_cndmask_b32_e64 v60, v60, 0, s[0:1]
-; GFX950-NEXT:    v_cndmask_b32_e64 v3, v61, v2, s[0:1]
-; GFX950-NEXT:    v_cndmask_b32_e64 v10, v0, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v11, v1, v2, vcc
+; GFX950-NEXT:    v_cmp_o_f64_e32 vcc, v[10:11], v[44:45]
+; GFX950-NEXT:    v_cndmask_b32_e64 v60, 0, v60, s[0:1]
+; GFX950-NEXT:    v_cndmask_b32_e64 v3, v2, v61, s[0:1]
+; GFX950-NEXT:    v_cndmask_b32_e32 v10, 0, v0, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v11, v2, v1, vcc
 ; GFX950-NEXT:    s_waitcnt vmcnt(19)
 ; GFX950-NEXT:    v_max_f64 v[0:1], v[12:13], v[42:43]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[12:13], v[42:43]
-; GFX950-NEXT:    v_cndmask_b32_e64 v4, v62, 0, s[2:3]
-; GFX950-NEXT:    v_cndmask_b32_e64 v5, v63, v2, s[2:3]
-; GFX950-NEXT:    v_cndmask_b32_e64 v12, v0, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v13, v1, v2, vcc
+; GFX950-NEXT:    v_cmp_o_f64_e32 vcc, v[12:13], v[42:43]
+; GFX950-NEXT:    v_cndmask_b32_e64 v4, 0, v62, s[2:3]
+; GFX950-NEXT:    v_cndmask_b32_e64 v5, v2, v63, s[2:3]
+; GFX950-NEXT:    v_cndmask_b32_e32 v12, 0, v0, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v13, v2, v1, vcc
 ; GFX950-NEXT:    s_waitcnt vmcnt(17)
 ; GFX950-NEXT:    v_max_f64 v[0:1], v[14:15], v[40:41]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[14:15], v[40:41]
+; GFX950-NEXT:    v_cmp_o_f64_e32 vcc, v[14:15], v[40:41]
 ; GFX950-NEXT:    v_accvgpr_read_b32 v63, a15 ; Reload Reuse
 ; GFX950-NEXT:    v_accvgpr_read_b32 v62, a14 ; Reload Reuse
-; GFX950-NEXT:    v_cndmask_b32_e64 v14, v0, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v15, v1, v2, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v14, 0, v0, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v15, v2, v1, vcc
 ; GFX950-NEXT:    s_waitcnt vmcnt(15)
 ; GFX950-NEXT:    v_max_f64 v[0:1], v[16:17], v[54:55]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[16:17], v[54:55]
+; GFX950-NEXT:    v_cmp_o_f64_e32 vcc, v[16:17], v[54:55]
 ; GFX950-NEXT:    v_accvgpr_read_b32 v61, a13 ; Reload Reuse
 ; GFX950-NEXT:    v_accvgpr_read_b32 v57, a9 ; Reload Reuse
-; GFX950-NEXT:    v_cndmask_b32_e64 v16, v0, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v17, v1, v2, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v16, 0, v0, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v17, v2, v1, vcc
 ; GFX950-NEXT:    s_waitcnt vmcnt(13)
 ; GFX950-NEXT:    v_max_f64 v[0:1], v[18:19], v[52:53]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[18:19], v[52:53]
+; GFX950-NEXT:    v_cmp_o_f64_e32 vcc, v[18:19], v[52:53]
 ; GFX950-NEXT:    v_accvgpr_read_b32 v56, a8 ; Reload Reuse
 ; GFX950-NEXT:    v_accvgpr_read_b32 v47, a7 ; Reload Reuse
-; GFX950-NEXT:    v_cndmask_b32_e64 v18, v0, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v19, v1, v2, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v18, 0, v0, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v19, v2, v1, vcc
 ; GFX950-NEXT:    s_waitcnt vmcnt(11)
 ; GFX950-NEXT:    v_max_f64 v[0:1], v[20:21], v[50:51]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[20:21], v[50:51]
+; GFX950-NEXT:    v_cmp_o_f64_e32 vcc, v[20:21], v[50:51]
 ; GFX950-NEXT:    v_accvgpr_read_b32 v46, a6 ; Reload Reuse
 ; GFX950-NEXT:    v_accvgpr_read_b32 v45, a5 ; Reload Reuse
-; GFX950-NEXT:    v_cndmask_b32_e64 v20, v0, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v21, v1, v2, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v20, 0, v0, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v21, v2, v1, vcc
 ; GFX950-NEXT:    s_waitcnt vmcnt(9)
 ; GFX950-NEXT:    v_max_f64 v[0:1], v[22:23], v[48:49]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[22:23], v[48:49]
+; GFX950-NEXT:    v_cmp_o_f64_e32 vcc, v[22:23], v[48:49]
 ; GFX950-NEXT:    v_accvgpr_read_b32 v44, a4 ; Reload Reuse
 ; GFX950-NEXT:    v_accvgpr_read_b32 v43, a3 ; Reload Reuse
-; GFX950-NEXT:    v_cndmask_b32_e64 v22, v0, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v23, v1, v2, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v22, 0, v0, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v23, v2, v1, vcc
 ; GFX950-NEXT:    s_waitcnt vmcnt(6)
 ; GFX950-NEXT:    v_max_f64 v[0:1], v[24:25], v[34:35]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[24:25], v[34:35]
+; GFX950-NEXT:    v_cmp_o_f64_e32 vcc, v[24:25], v[34:35]
 ; GFX950-NEXT:    v_accvgpr_read_b32 v42, a2 ; Reload Reuse
 ; GFX950-NEXT:    v_accvgpr_read_b32 v41, a1 ; Reload Reuse
-; GFX950-NEXT:    v_cndmask_b32_e64 v24, v0, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v25, v1, v2, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v24, 0, v0, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v25, v2, v1, vcc
 ; GFX950-NEXT:    v_accvgpr_read_b32 v40, a0 ; Reload Reuse
 ; GFX950-NEXT:    s_waitcnt vmcnt(4)
 ; GFX950-NEXT:    v_max_f64 v[0:1], v[26:27], v[32:33]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[26:27], v[32:33]
+; GFX950-NEXT:    v_cmp_o_f64_e32 vcc, v[26:27], v[32:33]
 ; GFX950-NEXT:    s_nop 1
-; GFX950-NEXT:    v_cndmask_b32_e64 v26, v0, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v27, v1, v2, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v26, 0, v0, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v27, v2, v1, vcc
 ; GFX950-NEXT:    s_waitcnt vmcnt(2)
 ; GFX950-NEXT:    v_max_f64 v[0:1], v[28:29], v[36:37]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[28:29], v[36:37]
+; GFX950-NEXT:    v_cmp_o_f64_e32 vcc, v[28:29], v[36:37]
 ; GFX950-NEXT:    s_nop 1
-; GFX950-NEXT:    v_cndmask_b32_e64 v28, v0, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v29, v1, v2, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v28, 0, v0, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v29, v2, v1, vcc
 ; GFX950-NEXT:    s_waitcnt vmcnt(0)
 ; GFX950-NEXT:    v_max_f64 v[0:1], v[30:31], v[38:39]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[30:31], v[38:39]
+; GFX950-NEXT:    v_cmp_o_f64_e32 vcc, v[30:31], v[38:39]
 ; GFX950-NEXT:    s_nop 1
-; GFX950-NEXT:    v_cndmask_b32_e64 v30, v0, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v31, v1, v2, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v30, 0, v0, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v31, v2, v1, vcc
 ; GFX950-NEXT:    v_mov_b32_e32 v0, v58
 ; GFX950-NEXT:    v_mov_b32_e32 v1, v59
 ; GFX950-NEXT:    v_mov_b32_e32 v2, v60
@@ -2550,92 +2550,92 @@ define <16 x double> @v_maximum_v16f64(<16 x double> %src0, <16 x double> %src1)
 ; GFX10-NEXT:    buffer_load_dword v51, off, s[0:3], s32 offset:72
 ; GFX10-NEXT:    s_waitcnt vmcnt(23)
 ; GFX10-NEXT:    v_max_f64 v[82:83], v[0:1], v[31:32]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[31:32]
-; GFX10-NEXT:    s_waitcnt vmcnt(21)
+; GFX10-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[31:32]
+; GFX10-NEXT:    s_waitcnt vmcnt(22)
 ; GFX10-NEXT:    v_max_f64 v[84:85], v[2:3], v[33:34]
-; GFX10-NEXT:    v_cmp_u_f64_e64 s4, v[2:3], v[33:34]
-; GFX10-NEXT:    s_waitcnt vmcnt(19)
+; GFX10-NEXT:    v_cmp_o_f64_e64 s4, v[2:3], v[33:34]
+; GFX10-NEXT:    s_clause 0x3
+; GFX10-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:120
+; GFX10-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:116
+; GFX10-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:112
+; GFX10-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:108
+; GFX10-NEXT:    s_waitcnt vmcnt(24)
 ; GFX10-NEXT:    v_max_f64 v[32:33], v[4:5], v[35:36]
-; GFX10-NEXT:    v_cmp_u_f64_e64 s5, v[4:5], v[35:36]
-; GFX10-NEXT:    s_clause 0x7
-; GFX10-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:112
-; GFX10-NEXT:    buffer_load_dword v67, off, s[0:3], s32 offset:104
-; GFX10-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:108
-; GFX10-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:120
-; GFX10-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:116
+; GFX10-NEXT:    v_cmp_o_f64_e64 s5, v[4:5], v[35:36]
+; GFX10-NEXT:    s_clause 0x2
 ; GFX10-NEXT:    buffer_load_dword v31, off, s[0:3], s32
 ; GFX10-NEXT:    buffer_load_dword v5, off, s[0:3], s32 offset:128
 ; GFX10-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:124
 ; GFX10-NEXT:    s_waitcnt vmcnt(24)
 ; GFX10-NEXT:    v_max_f64 v[34:35], v[6:7], v[48:49]
-; GFX10-NEXT:    v_cmp_u_f64_e64 s6, v[6:7], v[48:49]
+; GFX10-NEXT:    v_cmp_o_f64_e64 s6, v[6:7], v[48:49]
 ; GFX10-NEXT:    s_waitcnt vmcnt(21)
-; GFX10-NEXT:    v_cmp_u_f64_e64 s10, v[14:15], v[52:53]
+; GFX10-NEXT:    v_cmp_o_f64_e64 s10, v[14:15], v[52:53]
 ; GFX10-NEXT:    s_waitcnt vmcnt(19)
-; GFX10-NEXT:    v_cmp_u_f64_e64 s9, v[12:13], v[54:55]
+; GFX10-NEXT:    v_cmp_o_f64_e64 s9, v[12:13], v[54:55]
 ; GFX10-NEXT:    s_waitcnt vmcnt(17)
-; GFX10-NEXT:    v_cmp_u_f64_e64 s8, v[10:11], v[64:65]
+; GFX10-NEXT:    v_cmp_o_f64_e64 s8, v[10:11], v[64:65]
 ; GFX10-NEXT:    s_waitcnt vmcnt(16)
 ; GFX10-NEXT:    v_max_f64 v[48:49], v[8:9], v[37:38]
-; GFX10-NEXT:    v_cmp_u_f64_e64 s7, v[8:9], v[37:38]
+; GFX10-NEXT:    v_cmp_o_f64_e64 s7, v[8:9], v[37:38]
 ; GFX10-NEXT:    v_max_f64 v[36:37], v[10:11], v[64:65]
 ; GFX10-NEXT:    v_max_f64 v[38:39], v[12:13], v[54:55]
 ; GFX10-NEXT:    v_max_f64 v[54:55], v[14:15], v[52:53]
 ; GFX10-NEXT:    s_waitcnt vmcnt(11)
 ; GFX10-NEXT:    v_max_f64 v[64:65], v[20:21], v[70:71]
-; GFX10-NEXT:    v_cmp_u_f64_e64 s13, v[20:21], v[70:71]
+; GFX10-NEXT:    v_cmp_o_f64_e64 s13, v[20:21], v[70:71]
 ; GFX10-NEXT:    s_waitcnt vmcnt(9)
-; GFX10-NEXT:    v_cmp_u_f64_e64 s12, v[18:19], v[80:81]
+; GFX10-NEXT:    v_cmp_o_f64_e64 s12, v[18:19], v[80:81]
 ; GFX10-NEXT:    s_waitcnt vmcnt(8)
 ; GFX10-NEXT:    v_max_f64 v[52:53], v[16:17], v[50:51]
-; GFX10-NEXT:    v_cmp_u_f64_e64 s11, v[16:17], v[50:51]
+; GFX10-NEXT:    v_cmp_o_f64_e64 s11, v[16:17], v[50:51]
 ; GFX10-NEXT:    v_max_f64 v[50:51], v[18:19], v[80:81]
 ; GFX10-NEXT:    v_max_f64 v[70:71], v[22:23], v[68:69]
-; GFX10-NEXT:    v_cmp_u_f64_e64 s14, v[22:23], v[68:69]
-; GFX10-NEXT:    v_cndmask_b32_e64 v6, v34, 0, s6
-; GFX10-NEXT:    v_cndmask_b32_e64 v7, v35, 0x7ff80000, s6
-; GFX10-NEXT:    v_cndmask_b32_e64 v8, v48, 0, s7
-; GFX10-NEXT:    v_cndmask_b32_e64 v9, v49, 0x7ff80000, s7
-; GFX10-NEXT:    v_cndmask_b32_e64 v10, v36, 0, s8
-; GFX10-NEXT:    v_cndmask_b32_e64 v11, v37, 0x7ff80000, s8
-; GFX10-NEXT:    v_cndmask_b32_e64 v12, v38, 0, s9
-; GFX10-NEXT:    v_cndmask_b32_e64 v13, v39, 0x7ff80000, s9
-; GFX10-NEXT:    v_cndmask_b32_e64 v14, v54, 0, s10
-; GFX10-NEXT:    v_cndmask_b32_e64 v15, v55, 0x7ff80000, s10
-; GFX10-NEXT:    v_cndmask_b32_e64 v16, v52, 0, s11
-; GFX10-NEXT:    v_cndmask_b32_e64 v17, v53, 0x7ff80000, s11
-; GFX10-NEXT:    v_cndmask_b32_e64 v18, v50, 0, s12
-; GFX10-NEXT:    v_cndmask_b32_e64 v19, v51, 0x7ff80000, s12
-; GFX10-NEXT:    v_cndmask_b32_e64 v20, v64, 0, s13
-; GFX10-NEXT:    v_cndmask_b32_e64 v21, v65, 0x7ff80000, s13
-; GFX10-NEXT:    v_cndmask_b32_e64 v22, v70, 0, s14
-; GFX10-NEXT:    v_cndmask_b32_e64 v23, v71, 0x7ff80000, s14
-; GFX10-NEXT:    s_waitcnt vmcnt(6)
+; GFX10-NEXT:    v_cmp_o_f64_e64 s14, v[22:23], v[68:69]
+; GFX10-NEXT:    s_waitcnt vmcnt(7)
 ; GFX10-NEXT:    v_max_f64 v[68:69], v[24:25], v[66:67]
-; GFX10-NEXT:    v_cmp_u_f64_e64 s15, v[24:25], v[66:67]
+; GFX10-NEXT:    v_cmp_o_f64_e64 s15, v[24:25], v[66:67]
+; GFX10-NEXT:    v_cndmask_b32_e64 v6, 0, v34, s6
+; GFX10-NEXT:    v_cndmask_b32_e64 v7, 0x7ff80000, v35, s6
+; GFX10-NEXT:    v_cndmask_b32_e64 v8, 0, v48, s7
+; GFX10-NEXT:    v_cndmask_b32_e64 v9, 0x7ff80000, v49, s7
+; GFX10-NEXT:    v_cndmask_b32_e64 v10, 0, v36, s8
+; GFX10-NEXT:    v_cndmask_b32_e64 v11, 0x7ff80000, v37, s8
+; GFX10-NEXT:    v_cndmask_b32_e64 v12, 0, v38, s9
+; GFX10-NEXT:    v_cndmask_b32_e64 v13, 0x7ff80000, v39, s9
+; GFX10-NEXT:    v_cndmask_b32_e64 v14, 0, v54, s10
+; GFX10-NEXT:    v_cndmask_b32_e64 v15, 0x7ff80000, v55, s10
+; GFX10-NEXT:    v_cndmask_b32_e64 v16, 0, v52, s11
+; GFX10-NEXT:    v_cndmask_b32_e64 v17, 0x7ff80000, v53, s11
+; GFX10-NEXT:    v_cndmask_b32_e64 v18, 0, v50, s12
+; GFX10-NEXT:    v_cndmask_b32_e64 v19, 0x7ff80000, v51, s12
+; GFX10-NEXT:    v_cndmask_b32_e64 v20, 0, v64, s13
+; GFX10-NEXT:    v_cndmask_b32_e64 v21, 0x7ff80000, v65, s13
+; GFX10-NEXT:    v_cndmask_b32_e64 v22, 0, v70, s14
+; GFX10-NEXT:    v_cndmask_b32_e64 v23, 0x7ff80000, v71, s14
+; GFX10-NEXT:    v_cndmask_b32_e64 v24, 0, v68, s15
+; GFX10-NEXT:    v_cndmask_b32_e64 v25, 0x7ff80000, v69, s15
 ; GFX10-NEXT:    s_waitcnt vmcnt(5)
-; GFX10-NEXT:    v_max_f64 v[66:67], v[26:27], v[0:1]
-; GFX10-NEXT:    v_cmp_u_f64_e64 s16, v[26:27], v[0:1]
+; GFX10-NEXT:    v_max_f64 v[80:81], v[28:29], v[0:1]
+; GFX10-NEXT:    v_cmp_o_f64_e64 s17, v[28:29], v[0:1]
 ; GFX10-NEXT:    s_waitcnt vmcnt(3)
-; GFX10-NEXT:    v_max_f64 v[80:81], v[28:29], v[2:3]
-; GFX10-NEXT:    v_cmp_u_f64_e64 s17, v[28:29], v[2:3]
+; GFX10-NEXT:    v_max_f64 v[66:67], v[26:27], v[2:3]
+; GFX10-NEXT:    v_cmp_o_f64_e64 s16, v[26:27], v[2:3]
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0, v82, vcc_lo
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    v_max_f64 v[86:87], v[30:31], v[4:5]
-; GFX10-NEXT:    v_cmp_u_f64_e64 s18, v[30:31], v[4:5]
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v82, 0, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v83, 0x7ff80000, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v2, v84, 0, s4
-; GFX10-NEXT:    v_cndmask_b32_e64 v3, v85, 0x7ff80000, s4
-; GFX10-NEXT:    v_cndmask_b32_e64 v4, v32, 0, s5
-; GFX10-NEXT:    v_cndmask_b32_e64 v5, v33, 0x7ff80000, s5
-; GFX10-NEXT:    v_cndmask_b32_e64 v24, v68, 0, s15
-; GFX10-NEXT:    v_cndmask_b32_e64 v25, v69, 0x7ff80000, s15
-; GFX10-NEXT:    v_cndmask_b32_e64 v26, v66, 0, s16
-; GFX10-NEXT:    v_cndmask_b32_e64 v27, v67, 0x7ff80000, s16
-; GFX10-NEXT:    v_cndmask_b32_e64 v28, v80, 0, s17
-; GFX10-NEXT:    v_cndmask_b32_e64 v29, v81, 0x7ff80000, s17
-; GFX10-NEXT:    v_cndmask_b32_e64 v30, v86, 0, s18
-; GFX10-NEXT:    v_cndmask_b32_e64 v31, v87, 0x7ff80000, s18
+; GFX10-NEXT:    v_cmp_o_f64_e64 s18, v[30:31], v[4:5]
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v83, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e64 v2, 0, v84, s4
+; GFX10-NEXT:    v_cndmask_b32_e64 v3, 0x7ff80000, v85, s4
+; GFX10-NEXT:    v_cndmask_b32_e64 v4, 0, v32, s5
+; GFX10-NEXT:    v_cndmask_b32_e64 v5, 0x7ff80000, v33, s5
+; GFX10-NEXT:    v_cndmask_b32_e64 v28, 0, v80, s17
+; GFX10-NEXT:    v_cndmask_b32_e64 v29, 0x7ff80000, v81, s17
+; GFX10-NEXT:    v_cndmask_b32_e64 v26, 0, v66, s16
+; GFX10-NEXT:    v_cndmask_b32_e64 v27, 0x7ff80000, v67, s16
+; GFX10-NEXT:    v_cndmask_b32_e64 v30, 0, v86, s18
+; GFX10-NEXT:    v_cndmask_b32_e64 v31, 0x7ff80000, v87, s18
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: v_maximum_v16f64:
@@ -2677,84 +2677,84 @@ define <16 x double> @v_maximum_v16f64(<16 x double> %src0, <16 x double> %src1)
 ; GFX11-NEXT:    scratch_load_b32 v86, off, s32 offset:124
 ; GFX11-NEXT:    s_waitcnt vmcnt(30)
 ; GFX11-NEXT:    v_max_f64 v[96:97], v[0:1], v[32:33]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[32:33]
+; GFX11-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[32:33]
 ; GFX11-NEXT:    s_waitcnt vmcnt(28)
 ; GFX11-NEXT:    v_max_f64 v[32:33], v[2:3], v[34:35]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s0, v[2:3], v[34:35]
+; GFX11-NEXT:    v_cmp_o_f64_e64 s0, v[2:3], v[34:35]
 ; GFX11-NEXT:    s_waitcnt vmcnt(26)
 ; GFX11-NEXT:    v_max_f64 v[34:35], v[4:5], v[36:37]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s1, v[4:5], v[36:37]
+; GFX11-NEXT:    v_cmp_o_f64_e64 s1, v[4:5], v[36:37]
 ; GFX11-NEXT:    s_waitcnt vmcnt(24)
 ; GFX11-NEXT:    v_max_f64 v[36:37], v[6:7], v[38:39]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s2, v[6:7], v[38:39]
+; GFX11-NEXT:    v_cmp_o_f64_e64 s2, v[6:7], v[38:39]
 ; GFX11-NEXT:    s_waitcnt vmcnt(22)
 ; GFX11-NEXT:    v_max_f64 v[38:39], v[8:9], v[48:49]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s3, v[8:9], v[48:49]
+; GFX11-NEXT:    v_cmp_o_f64_e64 s3, v[8:9], v[48:49]
 ; GFX11-NEXT:    s_waitcnt vmcnt(20)
 ; GFX11-NEXT:    v_max_f64 v[48:49], v[10:11], v[50:51]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s4, v[10:11], v[50:51]
+; GFX11-NEXT:    v_cmp_o_f64_e64 s4, v[10:11], v[50:51]
 ; GFX11-NEXT:    s_waitcnt vmcnt(18)
 ; GFX11-NEXT:    v_max_f64 v[50:51], v[12:13], v[52:53]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s5, v[12:13], v[52:53]
+; GFX11-NEXT:    v_cmp_o_f64_e64 s5, v[12:13], v[52:53]
 ; GFX11-NEXT:    s_waitcnt vmcnt(16)
 ; GFX11-NEXT:    v_max_f64 v[52:53], v[14:15], v[54:55]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s6, v[14:15], v[54:55]
+; GFX11-NEXT:    v_cmp_o_f64_e64 s6, v[14:15], v[54:55]
 ; GFX11-NEXT:    s_waitcnt vmcnt(14)
 ; GFX11-NEXT:    v_max_f64 v[54:55], v[16:17], v[64:65]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s7, v[16:17], v[64:65]
+; GFX11-NEXT:    v_cmp_o_f64_e64 s7, v[16:17], v[64:65]
 ; GFX11-NEXT:    s_waitcnt vmcnt(12)
 ; GFX11-NEXT:    v_max_f64 v[64:65], v[18:19], v[66:67]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s8, v[18:19], v[66:67]
+; GFX11-NEXT:    v_cmp_o_f64_e64 s8, v[18:19], v[66:67]
 ; GFX11-NEXT:    s_waitcnt vmcnt(10)
 ; GFX11-NEXT:    v_max_f64 v[66:67], v[20:21], v[68:69]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s9, v[20:21], v[68:69]
+; GFX11-NEXT:    v_cmp_o_f64_e64 s9, v[20:21], v[68:69]
 ; GFX11-NEXT:    s_waitcnt vmcnt(8)
 ; GFX11-NEXT:    v_max_f64 v[68:69], v[22:23], v[70:71]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s10, v[22:23], v[70:71]
+; GFX11-NEXT:    v_cmp_o_f64_e64 s10, v[22:23], v[70:71]
 ; GFX11-NEXT:    s_waitcnt vmcnt(6)
 ; GFX11-NEXT:    v_max_f64 v[70:71], v[24:25], v[80:81]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s11, v[24:25], v[80:81]
+; GFX11-NEXT:    v_cmp_o_f64_e64 s11, v[24:25], v[80:81]
 ; GFX11-NEXT:    s_waitcnt vmcnt(4)
 ; GFX11-NEXT:    v_max_f64 v[80:81], v[26:27], v[82:83]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s12, v[26:27], v[82:83]
+; GFX11-NEXT:    v_cmp_o_f64_e64 s12, v[26:27], v[82:83]
 ; GFX11-NEXT:    s_waitcnt vmcnt(2)
 ; GFX11-NEXT:    v_max_f64 v[82:83], v[28:29], v[84:85]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s13, v[28:29], v[84:85]
+; GFX11-NEXT:    v_cmp_o_f64_e64 s13, v[28:29], v[84:85]
 ; GFX11-NEXT:    s_waitcnt vmcnt(0)
 ; GFX11-NEXT:    v_max_f64 v[84:85], v[30:31], v[86:87]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s14, v[30:31], v[86:87]
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v96, 0, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v97, 0x7ff80000, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v2, v32, 0, s0
-; GFX11-NEXT:    v_cndmask_b32_e64 v3, v33, 0x7ff80000, s0
-; GFX11-NEXT:    v_cndmask_b32_e64 v4, v34, 0, s1
-; GFX11-NEXT:    v_cndmask_b32_e64 v5, v35, 0x7ff80000, s1
-; GFX11-NEXT:    v_cndmask_b32_e64 v6, v36, 0, s2
-; GFX11-NEXT:    v_cndmask_b32_e64 v7, v37, 0x7ff80000, s2
-; GFX11-NEXT:    v_cndmask_b32_e64 v8, v38, 0, s3
-; GFX11-NEXT:    v_cndmask_b32_e64 v9, v39, 0x7ff80000, s3
-; GFX11-NEXT:    v_cndmask_b32_e64 v10, v48, 0, s4
-; GFX11-NEXT:    v_cndmask_b32_e64 v11, v49, 0x7ff80000, s4
-; GFX11-NEXT:    v_cndmask_b32_e64 v12, v50, 0, s5
-; GFX11-NEXT:    v_cndmask_b32_e64 v13, v51, 0x7ff80000, s5
-; GFX11-NEXT:    v_cndmask_b32_e64 v14, v52, 0, s6
-; GFX11-NEXT:    v_cndmask_b32_e64 v15, v53, 0x7ff80000, s6
-; GFX11-NEXT:    v_cndmask_b32_e64 v16, v54, 0, s7
-; GFX11-NEXT:    v_cndmask_b32_e64 v17, v55, 0x7ff80000, s7
-; GFX11-NEXT:    v_cndmask_b32_e64 v18, v64, 0, s8
-; GFX11-NEXT:    v_cndmask_b32_e64 v19, v65, 0x7ff80000, s8
-; GFX11-NEXT:    v_cndmask_b32_e64 v20, v66, 0, s9
-; GFX11-NEXT:    v_cndmask_b32_e64 v21, v67, 0x7ff80000, s9
-; GFX11-NEXT:    v_cndmask_b32_e64 v22, v68, 0, s10
-; GFX11-NEXT:    v_cndmask_b32_e64 v23, v69, 0x7ff80000, s10
-; GFX11-NEXT:    v_cndmask_b32_e64 v24, v70, 0, s11
-; GFX11-NEXT:    v_cndmask_b32_e64 v25, v71, 0x7ff80000, s11
-; GFX11-NEXT:    v_cndmask_b32_e64 v26, v80, 0, s12
-; GFX11-NEXT:    v_cndmask_b32_e64 v27, v81, 0x7ff80000, s12
-; GFX11-NEXT:    v_cndmask_b32_e64 v28, v82, 0, s13
-; GFX11-NEXT:    v_cndmask_b32_e64 v29, v83, 0x7ff80000, s13
-; GFX11-NEXT:    v_cndmask_b32_e64 v30, v84, 0, s14
-; GFX11-NEXT:    v_cndmask_b32_e64 v31, v85, 0x7ff80000, s14
+; GFX11-NEXT:    v_cmp_o_f64_e64 s14, v[30:31], v[86:87]
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0, v96, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v97, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e64 v2, 0, v32, s0
+; GFX11-NEXT:    v_cndmask_b32_e64 v3, 0x7ff80000, v33, s0
+; GFX11-NEXT:    v_cndmask_b32_e64 v4, 0, v34, s1
+; GFX11-NEXT:    v_cndmask_b32_e64 v5, 0x7ff80000, v35, s1
+; GFX11-NEXT:    v_cndmask_b32_e64 v6, 0, v36, s2
+; GFX11-NEXT:    v_cndmask_b32_e64 v7, 0x7ff80000, v37, s2
+; GFX11-NEXT:    v_cndmask_b32_e64 v8, 0, v38, s3
+; GFX11-NEXT:    v_cndmask_b32_e64 v9, 0x7ff80000, v39, s3
+; GFX11-NEXT:    v_cndmask_b32_e64 v10, 0, v48, s4
+; GFX11-NEXT:    v_cndmask_b32_e64 v11, 0x7ff80000, v49, s4
+; GFX11-NEXT:    v_cndmask_b32_e64 v12, 0, v50, s5
+; GFX11-NEXT:    v_cndmask_b32_e64 v13, 0x7ff80000, v51, s5
+; GFX11-NEXT:    v_cndmask_b32_e64 v14, 0, v52, s6
+; GFX11-NEXT:    v_cndmask_b32_e64 v15, 0x7ff80000, v53, s6
+; GFX11-NEXT:    v_cndmask_b32_e64 v16, 0, v54, s7
+; GFX11-NEXT:    v_cndmask_b32_e64 v17, 0x7ff80000, v55, s7
+; GFX11-NEXT:    v_cndmask_b32_e64 v18, 0, v64, s8
+; GFX11-NEXT:    v_cndmask_b32_e64 v19, 0x7ff80000, v65, s8
+; GFX11-NEXT:    v_cndmask_b32_e64 v20, 0, v66, s9
+; GFX11-NEXT:    v_cndmask_b32_e64 v21, 0x7ff80000, v67, s9
+; GFX11-NEXT:    v_cndmask_b32_e64 v22, 0, v68, s10
+; GFX11-NEXT:    v_cndmask_b32_e64 v23, 0x7ff80000, v69, s10
+; GFX11-NEXT:    v_cndmask_b32_e64 v24, 0, v70, s11
+; GFX11-NEXT:    v_cndmask_b32_e64 v25, 0x7ff80000, v71, s11
+; GFX11-NEXT:    v_cndmask_b32_e64 v26, 0, v80, s12
+; GFX11-NEXT:    v_cndmask_b32_e64 v27, 0x7ff80000, v81, s12
+; GFX11-NEXT:    v_cndmask_b32_e64 v28, 0, v82, s13
+; GFX11-NEXT:    v_cndmask_b32_e64 v29, 0x7ff80000, v83, s13
+; GFX11-NEXT:    v_cndmask_b32_e64 v30, 0, v84, s14
+; GFX11-NEXT:    v_cndmask_b32_e64 v31, 0x7ff80000, v85, s14
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: v_maximum_v16f64:
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.minimum.f64.ll b/llvm/test/CodeGen/AMDGPU/llvm.minimum.f64.ll
index dfd67873c3b86..6bddc2e5a7aad 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.minimum.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.minimum.f64.ll
@@ -13,60 +13,60 @@ define double @v_minimum_f64(double %src0, double %src1) {
 ; GFX7:       ; %bb.0:
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX7-NEXT:    v_min_f64 v[4:5], v[0:1], v[2:3]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX7-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[2:3]
 ; GFX7-NEXT:    v_mov_b32_e32 v1, 0x7ff80000
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_minimum_f64:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_min_f64 v[4:5], v[0:1], v[2:3]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX8-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[2:3]
 ; GFX8-NEXT:    v_mov_b32_e32 v1, 0x7ff80000
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX900-LABEL: v_minimum_f64:
 ; GFX900:       ; %bb.0:
 ; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX900-NEXT:    v_min_f64 v[4:5], v[0:1], v[2:3]
-; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX900-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[2:3]
 ; GFX900-NEXT:    v_mov_b32_e32 v1, 0x7ff80000
-; GFX900-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX900-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
 ; GFX900-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_minimum_f64:
 ; GFX950:       ; %bb.0:
 ; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX950-NEXT:    v_min_f64 v[4:5], v[0:1], v[2:3]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX950-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[2:3]
 ; GFX950-NEXT:    v_mov_b32_e32 v1, 0x7ff80000
 ; GFX950-NEXT:    s_nop 0
-; GFX950-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
 ; GFX950-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: v_minimum_f64:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    v_min_f64 v[4:5], v[0:1], v[2:3]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[2:3]
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v5, 0x7ff80000, vcc_lo
+; GFX10-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[2:3]
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v5, vcc_lo
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: v_minimum_f64:
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NEXT:    v_min_f64 v[4:5], v[0:1], v[2:3]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[2:3]
+; GFX11-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[2:3]
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v5, 0x7ff80000, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v5, vcc_lo
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: v_minimum_f64:
@@ -131,60 +131,60 @@ define double @v_minimum_f64__nsz(double %src0, double %src1) {
 ; GFX7:       ; %bb.0:
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX7-NEXT:    v_min_f64 v[4:5], v[0:1], v[2:3]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX7-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[2:3]
 ; GFX7-NEXT:    v_mov_b32_e32 v1, 0x7ff80000
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_minimum_f64__nsz:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_min_f64 v[4:5], v[0:1], v[2:3]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX8-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[2:3]
 ; GFX8-NEXT:    v_mov_b32_e32 v1, 0x7ff80000
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX900-LABEL: v_minimum_f64__nsz:
 ; GFX900:       ; %bb.0:
 ; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX900-NEXT:    v_min_f64 v[4:5], v[0:1], v[2:3]
-; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX900-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[2:3]
 ; GFX900-NEXT:    v_mov_b32_e32 v1, 0x7ff80000
-; GFX900-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX900-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
 ; GFX900-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_minimum_f64__nsz:
 ; GFX950:       ; %bb.0:
 ; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX950-NEXT:    v_min_f64 v[4:5], v[0:1], v[2:3]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX950-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[2:3]
 ; GFX950-NEXT:    v_mov_b32_e32 v1, 0x7ff80000
 ; GFX950-NEXT:    s_nop 0
-; GFX950-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
 ; GFX950-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: v_minimum_f64__nsz:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    v_min_f64 v[4:5], v[0:1], v[2:3]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[2:3]
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v5, 0x7ff80000, vcc_lo
+; GFX10-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[2:3]
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v5, vcc_lo
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: v_minimum_f64__nsz:
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NEXT:    v_min_f64 v[4:5], v[0:1], v[2:3]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[2:3]
+; GFX11-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[2:3]
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v5, 0x7ff80000, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v5, vcc_lo
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: v_minimum_f64__nsz:
@@ -250,10 +250,10 @@ define double @v_minimum_f64__nnan_src0(double %arg0, double %src1) {
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX7-NEXT:    v_add_f64 v[0:1], v[0:1], 1.0
 ; GFX7-NEXT:    v_min_f64 v[4:5], v[0:1], v[2:3]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX7-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[2:3]
 ; GFX7-NEXT:    v_mov_b32_e32 v1, 0x7ff80000
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_minimum_f64__nnan_src0:
@@ -261,10 +261,10 @@ define double @v_minimum_f64__nnan_src0(double %arg0, double %src1) {
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_add_f64 v[0:1], v[0:1], 1.0
 ; GFX8-NEXT:    v_min_f64 v[4:5], v[0:1], v[2:3]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX8-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[2:3]
 ; GFX8-NEXT:    v_mov_b32_e32 v1, 0x7ff80000
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX900-LABEL: v_minimum_f64__nnan_src0:
@@ -272,10 +272,10 @@ define double @v_minimum_f64__nnan_src0(double %arg0, double %src1) {
 ; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX900-NEXT:    v_add_f64 v[0:1], v[0:1], 1.0
 ; GFX900-NEXT:    v_min_f64 v[4:5], v[0:1], v[2:3]
-; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX900-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[2:3]
 ; GFX900-NEXT:    v_mov_b32_e32 v1, 0x7ff80000
-; GFX900-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX900-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
 ; GFX900-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_minimum_f64__nnan_src0:
@@ -283,11 +283,11 @@ define double @v_minimum_f64__nnan_src0(double %arg0, double %src1) {
 ; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX950-NEXT:    v_add_f64 v[0:1], v[0:1], 1.0
 ; GFX950-NEXT:    v_min_f64 v[4:5], v[0:1], v[2:3]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX950-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[2:3]
 ; GFX950-NEXT:    v_mov_b32_e32 v1, 0x7ff80000
 ; GFX950-NEXT:    s_nop 0
-; GFX950-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
 ; GFX950-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: v_minimum_f64__nnan_src0:
@@ -295,9 +295,9 @@ define double @v_minimum_f64__nnan_src0(double %arg0, double %src1) {
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    v_add_f64 v[0:1], v[0:1], 1.0
 ; GFX10-NEXT:    v_min_f64 v[4:5], v[0:1], v[2:3]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[2:3]
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v5, 0x7ff80000, vcc_lo
+; GFX10-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[2:3]
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v5, vcc_lo
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: v_minimum_f64__nnan_src0:
@@ -306,10 +306,10 @@ define double @v_minimum_f64__nnan_src0(double %arg0, double %src1) {
 ; GFX11-NEXT:    v_add_f64 v[0:1], v[0:1], 1.0
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
 ; GFX11-NEXT:    v_min_f64 v[4:5], v[0:1], v[2:3]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[2:3]
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc_lo
+; GFX11-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[2:3]
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc_lo
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3)
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v5, 0x7ff80000, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v5, vcc_lo
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: v_minimum_f64__nnan_src0:
@@ -334,10 +334,10 @@ define double @v_minimum_f64__nnan_src1(double %src0, double %arg1) {
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX7-NEXT:    v_add_f64 v[2:3], v[2:3], 1.0
 ; GFX7-NEXT:    v_min_f64 v[4:5], v[0:1], v[2:3]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX7-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[2:3]
 ; GFX7-NEXT:    v_mov_b32_e32 v1, 0x7ff80000
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_minimum_f64__nnan_src1:
@@ -345,10 +345,10 @@ define double @v_minimum_f64__nnan_src1(double %src0, double %arg1) {
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_add_f64 v[2:3], v[2:3], 1.0
 ; GFX8-NEXT:    v_min_f64 v[4:5], v[0:1], v[2:3]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX8-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[2:3]
 ; GFX8-NEXT:    v_mov_b32_e32 v1, 0x7ff80000
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX900-LABEL: v_minimum_f64__nnan_src1:
@@ -356,10 +356,10 @@ define double @v_minimum_f64__nnan_src1(double %src0, double %arg1) {
 ; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX900-NEXT:    v_add_f64 v[2:3], v[2:3], 1.0
 ; GFX900-NEXT:    v_min_f64 v[4:5], v[0:1], v[2:3]
-; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX900-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[2:3]
 ; GFX900-NEXT:    v_mov_b32_e32 v1, 0x7ff80000
-; GFX900-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX900-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
 ; GFX900-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_minimum_f64__nnan_src1:
@@ -367,11 +367,11 @@ define double @v_minimum_f64__nnan_src1(double %src0, double %arg1) {
 ; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX950-NEXT:    v_add_f64 v[2:3], v[2:3], 1.0
 ; GFX950-NEXT:    v_min_f64 v[4:5], v[0:1], v[2:3]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX950-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[2:3]
 ; GFX950-NEXT:    v_mov_b32_e32 v1, 0x7ff80000
 ; GFX950-NEXT:    s_nop 0
-; GFX950-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
 ; GFX950-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: v_minimum_f64__nnan_src1:
@@ -379,9 +379,9 @@ define double @v_minimum_f64__nnan_src1(double %src0, double %arg1) {
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    v_add_f64 v[2:3], v[2:3], 1.0
 ; GFX10-NEXT:    v_min_f64 v[4:5], v[0:1], v[2:3]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[2:3]
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v5, 0x7ff80000, vcc_lo
+; GFX10-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[2:3]
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v5, vcc_lo
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: v_minimum_f64__nnan_src1:
@@ -390,10 +390,10 @@ define double @v_minimum_f64__nnan_src1(double %src0, double %arg1) {
 ; GFX11-NEXT:    v_add_f64 v[2:3], v[2:3], 1.0
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
 ; GFX11-NEXT:    v_min_f64 v[4:5], v[0:1], v[2:3]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[2:3]
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc_lo
+; GFX11-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[2:3]
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc_lo
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3)
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v5, 0x7ff80000, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v5, vcc_lo
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: v_minimum_f64__nnan_src1:
@@ -520,85 +520,85 @@ define <2 x double> @v_minimum_v2f64(<2 x double> %src0, <2 x double> %src1) {
 ; GFX7:       ; %bb.0:
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX7-NEXT:    v_min_f64 v[8:9], v[0:1], v[4:5]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX7-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[4:5]
 ; GFX7-NEXT:    v_min_f64 v[4:5], v[2:3], v[6:7]
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[6:7]
+; GFX7-NEXT:    v_cmp_o_f64_e64 s[4:5], v[2:3], v[6:7]
 ; GFX7-NEXT:    v_mov_b32_e32 v3, 0x7ff80000
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v8, 0, vcc
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v9, v3, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v2, v4, 0, s[4:5]
-; GFX7-NEXT:    v_cndmask_b32_e64 v3, v5, v3, s[4:5]
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, 0, v8, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v3, v9, vcc
+; GFX7-NEXT:    v_cndmask_b32_e64 v2, 0, v4, s[4:5]
+; GFX7-NEXT:    v_cndmask_b32_e64 v3, v3, v5, s[4:5]
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_minimum_v2f64:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_min_f64 v[8:9], v[0:1], v[4:5]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX8-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[4:5]
 ; GFX8-NEXT:    v_min_f64 v[4:5], v[2:3], v[6:7]
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[6:7]
+; GFX8-NEXT:    v_cmp_o_f64_e64 s[4:5], v[2:3], v[6:7]
 ; GFX8-NEXT:    v_mov_b32_e32 v3, 0x7ff80000
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v8, 0, vcc
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v9, v3, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v2, v4, 0, s[4:5]
-; GFX8-NEXT:    v_cndmask_b32_e64 v3, v5, v3, s[4:5]
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, 0, v8, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v9, vcc
+; GFX8-NEXT:    v_cndmask_b32_e64 v2, 0, v4, s[4:5]
+; GFX8-NEXT:    v_cndmask_b32_e64 v3, v3, v5, s[4:5]
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX900-LABEL: v_minimum_v2f64:
 ; GFX900:       ; %bb.0:
 ; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX900-NEXT:    v_min_f64 v[8:9], v[0:1], v[4:5]
-; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX900-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[4:5]
 ; GFX900-NEXT:    v_min_f64 v[4:5], v[2:3], v[6:7]
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[6:7]
+; GFX900-NEXT:    v_cmp_o_f64_e64 s[4:5], v[2:3], v[6:7]
 ; GFX900-NEXT:    v_mov_b32_e32 v3, 0x7ff80000
-; GFX900-NEXT:    v_cndmask_b32_e64 v0, v8, 0, vcc
-; GFX900-NEXT:    v_cndmask_b32_e32 v1, v9, v3, vcc
-; GFX900-NEXT:    v_cndmask_b32_e64 v2, v4, 0, s[4:5]
-; GFX900-NEXT:    v_cndmask_b32_e64 v3, v5, v3, s[4:5]
+; GFX900-NEXT:    v_cndmask_b32_e32 v0, 0, v8, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v1, v3, v9, vcc
+; GFX900-NEXT:    v_cndmask_b32_e64 v2, 0, v4, s[4:5]
+; GFX900-NEXT:    v_cndmask_b32_e64 v3, v3, v5, s[4:5]
 ; GFX900-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_minimum_v2f64:
 ; GFX950:       ; %bb.0:
 ; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX950-NEXT:    v_min_f64 v[8:9], v[0:1], v[4:5]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX950-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[4:5]
 ; GFX950-NEXT:    v_min_f64 v[4:5], v[2:3], v[6:7]
 ; GFX950-NEXT:    s_nop 0
-; GFX950-NEXT:    v_cndmask_b32_e64 v0, v8, 0, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v0, 0, v8, vcc
 ; GFX950-NEXT:    v_mov_b32_e32 v8, 0x7ff80000
-; GFX950-NEXT:    v_cndmask_b32_e32 v1, v9, v8, vcc
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[6:7]
+; GFX950-NEXT:    v_cndmask_b32_e32 v1, v8, v9, vcc
+; GFX950-NEXT:    v_cmp_o_f64_e32 vcc, v[2:3], v[6:7]
 ; GFX950-NEXT:    s_nop 1
-; GFX950-NEXT:    v_cndmask_b32_e64 v2, v4, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v3, v5, v8, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v2, 0, v4, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v3, v8, v5, vcc
 ; GFX950-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: v_minimum_v2f64:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    v_min_f64 v[8:9], v[0:1], v[4:5]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[4:5]
+; GFX10-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[4:5]
 ; GFX10-NEXT:    v_min_f64 v[4:5], v[2:3], v[6:7]
-; GFX10-NEXT:    v_cmp_u_f64_e64 s4, v[2:3], v[6:7]
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v8, 0, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v9, 0x7ff80000, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v2, v4, 0, s4
-; GFX10-NEXT:    v_cndmask_b32_e64 v3, v5, 0x7ff80000, s4
+; GFX10-NEXT:    v_cmp_o_f64_e64 s4, v[2:3], v[6:7]
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0, v8, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v9, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e64 v2, 0, v4, s4
+; GFX10-NEXT:    v_cndmask_b32_e64 v3, 0x7ff80000, v5, s4
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: v_minimum_v2f64:
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NEXT:    v_min_f64 v[8:9], v[0:1], v[4:5]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[4:5]
+; GFX11-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[4:5]
 ; GFX11-NEXT:    v_min_f64 v[4:5], v[2:3], v[6:7]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s0, v[2:3], v[6:7]
+; GFX11-NEXT:    v_cmp_o_f64_e64 s0, v[2:3], v[6:7]
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v8, 0, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v9, 0x7ff80000, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v2, v4, 0, s0
-; GFX11-NEXT:    v_cndmask_b32_e64 v3, v5, 0x7ff80000, s0
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0, v8, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v9, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e64 v2, 0, v4, s0
+; GFX11-NEXT:    v_cndmask_b32_e64 v3, 0x7ff80000, v5, s0
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: v_minimum_v2f64:
@@ -670,85 +670,85 @@ define <2 x double> @v_minimum_v2f64__nsz(<2 x double> %src0, <2 x double> %src1
 ; GFX7:       ; %bb.0:
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX7-NEXT:    v_min_f64 v[8:9], v[0:1], v[4:5]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX7-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[4:5]
 ; GFX7-NEXT:    v_min_f64 v[4:5], v[2:3], v[6:7]
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[6:7]
+; GFX7-NEXT:    v_cmp_o_f64_e64 s[4:5], v[2:3], v[6:7]
 ; GFX7-NEXT:    v_mov_b32_e32 v3, 0x7ff80000
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v8, 0, vcc
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v9, v3, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v2, v4, 0, s[4:5]
-; GFX7-NEXT:    v_cndmask_b32_e64 v3, v5, v3, s[4:5]
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, 0, v8, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v3, v9, vcc
+; GFX7-NEXT:    v_cndmask_b32_e64 v2, 0, v4, s[4:5]
+; GFX7-NEXT:    v_cndmask_b32_e64 v3, v3, v5, s[4:5]
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_minimum_v2f64__nsz:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_min_f64 v[8:9], v[0:1], v[4:5]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX8-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[4:5]
 ; GFX8-NEXT:    v_min_f64 v[4:5], v[2:3], v[6:7]
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[6:7]
+; GFX8-NEXT:    v_cmp_o_f64_e64 s[4:5], v[2:3], v[6:7]
 ; GFX8-NEXT:    v_mov_b32_e32 v3, 0x7ff80000
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v8, 0, vcc
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v9, v3, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v2, v4, 0, s[4:5]
-; GFX8-NEXT:    v_cndmask_b32_e64 v3, v5, v3, s[4:5]
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, 0, v8, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v9, vcc
+; GFX8-NEXT:    v_cndmask_b32_e64 v2, 0, v4, s[4:5]
+; GFX8-NEXT:    v_cndmask_b32_e64 v3, v3, v5, s[4:5]
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX900-LABEL: v_minimum_v2f64__nsz:
 ; GFX900:       ; %bb.0:
 ; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX900-NEXT:    v_min_f64 v[8:9], v[0:1], v[4:5]
-; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX900-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[4:5]
 ; GFX900-NEXT:    v_min_f64 v[4:5], v[2:3], v[6:7]
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[6:7]
+; GFX900-NEXT:    v_cmp_o_f64_e64 s[4:5], v[2:3], v[6:7]
 ; GFX900-NEXT:    v_mov_b32_e32 v3, 0x7ff80000
-; GFX900-NEXT:    v_cndmask_b32_e64 v0, v8, 0, vcc
-; GFX900-NEXT:    v_cndmask_b32_e32 v1, v9, v3, vcc
-; GFX900-NEXT:    v_cndmask_b32_e64 v2, v4, 0, s[4:5]
-; GFX900-NEXT:    v_cndmask_b32_e64 v3, v5, v3, s[4:5]
+; GFX900-NEXT:    v_cndmask_b32_e32 v0, 0, v8, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v1, v3, v9, vcc
+; GFX900-NEXT:    v_cndmask_b32_e64 v2, 0, v4, s[4:5]
+; GFX900-NEXT:    v_cndmask_b32_e64 v3, v3, v5, s[4:5]
 ; GFX900-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_minimum_v2f64__nsz:
 ; GFX950:       ; %bb.0:
 ; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX950-NEXT:    v_min_f64 v[8:9], v[0:1], v[4:5]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX950-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[4:5]
 ; GFX950-NEXT:    v_min_f64 v[4:5], v[2:3], v[6:7]
 ; GFX950-NEXT:    s_nop 0
-; GFX950-NEXT:    v_cndmask_b32_e64 v0, v8, 0, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v0, 0, v8, vcc
 ; GFX950-NEXT:    v_mov_b32_e32 v8, 0x7ff80000
-; GFX950-NEXT:    v_cndmask_b32_e32 v1, v9, v8, vcc
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[6:7]
+; GFX950-NEXT:    v_cndmask_b32_e32 v1, v8, v9, vcc
+; GFX950-NEXT:    v_cmp_o_f64_e32 vcc, v[2:3], v[6:7]
 ; GFX950-NEXT:    s_nop 1
-; GFX950-NEXT:    v_cndmask_b32_e64 v2, v4, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v3, v5, v8, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v2, 0, v4, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v3, v8, v5, vcc
 ; GFX950-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: v_minimum_v2f64__nsz:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    v_min_f64 v[8:9], v[0:1], v[4:5]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[4:5]
+; GFX10-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[4:5]
 ; GFX10-NEXT:    v_min_f64 v[4:5], v[2:3], v[6:7]
-; GFX10-NEXT:    v_cmp_u_f64_e64 s4, v[2:3], v[6:7]
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v8, 0, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v9, 0x7ff80000, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v2, v4, 0, s4
-; GFX10-NEXT:    v_cndmask_b32_e64 v3, v5, 0x7ff80000, s4
+; GFX10-NEXT:    v_cmp_o_f64_e64 s4, v[2:3], v[6:7]
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0, v8, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v9, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e64 v2, 0, v4, s4
+; GFX10-NEXT:    v_cndmask_b32_e64 v3, 0x7ff80000, v5, s4
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: v_minimum_v2f64__nsz:
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NEXT:    v_min_f64 v[8:9], v[0:1], v[4:5]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[4:5]
+; GFX11-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[4:5]
 ; GFX11-NEXT:    v_min_f64 v[4:5], v[2:3], v[6:7]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s0, v[2:3], v[6:7]
+; GFX11-NEXT:    v_cmp_o_f64_e64 s0, v[2:3], v[6:7]
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v8, 0, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v9, 0x7ff80000, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v2, v4, 0, s0
-; GFX11-NEXT:    v_cndmask_b32_e64 v3, v5, 0x7ff80000, s0
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0, v8, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v9, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e64 v2, 0, v4, s0
+; GFX11-NEXT:    v_cndmask_b32_e64 v3, 0x7ff80000, v5, s0
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: v_minimum_v2f64__nsz:
@@ -955,109 +955,109 @@ define <3 x double> @v_minimum_v3f64(<3 x double> %src0, <3 x double> %src1) {
 ; GFX7:       ; %bb.0:
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX7-NEXT:    v_min_f64 v[12:13], v[0:1], v[6:7]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
+; GFX7-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[6:7]
 ; GFX7-NEXT:    v_min_f64 v[6:7], v[2:3], v[8:9]
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[8:9]
+; GFX7-NEXT:    v_cmp_o_f64_e64 s[4:5], v[2:3], v[8:9]
 ; GFX7-NEXT:    v_min_f64 v[8:9], v[4:5], v[10:11]
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[6:7], v[4:5], v[10:11]
+; GFX7-NEXT:    v_cmp_o_f64_e64 s[6:7], v[4:5], v[10:11]
 ; GFX7-NEXT:    v_mov_b32_e32 v5, 0x7ff80000
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v12, 0, vcc
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v13, v5, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v2, v6, 0, s[4:5]
-; GFX7-NEXT:    v_cndmask_b32_e64 v3, v7, v5, s[4:5]
-; GFX7-NEXT:    v_cndmask_b32_e64 v4, v8, 0, s[6:7]
-; GFX7-NEXT:    v_cndmask_b32_e64 v5, v9, v5, s[6:7]
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, 0, v12, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v5, v13, vcc
+; GFX7-NEXT:    v_cndmask_b32_e64 v2, 0, v6, s[4:5]
+; GFX7-NEXT:    v_cndmask_b32_e64 v3, v5, v7, s[4:5]
+; GFX7-NEXT:    v_cndmask_b32_e64 v4, 0, v8, s[6:7]
+; GFX7-NEXT:    v_cndmask_b32_e64 v5, v5, v9, s[6:7]
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_minimum_v3f64:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_min_f64 v[12:13], v[0:1], v[6:7]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
+; GFX8-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[6:7]
 ; GFX8-NEXT:    v_min_f64 v[6:7], v[2:3], v[8:9]
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[8:9]
+; GFX8-NEXT:    v_cmp_o_f64_e64 s[4:5], v[2:3], v[8:9]
 ; GFX8-NEXT:    v_min_f64 v[8:9], v[4:5], v[10:11]
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[6:7], v[4:5], v[10:11]
+; GFX8-NEXT:    v_cmp_o_f64_e64 s[6:7], v[4:5], v[10:11]
 ; GFX8-NEXT:    v_mov_b32_e32 v5, 0x7ff80000
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v12, 0, vcc
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v13, v5, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v2, v6, 0, s[4:5]
-; GFX8-NEXT:    v_cndmask_b32_e64 v3, v7, v5, s[4:5]
-; GFX8-NEXT:    v_cndmask_b32_e64 v4, v8, 0, s[6:7]
-; GFX8-NEXT:    v_cndmask_b32_e64 v5, v9, v5, s[6:7]
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, 0, v12, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v5, v13, vcc
+; GFX8-NEXT:    v_cndmask_b32_e64 v2, 0, v6, s[4:5]
+; GFX8-NEXT:    v_cndmask_b32_e64 v3, v5, v7, s[4:5]
+; GFX8-NEXT:    v_cndmask_b32_e64 v4, 0, v8, s[6:7]
+; GFX8-NEXT:    v_cndmask_b32_e64 v5, v5, v9, s[6:7]
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX900-LABEL: v_minimum_v3f64:
 ; GFX900:       ; %bb.0:
 ; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX900-NEXT:    v_min_f64 v[12:13], v[0:1], v[6:7]
-; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
+; GFX900-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[6:7]
 ; GFX900-NEXT:    v_min_f64 v[6:7], v[2:3], v[8:9]
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[8:9]
+; GFX900-NEXT:    v_cmp_o_f64_e64 s[4:5], v[2:3], v[8:9]
 ; GFX900-NEXT:    v_min_f64 v[8:9], v[4:5], v[10:11]
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[6:7], v[4:5], v[10:11]
+; GFX900-NEXT:    v_cmp_o_f64_e64 s[6:7], v[4:5], v[10:11]
 ; GFX900-NEXT:    v_mov_b32_e32 v5, 0x7ff80000
-; GFX900-NEXT:    v_cndmask_b32_e64 v0, v12, 0, vcc
-; GFX900-NEXT:    v_cndmask_b32_e32 v1, v13, v5, vcc
-; GFX900-NEXT:    v_cndmask_b32_e64 v2, v6, 0, s[4:5]
-; GFX900-NEXT:    v_cndmask_b32_e64 v3, v7, v5, s[4:5]
-; GFX900-NEXT:    v_cndmask_b32_e64 v4, v8, 0, s[6:7]
-; GFX900-NEXT:    v_cndmask_b32_e64 v5, v9, v5, s[6:7]
+; GFX900-NEXT:    v_cndmask_b32_e32 v0, 0, v12, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v1, v5, v13, vcc
+; GFX900-NEXT:    v_cndmask_b32_e64 v2, 0, v6, s[4:5]
+; GFX900-NEXT:    v_cndmask_b32_e64 v3, v5, v7, s[4:5]
+; GFX900-NEXT:    v_cndmask_b32_e64 v4, 0, v8, s[6:7]
+; GFX900-NEXT:    v_cndmask_b32_e64 v5, v5, v9, s[6:7]
 ; GFX900-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_minimum_v3f64:
 ; GFX950:       ; %bb.0:
 ; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX950-NEXT:    v_min_f64 v[12:13], v[0:1], v[6:7]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
+; GFX950-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[6:7]
 ; GFX950-NEXT:    v_min_f64 v[6:7], v[2:3], v[8:9]
 ; GFX950-NEXT:    s_nop 0
-; GFX950-NEXT:    v_cndmask_b32_e64 v0, v12, 0, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v0, 0, v12, vcc
 ; GFX950-NEXT:    v_mov_b32_e32 v12, 0x7ff80000
-; GFX950-NEXT:    v_cndmask_b32_e32 v1, v13, v12, vcc
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[8:9]
+; GFX950-NEXT:    v_cndmask_b32_e32 v1, v12, v13, vcc
+; GFX950-NEXT:    v_cmp_o_f64_e32 vcc, v[2:3], v[8:9]
 ; GFX950-NEXT:    s_nop 1
-; GFX950-NEXT:    v_cndmask_b32_e64 v2, v6, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v3, v7, v12, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v2, 0, v6, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v3, v12, v7, vcc
 ; GFX950-NEXT:    v_min_f64 v[6:7], v[4:5], v[10:11]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[10:11]
+; GFX950-NEXT:    v_cmp_o_f64_e32 vcc, v[4:5], v[10:11]
 ; GFX950-NEXT:    s_nop 1
-; GFX950-NEXT:    v_cndmask_b32_e64 v4, v6, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v5, v7, v12, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v4, 0, v6, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v5, v12, v7, vcc
 ; GFX950-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: v_minimum_v3f64:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    v_min_f64 v[12:13], v[0:1], v[6:7]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[6:7]
+; GFX10-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[6:7]
 ; GFX10-NEXT:    v_min_f64 v[6:7], v[2:3], v[8:9]
-; GFX10-NEXT:    v_cmp_u_f64_e64 s4, v[2:3], v[8:9]
+; GFX10-NEXT:    v_cmp_o_f64_e64 s4, v[2:3], v[8:9]
 ; GFX10-NEXT:    v_min_f64 v[8:9], v[4:5], v[10:11]
-; GFX10-NEXT:    v_cmp_u_f64_e64 s5, v[4:5], v[10:11]
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v12, 0, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v13, 0x7ff80000, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v2, v6, 0, s4
-; GFX10-NEXT:    v_cndmask_b32_e64 v3, v7, 0x7ff80000, s4
-; GFX10-NEXT:    v_cndmask_b32_e64 v4, v8, 0, s5
-; GFX10-NEXT:    v_cndmask_b32_e64 v5, v9, 0x7ff80000, s5
+; GFX10-NEXT:    v_cmp_o_f64_e64 s5, v[4:5], v[10:11]
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0, v12, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v13, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e64 v2, 0, v6, s4
+; GFX10-NEXT:    v_cndmask_b32_e64 v3, 0x7ff80000, v7, s4
+; GFX10-NEXT:    v_cndmask_b32_e64 v4, 0, v8, s5
+; GFX10-NEXT:    v_cndmask_b32_e64 v5, 0x7ff80000, v9, s5
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: v_minimum_v3f64:
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NEXT:    v_min_f64 v[12:13], v[0:1], v[6:7]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[6:7]
+; GFX11-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[6:7]
 ; GFX11-NEXT:    v_min_f64 v[6:7], v[2:3], v[8:9]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s0, v[2:3], v[8:9]
+; GFX11-NEXT:    v_cmp_o_f64_e64 s0, v[2:3], v[8:9]
 ; GFX11-NEXT:    v_min_f64 v[8:9], v[4:5], v[10:11]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s1, v[4:5], v[10:11]
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v12, 0, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v13, 0x7ff80000, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v2, v6, 0, s0
-; GFX11-NEXT:    v_cndmask_b32_e64 v3, v7, 0x7ff80000, s0
-; GFX11-NEXT:    v_cndmask_b32_e64 v4, v8, 0, s1
-; GFX11-NEXT:    v_cndmask_b32_e64 v5, v9, 0x7ff80000, s1
+; GFX11-NEXT:    v_cmp_o_f64_e64 s1, v[4:5], v[10:11]
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0, v12, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v13, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e64 v2, 0, v6, s0
+; GFX11-NEXT:    v_cndmask_b32_e64 v3, 0x7ff80000, v7, s0
+; GFX11-NEXT:    v_cndmask_b32_e64 v4, 0, v8, s1
+; GFX11-NEXT:    v_cndmask_b32_e64 v5, 0x7ff80000, v9, s1
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: v_minimum_v3f64:
@@ -1136,109 +1136,109 @@ define <3 x double> @v_minimum_v3f64__nsz(<3 x double> %src0, <3 x double> %src1
 ; GFX7:       ; %bb.0:
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX7-NEXT:    v_min_f64 v[12:13], v[0:1], v[6:7]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
+; GFX7-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[6:7]
 ; GFX7-NEXT:    v_min_f64 v[6:7], v[2:3], v[8:9]
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[8:9]
+; GFX7-NEXT:    v_cmp_o_f64_e64 s[4:5], v[2:3], v[8:9]
 ; GFX7-NEXT:    v_min_f64 v[8:9], v[4:5], v[10:11]
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[6:7], v[4:5], v[10:11]
+; GFX7-NEXT:    v_cmp_o_f64_e64 s[6:7], v[4:5], v[10:11]
 ; GFX7-NEXT:    v_mov_b32_e32 v5, 0x7ff80000
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v12, 0, vcc
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v13, v5, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v2, v6, 0, s[4:5]
-; GFX7-NEXT:    v_cndmask_b32_e64 v3, v7, v5, s[4:5]
-; GFX7-NEXT:    v_cndmask_b32_e64 v4, v8, 0, s[6:7]
-; GFX7-NEXT:    v_cndmask_b32_e64 v5, v9, v5, s[6:7]
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, 0, v12, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v5, v13, vcc
+; GFX7-NEXT:    v_cndmask_b32_e64 v2, 0, v6, s[4:5]
+; GFX7-NEXT:    v_cndmask_b32_e64 v3, v5, v7, s[4:5]
+; GFX7-NEXT:    v_cndmask_b32_e64 v4, 0, v8, s[6:7]
+; GFX7-NEXT:    v_cndmask_b32_e64 v5, v5, v9, s[6:7]
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_minimum_v3f64__nsz:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_min_f64 v[12:13], v[0:1], v[6:7]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
+; GFX8-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[6:7]
 ; GFX8-NEXT:    v_min_f64 v[6:7], v[2:3], v[8:9]
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[8:9]
+; GFX8-NEXT:    v_cmp_o_f64_e64 s[4:5], v[2:3], v[8:9]
 ; GFX8-NEXT:    v_min_f64 v[8:9], v[4:5], v[10:11]
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[6:7], v[4:5], v[10:11]
+; GFX8-NEXT:    v_cmp_o_f64_e64 s[6:7], v[4:5], v[10:11]
 ; GFX8-NEXT:    v_mov_b32_e32 v5, 0x7ff80000
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v12, 0, vcc
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v13, v5, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v2, v6, 0, s[4:5]
-; GFX8-NEXT:    v_cndmask_b32_e64 v3, v7, v5, s[4:5]
-; GFX8-NEXT:    v_cndmask_b32_e64 v4, v8, 0, s[6:7]
-; GFX8-NEXT:    v_cndmask_b32_e64 v5, v9, v5, s[6:7]
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, 0, v12, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v5, v13, vcc
+; GFX8-NEXT:    v_cndmask_b32_e64 v2, 0, v6, s[4:5]
+; GFX8-NEXT:    v_cndmask_b32_e64 v3, v5, v7, s[4:5]
+; GFX8-NEXT:    v_cndmask_b32_e64 v4, 0, v8, s[6:7]
+; GFX8-NEXT:    v_cndmask_b32_e64 v5, v5, v9, s[6:7]
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX900-LABEL: v_minimum_v3f64__nsz:
 ; GFX900:       ; %bb.0:
 ; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX900-NEXT:    v_min_f64 v[12:13], v[0:1], v[6:7]
-; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
+; GFX900-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[6:7]
 ; GFX900-NEXT:    v_min_f64 v[6:7], v[2:3], v[8:9]
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[8:9]
+; GFX900-NEXT:    v_cmp_o_f64_e64 s[4:5], v[2:3], v[8:9]
 ; GFX900-NEXT:    v_min_f64 v[8:9], v[4:5], v[10:11]
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[6:7], v[4:5], v[10:11]
+; GFX900-NEXT:    v_cmp_o_f64_e64 s[6:7], v[4:5], v[10:11]
 ; GFX900-NEXT:    v_mov_b32_e32 v5, 0x7ff80000
-; GFX900-NEXT:    v_cndmask_b32_e64 v0, v12, 0, vcc
-; GFX900-NEXT:    v_cndmask_b32_e32 v1, v13, v5, vcc
-; GFX900-NEXT:    v_cndmask_b32_e64 v2, v6, 0, s[4:5]
-; GFX900-NEXT:    v_cndmask_b32_e64 v3, v7, v5, s[4:5]
-; GFX900-NEXT:    v_cndmask_b32_e64 v4, v8, 0, s[6:7]
-; GFX900-NEXT:    v_cndmask_b32_e64 v5, v9, v5, s[6:7]
+; GFX900-NEXT:    v_cndmask_b32_e32 v0, 0, v12, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v1, v5, v13, vcc
+; GFX900-NEXT:    v_cndmask_b32_e64 v2, 0, v6, s[4:5]
+; GFX900-NEXT:    v_cndmask_b32_e64 v3, v5, v7, s[4:5]
+; GFX900-NEXT:    v_cndmask_b32_e64 v4, 0, v8, s[6:7]
+; GFX900-NEXT:    v_cndmask_b32_e64 v5, v5, v9, s[6:7]
 ; GFX900-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_minimum_v3f64__nsz:
 ; GFX950:       ; %bb.0:
 ; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX950-NEXT:    v_min_f64 v[12:13], v[0:1], v[6:7]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
+; GFX950-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[6:7]
 ; GFX950-NEXT:    v_min_f64 v[6:7], v[2:3], v[8:9]
 ; GFX950-NEXT:    s_nop 0
-; GFX950-NEXT:    v_cndmask_b32_e64 v0, v12, 0, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v0, 0, v12, vcc
 ; GFX950-NEXT:    v_mov_b32_e32 v12, 0x7ff80000
-; GFX950-NEXT:    v_cndmask_b32_e32 v1, v13, v12, vcc
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[8:9]
+; GFX950-NEXT:    v_cndmask_b32_e32 v1, v12, v13, vcc
+; GFX950-NEXT:    v_cmp_o_f64_e32 vcc, v[2:3], v[8:9]
 ; GFX950-NEXT:    s_nop 1
-; GFX950-NEXT:    v_cndmask_b32_e64 v2, v6, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v3, v7, v12, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v2, 0, v6, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v3, v12, v7, vcc
 ; GFX950-NEXT:    v_min_f64 v[6:7], v[4:5], v[10:11]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[10:11]
+; GFX950-NEXT:    v_cmp_o_f64_e32 vcc, v[4:5], v[10:11]
 ; GFX950-NEXT:    s_nop 1
-; GFX950-NEXT:    v_cndmask_b32_e64 v4, v6, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v5, v7, v12, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v4, 0, v6, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v5, v12, v7, vcc
 ; GFX950-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: v_minimum_v3f64__nsz:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    v_min_f64 v[12:13], v[0:1], v[6:7]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[6:7]
+; GFX10-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[6:7]
 ; GFX10-NEXT:    v_min_f64 v[6:7], v[2:3], v[8:9]
-; GFX10-NEXT:    v_cmp_u_f64_e64 s4, v[2:3], v[8:9]
+; GFX10-NEXT:    v_cmp_o_f64_e64 s4, v[2:3], v[8:9]
 ; GFX10-NEXT:    v_min_f64 v[8:9], v[4:5], v[10:11]
-; GFX10-NEXT:    v_cmp_u_f64_e64 s5, v[4:5], v[10:11]
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v12, 0, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v13, 0x7ff80000, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v2, v6, 0, s4
-; GFX10-NEXT:    v_cndmask_b32_e64 v3, v7, 0x7ff80000, s4
-; GFX10-NEXT:    v_cndmask_b32_e64 v4, v8, 0, s5
-; GFX10-NEXT:    v_cndmask_b32_e64 v5, v9, 0x7ff80000, s5
+; GFX10-NEXT:    v_cmp_o_f64_e64 s5, v[4:5], v[10:11]
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0, v12, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v13, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e64 v2, 0, v6, s4
+; GFX10-NEXT:    v_cndmask_b32_e64 v3, 0x7ff80000, v7, s4
+; GFX10-NEXT:    v_cndmask_b32_e64 v4, 0, v8, s5
+; GFX10-NEXT:    v_cndmask_b32_e64 v5, 0x7ff80000, v9, s5
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: v_minimum_v3f64__nsz:
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NEXT:    v_min_f64 v[12:13], v[0:1], v[6:7]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[6:7]
+; GFX11-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[6:7]
 ; GFX11-NEXT:    v_min_f64 v[6:7], v[2:3], v[8:9]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s0, v[2:3], v[8:9]
+; GFX11-NEXT:    v_cmp_o_f64_e64 s0, v[2:3], v[8:9]
 ; GFX11-NEXT:    v_min_f64 v[8:9], v[4:5], v[10:11]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s1, v[4:5], v[10:11]
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v12, 0, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v13, 0x7ff80000, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v2, v6, 0, s0
-; GFX11-NEXT:    v_cndmask_b32_e64 v3, v7, 0x7ff80000, s0
-; GFX11-NEXT:    v_cndmask_b32_e64 v4, v8, 0, s1
-; GFX11-NEXT:    v_cndmask_b32_e64 v5, v9, 0x7ff80000, s1
+; GFX11-NEXT:    v_cmp_o_f64_e64 s1, v[4:5], v[10:11]
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0, v12, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v13, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e64 v2, 0, v6, s0
+; GFX11-NEXT:    v_cndmask_b32_e64 v3, 0x7ff80000, v7, s0
+; GFX11-NEXT:    v_cndmask_b32_e64 v4, 0, v8, s1
+; GFX11-NEXT:    v_cndmask_b32_e64 v5, 0x7ff80000, v9, s1
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: v_minimum_v3f64__nsz:
@@ -1317,134 +1317,134 @@ define <4 x double> @v_minimum_v4f64(<4 x double> %src0, <4 x double> %src1) {
 ; GFX7:       ; %bb.0:
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX7-NEXT:    v_min_f64 v[16:17], v[0:1], v[8:9]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
+; GFX7-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[8:9]
 ; GFX7-NEXT:    v_min_f64 v[8:9], v[2:3], v[10:11]
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[10:11]
+; GFX7-NEXT:    v_cmp_o_f64_e64 s[4:5], v[2:3], v[10:11]
 ; GFX7-NEXT:    v_min_f64 v[10:11], v[4:5], v[12:13]
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[6:7], v[4:5], v[12:13]
+; GFX7-NEXT:    v_cmp_o_f64_e64 s[6:7], v[4:5], v[12:13]
 ; GFX7-NEXT:    v_min_f64 v[12:13], v[6:7], v[14:15]
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[8:9], v[6:7], v[14:15]
+; GFX7-NEXT:    v_cmp_o_f64_e64 s[8:9], v[6:7], v[14:15]
 ; GFX7-NEXT:    v_mov_b32_e32 v7, 0x7ff80000
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v16, 0, vcc
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v17, v7, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v2, v8, 0, s[4:5]
-; GFX7-NEXT:    v_cndmask_b32_e64 v3, v9, v7, s[4:5]
-; GFX7-NEXT:    v_cndmask_b32_e64 v4, v10, 0, s[6:7]
-; GFX7-NEXT:    v_cndmask_b32_e64 v5, v11, v7, s[6:7]
-; GFX7-NEXT:    v_cndmask_b32_e64 v6, v12, 0, s[8:9]
-; GFX7-NEXT:    v_cndmask_b32_e64 v7, v13, v7, s[8:9]
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, 0, v16, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v7, v17, vcc
+; GFX7-NEXT:    v_cndmask_b32_e64 v2, 0, v8, s[4:5]
+; GFX7-NEXT:    v_cndmask_b32_e64 v3, v7, v9, s[4:5]
+; GFX7-NEXT:    v_cndmask_b32_e64 v4, 0, v10, s[6:7]
+; GFX7-NEXT:    v_cndmask_b32_e64 v5, v7, v11, s[6:7]
+; GFX7-NEXT:    v_cndmask_b32_e64 v6, 0, v12, s[8:9]
+; GFX7-NEXT:    v_cndmask_b32_e64 v7, v7, v13, s[8:9]
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_minimum_v4f64:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_min_f64 v[16:17], v[0:1], v[8:9]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
+; GFX8-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[8:9]
 ; GFX8-NEXT:    v_min_f64 v[8:9], v[2:3], v[10:11]
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[10:11]
+; GFX8-NEXT:    v_cmp_o_f64_e64 s[4:5], v[2:3], v[10:11]
 ; GFX8-NEXT:    v_min_f64 v[10:11], v[4:5], v[12:13]
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[6:7], v[4:5], v[12:13]
+; GFX8-NEXT:    v_cmp_o_f64_e64 s[6:7], v[4:5], v[12:13]
 ; GFX8-NEXT:    v_min_f64 v[12:13], v[6:7], v[14:15]
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[8:9], v[6:7], v[14:15]
+; GFX8-NEXT:    v_cmp_o_f64_e64 s[8:9], v[6:7], v[14:15]
 ; GFX8-NEXT:    v_mov_b32_e32 v7, 0x7ff80000
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v16, 0, vcc
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v17, v7, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v2, v8, 0, s[4:5]
-; GFX8-NEXT:    v_cndmask_b32_e64 v3, v9, v7, s[4:5]
-; GFX8-NEXT:    v_cndmask_b32_e64 v4, v10, 0, s[6:7]
-; GFX8-NEXT:    v_cndmask_b32_e64 v5, v11, v7, s[6:7]
-; GFX8-NEXT:    v_cndmask_b32_e64 v6, v12, 0, s[8:9]
-; GFX8-NEXT:    v_cndmask_b32_e64 v7, v13, v7, s[8:9]
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, 0, v16, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v7, v17, vcc
+; GFX8-NEXT:    v_cndmask_b32_e64 v2, 0, v8, s[4:5]
+; GFX8-NEXT:    v_cndmask_b32_e64 v3, v7, v9, s[4:5]
+; GFX8-NEXT:    v_cndmask_b32_e64 v4, 0, v10, s[6:7]
+; GFX8-NEXT:    v_cndmask_b32_e64 v5, v7, v11, s[6:7]
+; GFX8-NEXT:    v_cndmask_b32_e64 v6, 0, v12, s[8:9]
+; GFX8-NEXT:    v_cndmask_b32_e64 v7, v7, v13, s[8:9]
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX900-LABEL: v_minimum_v4f64:
 ; GFX900:       ; %bb.0:
 ; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX900-NEXT:    v_min_f64 v[16:17], v[0:1], v[8:9]
-; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
+; GFX900-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[8:9]
 ; GFX900-NEXT:    v_min_f64 v[8:9], v[2:3], v[10:11]
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[10:11]
+; GFX900-NEXT:    v_cmp_o_f64_e64 s[4:5], v[2:3], v[10:11]
 ; GFX900-NEXT:    v_min_f64 v[10:11], v[4:5], v[12:13]
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[6:7], v[4:5], v[12:13]
+; GFX900-NEXT:    v_cmp_o_f64_e64 s[6:7], v[4:5], v[12:13]
 ; GFX900-NEXT:    v_min_f64 v[12:13], v[6:7], v[14:15]
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[8:9], v[6:7], v[14:15]
+; GFX900-NEXT:    v_cmp_o_f64_e64 s[8:9], v[6:7], v[14:15]
 ; GFX900-NEXT:    v_mov_b32_e32 v7, 0x7ff80000
-; GFX900-NEXT:    v_cndmask_b32_e64 v0, v16, 0, vcc
-; GFX900-NEXT:    v_cndmask_b32_e32 v1, v17, v7, vcc
-; GFX900-NEXT:    v_cndmask_b32_e64 v2, v8, 0, s[4:5]
-; GFX900-NEXT:    v_cndmask_b32_e64 v3, v9, v7, s[4:5]
-; GFX900-NEXT:    v_cndmask_b32_e64 v4, v10, 0, s[6:7]
-; GFX900-NEXT:    v_cndmask_b32_e64 v5, v11, v7, s[6:7]
-; GFX900-NEXT:    v_cndmask_b32_e64 v6, v12, 0, s[8:9]
-; GFX900-NEXT:    v_cndmask_b32_e64 v7, v13, v7, s[8:9]
+; GFX900-NEXT:    v_cndmask_b32_e32 v0, 0, v16, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v1, v7, v17, vcc
+; GFX900-NEXT:    v_cndmask_b32_e64 v2, 0, v8, s[4:5]
+; GFX900-NEXT:    v_cndmask_b32_e64 v3, v7, v9, s[4:5]
+; GFX900-NEXT:    v_cndmask_b32_e64 v4, 0, v10, s[6:7]
+; GFX900-NEXT:    v_cndmask_b32_e64 v5, v7, v11, s[6:7]
+; GFX900-NEXT:    v_cndmask_b32_e64 v6, 0, v12, s[8:9]
+; GFX900-NEXT:    v_cndmask_b32_e64 v7, v7, v13, s[8:9]
 ; GFX900-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_minimum_v4f64:
 ; GFX950:       ; %bb.0:
 ; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX950-NEXT:    v_min_f64 v[16:17], v[0:1], v[8:9]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
+; GFX950-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[8:9]
 ; GFX950-NEXT:    v_min_f64 v[8:9], v[2:3], v[10:11]
 ; GFX950-NEXT:    s_nop 0
-; GFX950-NEXT:    v_cndmask_b32_e64 v0, v16, 0, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v0, 0, v16, vcc
 ; GFX950-NEXT:    v_mov_b32_e32 v16, 0x7ff80000
-; GFX950-NEXT:    v_cndmask_b32_e32 v1, v17, v16, vcc
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[10:11]
+; GFX950-NEXT:    v_cndmask_b32_e32 v1, v16, v17, vcc
+; GFX950-NEXT:    v_cmp_o_f64_e32 vcc, v[2:3], v[10:11]
 ; GFX950-NEXT:    s_nop 1
-; GFX950-NEXT:    v_cndmask_b32_e64 v2, v8, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v3, v9, v16, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v2, 0, v8, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v3, v16, v9, vcc
 ; GFX950-NEXT:    v_min_f64 v[8:9], v[4:5], v[12:13]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[12:13]
+; GFX950-NEXT:    v_cmp_o_f64_e32 vcc, v[4:5], v[12:13]
 ; GFX950-NEXT:    s_nop 1
-; GFX950-NEXT:    v_cndmask_b32_e64 v4, v8, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v5, v9, v16, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v4, 0, v8, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v5, v16, v9, vcc
 ; GFX950-NEXT:    v_min_f64 v[8:9], v[6:7], v[14:15]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[6:7], v[14:15]
+; GFX950-NEXT:    v_cmp_o_f64_e32 vcc, v[6:7], v[14:15]
 ; GFX950-NEXT:    s_nop 1
-; GFX950-NEXT:    v_cndmask_b32_e64 v6, v8, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v7, v9, v16, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v6, 0, v8, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v7, v16, v9, vcc
 ; GFX950-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: v_minimum_v4f64:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    v_min_f64 v[16:17], v[0:1], v[8:9]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[8:9]
+; GFX10-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[8:9]
 ; GFX10-NEXT:    v_min_f64 v[8:9], v[2:3], v[10:11]
-; GFX10-NEXT:    v_cmp_u_f64_e64 s4, v[2:3], v[10:11]
+; GFX10-NEXT:    v_cmp_o_f64_e64 s4, v[2:3], v[10:11]
 ; GFX10-NEXT:    v_min_f64 v[10:11], v[4:5], v[12:13]
-; GFX10-NEXT:    v_cmp_u_f64_e64 s5, v[4:5], v[12:13]
+; GFX10-NEXT:    v_cmp_o_f64_e64 s5, v[4:5], v[12:13]
 ; GFX10-NEXT:    v_min_f64 v[12:13], v[6:7], v[14:15]
-; GFX10-NEXT:    v_cmp_u_f64_e64 s6, v[6:7], v[14:15]
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v16, 0, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v17, 0x7ff80000, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v2, v8, 0, s4
-; GFX10-NEXT:    v_cndmask_b32_e64 v3, v9, 0x7ff80000, s4
-; GFX10-NEXT:    v_cndmask_b32_e64 v4, v10, 0, s5
-; GFX10-NEXT:    v_cndmask_b32_e64 v5, v11, 0x7ff80000, s5
-; GFX10-NEXT:    v_cndmask_b32_e64 v6, v12, 0, s6
-; GFX10-NEXT:    v_cndmask_b32_e64 v7, v13, 0x7ff80000, s6
+; GFX10-NEXT:    v_cmp_o_f64_e64 s6, v[6:7], v[14:15]
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0, v16, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v17, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e64 v2, 0, v8, s4
+; GFX10-NEXT:    v_cndmask_b32_e64 v3, 0x7ff80000, v9, s4
+; GFX10-NEXT:    v_cndmask_b32_e64 v4, 0, v10, s5
+; GFX10-NEXT:    v_cndmask_b32_e64 v5, 0x7ff80000, v11, s5
+; GFX10-NEXT:    v_cndmask_b32_e64 v6, 0, v12, s6
+; GFX10-NEXT:    v_cndmask_b32_e64 v7, 0x7ff80000, v13, s6
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: v_minimum_v4f64:
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NEXT:    v_min_f64 v[16:17], v[0:1], v[8:9]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[8:9]
+; GFX11-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[8:9]
 ; GFX11-NEXT:    v_min_f64 v[8:9], v[2:3], v[10:11]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s0, v[2:3], v[10:11]
+; GFX11-NEXT:    v_cmp_o_f64_e64 s0, v[2:3], v[10:11]
 ; GFX11-NEXT:    v_min_f64 v[10:11], v[4:5], v[12:13]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s1, v[4:5], v[12:13]
+; GFX11-NEXT:    v_cmp_o_f64_e64 s1, v[4:5], v[12:13]
 ; GFX11-NEXT:    v_min_f64 v[12:13], v[6:7], v[14:15]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s2, v[6:7], v[14:15]
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v16, 0, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v17, 0x7ff80000, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v2, v8, 0, s0
-; GFX11-NEXT:    v_cndmask_b32_e64 v3, v9, 0x7ff80000, s0
-; GFX11-NEXT:    v_cndmask_b32_e64 v4, v10, 0, s1
-; GFX11-NEXT:    v_cndmask_b32_e64 v5, v11, 0x7ff80000, s1
-; GFX11-NEXT:    v_cndmask_b32_e64 v6, v12, 0, s2
-; GFX11-NEXT:    v_cndmask_b32_e64 v7, v13, 0x7ff80000, s2
+; GFX11-NEXT:    v_cmp_o_f64_e64 s2, v[6:7], v[14:15]
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0, v16, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v17, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e64 v2, 0, v8, s0
+; GFX11-NEXT:    v_cndmask_b32_e64 v3, 0x7ff80000, v9, s0
+; GFX11-NEXT:    v_cndmask_b32_e64 v4, 0, v10, s1
+; GFX11-NEXT:    v_cndmask_b32_e64 v5, 0x7ff80000, v11, s1
+; GFX11-NEXT:    v_cndmask_b32_e64 v6, 0, v12, s2
+; GFX11-NEXT:    v_cndmask_b32_e64 v7, 0x7ff80000, v13, s2
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: v_minimum_v4f64:
@@ -1530,134 +1530,134 @@ define <4 x double> @v_minimum_v4f64__nsz(<4 x double> %src0, <4 x double> %src1
 ; GFX7:       ; %bb.0:
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX7-NEXT:    v_min_f64 v[16:17], v[0:1], v[8:9]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
+; GFX7-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[8:9]
 ; GFX7-NEXT:    v_min_f64 v[8:9], v[2:3], v[10:11]
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[10:11]
+; GFX7-NEXT:    v_cmp_o_f64_e64 s[4:5], v[2:3], v[10:11]
 ; GFX7-NEXT:    v_min_f64 v[10:11], v[4:5], v[12:13]
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[6:7], v[4:5], v[12:13]
+; GFX7-NEXT:    v_cmp_o_f64_e64 s[6:7], v[4:5], v[12:13]
 ; GFX7-NEXT:    v_min_f64 v[12:13], v[6:7], v[14:15]
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[8:9], v[6:7], v[14:15]
+; GFX7-NEXT:    v_cmp_o_f64_e64 s[8:9], v[6:7], v[14:15]
 ; GFX7-NEXT:    v_mov_b32_e32 v7, 0x7ff80000
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v16, 0, vcc
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v17, v7, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v2, v8, 0, s[4:5]
-; GFX7-NEXT:    v_cndmask_b32_e64 v3, v9, v7, s[4:5]
-; GFX7-NEXT:    v_cndmask_b32_e64 v4, v10, 0, s[6:7]
-; GFX7-NEXT:    v_cndmask_b32_e64 v5, v11, v7, s[6:7]
-; GFX7-NEXT:    v_cndmask_b32_e64 v6, v12, 0, s[8:9]
-; GFX7-NEXT:    v_cndmask_b32_e64 v7, v13, v7, s[8:9]
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, 0, v16, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v7, v17, vcc
+; GFX7-NEXT:    v_cndmask_b32_e64 v2, 0, v8, s[4:5]
+; GFX7-NEXT:    v_cndmask_b32_e64 v3, v7, v9, s[4:5]
+; GFX7-NEXT:    v_cndmask_b32_e64 v4, 0, v10, s[6:7]
+; GFX7-NEXT:    v_cndmask_b32_e64 v5, v7, v11, s[6:7]
+; GFX7-NEXT:    v_cndmask_b32_e64 v6, 0, v12, s[8:9]
+; GFX7-NEXT:    v_cndmask_b32_e64 v7, v7, v13, s[8:9]
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_minimum_v4f64__nsz:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_min_f64 v[16:17], v[0:1], v[8:9]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
+; GFX8-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[8:9]
 ; GFX8-NEXT:    v_min_f64 v[8:9], v[2:3], v[10:11]
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[10:11]
+; GFX8-NEXT:    v_cmp_o_f64_e64 s[4:5], v[2:3], v[10:11]
 ; GFX8-NEXT:    v_min_f64 v[10:11], v[4:5], v[12:13]
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[6:7], v[4:5], v[12:13]
+; GFX8-NEXT:    v_cmp_o_f64_e64 s[6:7], v[4:5], v[12:13]
 ; GFX8-NEXT:    v_min_f64 v[12:13], v[6:7], v[14:15]
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[8:9], v[6:7], v[14:15]
+; GFX8-NEXT:    v_cmp_o_f64_e64 s[8:9], v[6:7], v[14:15]
 ; GFX8-NEXT:    v_mov_b32_e32 v7, 0x7ff80000
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v16, 0, vcc
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v17, v7, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v2, v8, 0, s[4:5]
-; GFX8-NEXT:    v_cndmask_b32_e64 v3, v9, v7, s[4:5]
-; GFX8-NEXT:    v_cndmask_b32_e64 v4, v10, 0, s[6:7]
-; GFX8-NEXT:    v_cndmask_b32_e64 v5, v11, v7, s[6:7]
-; GFX8-NEXT:    v_cndmask_b32_e64 v6, v12, 0, s[8:9]
-; GFX8-NEXT:    v_cndmask_b32_e64 v7, v13, v7, s[8:9]
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, 0, v16, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v7, v17, vcc
+; GFX8-NEXT:    v_cndmask_b32_e64 v2, 0, v8, s[4:5]
+; GFX8-NEXT:    v_cndmask_b32_e64 v3, v7, v9, s[4:5]
+; GFX8-NEXT:    v_cndmask_b32_e64 v4, 0, v10, s[6:7]
+; GFX8-NEXT:    v_cndmask_b32_e64 v5, v7, v11, s[6:7]
+; GFX8-NEXT:    v_cndmask_b32_e64 v6, 0, v12, s[8:9]
+; GFX8-NEXT:    v_cndmask_b32_e64 v7, v7, v13, s[8:9]
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX900-LABEL: v_minimum_v4f64__nsz:
 ; GFX900:       ; %bb.0:
 ; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX900-NEXT:    v_min_f64 v[16:17], v[0:1], v[8:9]
-; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
+; GFX900-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[8:9]
 ; GFX900-NEXT:    v_min_f64 v[8:9], v[2:3], v[10:11]
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[10:11]
+; GFX900-NEXT:    v_cmp_o_f64_e64 s[4:5], v[2:3], v[10:11]
 ; GFX900-NEXT:    v_min_f64 v[10:11], v[4:5], v[12:13]
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[6:7], v[4:5], v[12:13]
+; GFX900-NEXT:    v_cmp_o_f64_e64 s[6:7], v[4:5], v[12:13]
 ; GFX900-NEXT:    v_min_f64 v[12:13], v[6:7], v[14:15]
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[8:9], v[6:7], v[14:15]
+; GFX900-NEXT:    v_cmp_o_f64_e64 s[8:9], v[6:7], v[14:15]
 ; GFX900-NEXT:    v_mov_b32_e32 v7, 0x7ff80000
-; GFX900-NEXT:    v_cndmask_b32_e64 v0, v16, 0, vcc
-; GFX900-NEXT:    v_cndmask_b32_e32 v1, v17, v7, vcc
-; GFX900-NEXT:    v_cndmask_b32_e64 v2, v8, 0, s[4:5]
-; GFX900-NEXT:    v_cndmask_b32_e64 v3, v9, v7, s[4:5]
-; GFX900-NEXT:    v_cndmask_b32_e64 v4, v10, 0, s[6:7]
-; GFX900-NEXT:    v_cndmask_b32_e64 v5, v11, v7, s[6:7]
-; GFX900-NEXT:    v_cndmask_b32_e64 v6, v12, 0, s[8:9]
-; GFX900-NEXT:    v_cndmask_b32_e64 v7, v13, v7, s[8:9]
+; GFX900-NEXT:    v_cndmask_b32_e32 v0, 0, v16, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v1, v7, v17, vcc
+; GFX900-NEXT:    v_cndmask_b32_e64 v2, 0, v8, s[4:5]
+; GFX900-NEXT:    v_cndmask_b32_e64 v3, v7, v9, s[4:5]
+; GFX900-NEXT:    v_cndmask_b32_e64 v4, 0, v10, s[6:7]
+; GFX900-NEXT:    v_cndmask_b32_e64 v5, v7, v11, s[6:7]
+; GFX900-NEXT:    v_cndmask_b32_e64 v6, 0, v12, s[8:9]
+; GFX900-NEXT:    v_cndmask_b32_e64 v7, v7, v13, s[8:9]
 ; GFX900-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_minimum_v4f64__nsz:
 ; GFX950:       ; %bb.0:
 ; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX950-NEXT:    v_min_f64 v[16:17], v[0:1], v[8:9]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
+; GFX950-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[8:9]
 ; GFX950-NEXT:    v_min_f64 v[8:9], v[2:3], v[10:11]
 ; GFX950-NEXT:    s_nop 0
-; GFX950-NEXT:    v_cndmask_b32_e64 v0, v16, 0, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v0, 0, v16, vcc
 ; GFX950-NEXT:    v_mov_b32_e32 v16, 0x7ff80000
-; GFX950-NEXT:    v_cndmask_b32_e32 v1, v17, v16, vcc
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[10:11]
+; GFX950-NEXT:    v_cndmask_b32_e32 v1, v16, v17, vcc
+; GFX950-NEXT:    v_cmp_o_f64_e32 vcc, v[2:3], v[10:11]
 ; GFX950-NEXT:    s_nop 1
-; GFX950-NEXT:    v_cndmask_b32_e64 v2, v8, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v3, v9, v16, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v2, 0, v8, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v3, v16, v9, vcc
 ; GFX950-NEXT:    v_min_f64 v[8:9], v[4:5], v[12:13]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[12:13]
+; GFX950-NEXT:    v_cmp_o_f64_e32 vcc, v[4:5], v[12:13]
 ; GFX950-NEXT:    s_nop 1
-; GFX950-NEXT:    v_cndmask_b32_e64 v4, v8, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v5, v9, v16, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v4, 0, v8, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v5, v16, v9, vcc
 ; GFX950-NEXT:    v_min_f64 v[8:9], v[6:7], v[14:15]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[6:7], v[14:15]
+; GFX950-NEXT:    v_cmp_o_f64_e32 vcc, v[6:7], v[14:15]
 ; GFX950-NEXT:    s_nop 1
-; GFX950-NEXT:    v_cndmask_b32_e64 v6, v8, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v7, v9, v16, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v6, 0, v8, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v7, v16, v9, vcc
 ; GFX950-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: v_minimum_v4f64__nsz:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    v_min_f64 v[16:17], v[0:1], v[8:9]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[8:9]
+; GFX10-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[8:9]
 ; GFX10-NEXT:    v_min_f64 v[8:9], v[2:3], v[10:11]
-; GFX10-NEXT:    v_cmp_u_f64_e64 s4, v[2:3], v[10:11]
+; GFX10-NEXT:    v_cmp_o_f64_e64 s4, v[2:3], v[10:11]
 ; GFX10-NEXT:    v_min_f64 v[10:11], v[4:5], v[12:13]
-; GFX10-NEXT:    v_cmp_u_f64_e64 s5, v[4:5], v[12:13]
+; GFX10-NEXT:    v_cmp_o_f64_e64 s5, v[4:5], v[12:13]
 ; GFX10-NEXT:    v_min_f64 v[12:13], v[6:7], v[14:15]
-; GFX10-NEXT:    v_cmp_u_f64_e64 s6, v[6:7], v[14:15]
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v16, 0, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v17, 0x7ff80000, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v2, v8, 0, s4
-; GFX10-NEXT:    v_cndmask_b32_e64 v3, v9, 0x7ff80000, s4
-; GFX10-NEXT:    v_cndmask_b32_e64 v4, v10, 0, s5
-; GFX10-NEXT:    v_cndmask_b32_e64 v5, v11, 0x7ff80000, s5
-; GFX10-NEXT:    v_cndmask_b32_e64 v6, v12, 0, s6
-; GFX10-NEXT:    v_cndmask_b32_e64 v7, v13, 0x7ff80000, s6
+; GFX10-NEXT:    v_cmp_o_f64_e64 s6, v[6:7], v[14:15]
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0, v16, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v17, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e64 v2, 0, v8, s4
+; GFX10-NEXT:    v_cndmask_b32_e64 v3, 0x7ff80000, v9, s4
+; GFX10-NEXT:    v_cndmask_b32_e64 v4, 0, v10, s5
+; GFX10-NEXT:    v_cndmask_b32_e64 v5, 0x7ff80000, v11, s5
+; GFX10-NEXT:    v_cndmask_b32_e64 v6, 0, v12, s6
+; GFX10-NEXT:    v_cndmask_b32_e64 v7, 0x7ff80000, v13, s6
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: v_minimum_v4f64__nsz:
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NEXT:    v_min_f64 v[16:17], v[0:1], v[8:9]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[8:9]
+; GFX11-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[8:9]
 ; GFX11-NEXT:    v_min_f64 v[8:9], v[2:3], v[10:11]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s0, v[2:3], v[10:11]
+; GFX11-NEXT:    v_cmp_o_f64_e64 s0, v[2:3], v[10:11]
 ; GFX11-NEXT:    v_min_f64 v[10:11], v[4:5], v[12:13]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s1, v[4:5], v[12:13]
+; GFX11-NEXT:    v_cmp_o_f64_e64 s1, v[4:5], v[12:13]
 ; GFX11-NEXT:    v_min_f64 v[12:13], v[6:7], v[14:15]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s2, v[6:7], v[14:15]
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v16, 0, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v17, 0x7ff80000, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v2, v8, 0, s0
-; GFX11-NEXT:    v_cndmask_b32_e64 v3, v9, 0x7ff80000, s0
-; GFX11-NEXT:    v_cndmask_b32_e64 v4, v10, 0, s1
-; GFX11-NEXT:    v_cndmask_b32_e64 v5, v11, 0x7ff80000, s1
-; GFX11-NEXT:    v_cndmask_b32_e64 v6, v12, 0, s2
-; GFX11-NEXT:    v_cndmask_b32_e64 v7, v13, 0x7ff80000, s2
+; GFX11-NEXT:    v_cmp_o_f64_e64 s2, v[6:7], v[14:15]
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0, v16, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v17, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e64 v2, 0, v8, s0
+; GFX11-NEXT:    v_cndmask_b32_e64 v3, 0x7ff80000, v9, s0
+; GFX11-NEXT:    v_cndmask_b32_e64 v4, 0, v10, s1
+; GFX11-NEXT:    v_cndmask_b32_e64 v5, 0x7ff80000, v11, s1
+; GFX11-NEXT:    v_cndmask_b32_e64 v6, 0, v12, s2
+; GFX11-NEXT:    v_cndmask_b32_e64 v7, 0x7ff80000, v13, s2
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: v_minimum_v4f64__nsz:
@@ -1744,39 +1744,39 @@ define <8 x double> @v_minimum_v8f64(<8 x double> %src0, <8 x double> %src1) {
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX7-NEXT:    buffer_load_dword v31, off, s[0:3], s32
 ; GFX7-NEXT:    v_min_f64 v[32:33], v[0:1], v[16:17]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[16:17]
+; GFX7-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[16:17]
 ; GFX7-NEXT:    v_min_f64 v[16:17], v[2:3], v[18:19]
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[18:19]
+; GFX7-NEXT:    v_cmp_o_f64_e64 s[4:5], v[2:3], v[18:19]
 ; GFX7-NEXT:    v_mov_b32_e32 v34, 0x7ff80000
 ; GFX7-NEXT:    v_min_f64 v[18:19], v[4:5], v[20:21]
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[6:7], v[4:5], v[20:21]
+; GFX7-NEXT:    v_cmp_o_f64_e64 s[6:7], v[4:5], v[20:21]
 ; GFX7-NEXT:    v_min_f64 v[20:21], v[6:7], v[22:23]
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[8:9], v[6:7], v[22:23]
+; GFX7-NEXT:    v_cmp_o_f64_e64 s[8:9], v[6:7], v[22:23]
 ; GFX7-NEXT:    v_min_f64 v[22:23], v[8:9], v[24:25]
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[10:11], v[8:9], v[24:25]
+; GFX7-NEXT:    v_cmp_o_f64_e64 s[10:11], v[8:9], v[24:25]
 ; GFX7-NEXT:    v_min_f64 v[24:25], v[10:11], v[26:27]
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[12:13], v[10:11], v[26:27]
+; GFX7-NEXT:    v_cmp_o_f64_e64 s[12:13], v[10:11], v[26:27]
 ; GFX7-NEXT:    v_min_f64 v[26:27], v[12:13], v[28:29]
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[14:15], v[12:13], v[28:29]
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v32, 0, vcc
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v33, v34, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v2, v16, 0, s[4:5]
-; GFX7-NEXT:    v_cndmask_b32_e64 v3, v17, v34, s[4:5]
-; GFX7-NEXT:    v_cndmask_b32_e64 v4, v18, 0, s[6:7]
-; GFX7-NEXT:    v_cndmask_b32_e64 v5, v19, v34, s[6:7]
-; GFX7-NEXT:    v_cndmask_b32_e64 v6, v20, 0, s[8:9]
-; GFX7-NEXT:    v_cndmask_b32_e64 v7, v21, v34, s[8:9]
-; GFX7-NEXT:    v_cndmask_b32_e64 v8, v22, 0, s[10:11]
-; GFX7-NEXT:    v_cndmask_b32_e64 v9, v23, v34, s[10:11]
-; GFX7-NEXT:    v_cndmask_b32_e64 v10, v24, 0, s[12:13]
-; GFX7-NEXT:    v_cndmask_b32_e64 v11, v25, v34, s[12:13]
-; GFX7-NEXT:    v_cndmask_b32_e64 v12, v26, 0, s[14:15]
-; GFX7-NEXT:    v_cndmask_b32_e64 v13, v27, v34, s[14:15]
+; GFX7-NEXT:    v_cmp_o_f64_e64 s[14:15], v[12:13], v[28:29]
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, 0, v32, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v34, v33, vcc
+; GFX7-NEXT:    v_cndmask_b32_e64 v2, 0, v16, s[4:5]
+; GFX7-NEXT:    v_cndmask_b32_e64 v3, v34, v17, s[4:5]
+; GFX7-NEXT:    v_cndmask_b32_e64 v4, 0, v18, s[6:7]
+; GFX7-NEXT:    v_cndmask_b32_e64 v5, v34, v19, s[6:7]
+; GFX7-NEXT:    v_cndmask_b32_e64 v6, 0, v20, s[8:9]
+; GFX7-NEXT:    v_cndmask_b32_e64 v7, v34, v21, s[8:9]
+; GFX7-NEXT:    v_cndmask_b32_e64 v8, 0, v22, s[10:11]
+; GFX7-NEXT:    v_cndmask_b32_e64 v9, v34, v23, s[10:11]
+; GFX7-NEXT:    v_cndmask_b32_e64 v10, 0, v24, s[12:13]
+; GFX7-NEXT:    v_cndmask_b32_e64 v11, v34, v25, s[12:13]
+; GFX7-NEXT:    v_cndmask_b32_e64 v12, 0, v26, s[14:15]
+; GFX7-NEXT:    v_cndmask_b32_e64 v13, v34, v27, s[14:15]
 ; GFX7-NEXT:    s_waitcnt vmcnt(0)
 ; GFX7-NEXT:    v_min_f64 v[16:17], v[14:15], v[30:31]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[14:15], v[30:31]
-; GFX7-NEXT:    v_cndmask_b32_e64 v14, v16, 0, vcc
-; GFX7-NEXT:    v_cndmask_b32_e32 v15, v17, v34, vcc
+; GFX7-NEXT:    v_cmp_o_f64_e32 vcc, v[14:15], v[30:31]
+; GFX7-NEXT:    v_cndmask_b32_e32 v14, 0, v16, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v15, v34, v17, vcc
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_minimum_v8f64:
@@ -1784,39 +1784,39 @@ define <8 x double> @v_minimum_v8f64(<8 x double> %src0, <8 x double> %src1) {
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    buffer_load_dword v31, off, s[0:3], s32
 ; GFX8-NEXT:    v_min_f64 v[32:33], v[0:1], v[16:17]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[16:17]
+; GFX8-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[16:17]
 ; GFX8-NEXT:    v_min_f64 v[16:17], v[2:3], v[18:19]
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[18:19]
+; GFX8-NEXT:    v_cmp_o_f64_e64 s[4:5], v[2:3], v[18:19]
 ; GFX8-NEXT:    v_mov_b32_e32 v34, 0x7ff80000
 ; GFX8-NEXT:    v_min_f64 v[18:19], v[4:5], v[20:21]
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[6:7], v[4:5], v[20:21]
+; GFX8-NEXT:    v_cmp_o_f64_e64 s[6:7], v[4:5], v[20:21]
 ; GFX8-NEXT:    v_min_f64 v[20:21], v[6:7], v[22:23]
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[8:9], v[6:7], v[22:23]
+; GFX8-NEXT:    v_cmp_o_f64_e64 s[8:9], v[6:7], v[22:23]
 ; GFX8-NEXT:    v_min_f64 v[22:23], v[8:9], v[24:25]
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[10:11], v[8:9], v[24:25]
+; GFX8-NEXT:    v_cmp_o_f64_e64 s[10:11], v[8:9], v[24:25]
 ; GFX8-NEXT:    v_min_f64 v[24:25], v[10:11], v[26:27]
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[12:13], v[10:11], v[26:27]
+; GFX8-NEXT:    v_cmp_o_f64_e64 s[12:13], v[10:11], v[26:27]
 ; GFX8-NEXT:    v_min_f64 v[26:27], v[12:13], v[28:29]
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[14:15], v[12:13], v[28:29]
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v32, 0, vcc
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v33, v34, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v2, v16, 0, s[4:5]
-; GFX8-NEXT:    v_cndmask_b32_e64 v3, v17, v34, s[4:5]
-; GFX8-NEXT:    v_cndmask_b32_e64 v4, v18, 0, s[6:7]
-; GFX8-NEXT:    v_cndmask_b32_e64 v5, v19, v34, s[6:7]
-; GFX8-NEXT:    v_cndmask_b32_e64 v6, v20, 0, s[8:9]
-; GFX8-NEXT:    v_cndmask_b32_e64 v7, v21, v34, s[8:9]
-; GFX8-NEXT:    v_cndmask_b32_e64 v8, v22, 0, s[10:11]
-; GFX8-NEXT:    v_cndmask_b32_e64 v9, v23, v34, s[10:11]
-; GFX8-NEXT:    v_cndmask_b32_e64 v10, v24, 0, s[12:13]
-; GFX8-NEXT:    v_cndmask_b32_e64 v11, v25, v34, s[12:13]
-; GFX8-NEXT:    v_cndmask_b32_e64 v12, v26, 0, s[14:15]
-; GFX8-NEXT:    v_cndmask_b32_e64 v13, v27, v34, s[14:15]
+; GFX8-NEXT:    v_cmp_o_f64_e64 s[14:15], v[12:13], v[28:29]
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, 0, v32, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v34, v33, vcc
+; GFX8-NEXT:    v_cndmask_b32_e64 v2, 0, v16, s[4:5]
+; GFX8-NEXT:    v_cndmask_b32_e64 v3, v34, v17, s[4:5]
+; GFX8-NEXT:    v_cndmask_b32_e64 v4, 0, v18, s[6:7]
+; GFX8-NEXT:    v_cndmask_b32_e64 v5, v34, v19, s[6:7]
+; GFX8-NEXT:    v_cndmask_b32_e64 v6, 0, v20, s[8:9]
+; GFX8-NEXT:    v_cndmask_b32_e64 v7, v34, v21, s[8:9]
+; GFX8-NEXT:    v_cndmask_b32_e64 v8, 0, v22, s[10:11]
+; GFX8-NEXT:    v_cndmask_b32_e64 v9, v34, v23, s[10:11]
+; GFX8-NEXT:    v_cndmask_b32_e64 v10, 0, v24, s[12:13]
+; GFX8-NEXT:    v_cndmask_b32_e64 v11, v34, v25, s[12:13]
+; GFX8-NEXT:    v_cndmask_b32_e64 v12, 0, v26, s[14:15]
+; GFX8-NEXT:    v_cndmask_b32_e64 v13, v34, v27, s[14:15]
 ; GFX8-NEXT:    s_waitcnt vmcnt(0)
 ; GFX8-NEXT:    v_min_f64 v[16:17], v[14:15], v[30:31]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[14:15], v[30:31]
-; GFX8-NEXT:    v_cndmask_b32_e64 v14, v16, 0, vcc
-; GFX8-NEXT:    v_cndmask_b32_e32 v15, v17, v34, vcc
+; GFX8-NEXT:    v_cmp_o_f64_e32 vcc, v[14:15], v[30:31]
+; GFX8-NEXT:    v_cndmask_b32_e32 v14, 0, v16, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v15, v34, v17, vcc
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX900-LABEL: v_minimum_v8f64:
@@ -1824,39 +1824,39 @@ define <8 x double> @v_minimum_v8f64(<8 x double> %src0, <8 x double> %src1) {
 ; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX900-NEXT:    buffer_load_dword v31, off, s[0:3], s32
 ; GFX900-NEXT:    v_min_f64 v[32:33], v[0:1], v[16:17]
-; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[16:17]
+; GFX900-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[16:17]
 ; GFX900-NEXT:    v_min_f64 v[16:17], v[2:3], v[18:19]
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[18:19]
+; GFX900-NEXT:    v_cmp_o_f64_e64 s[4:5], v[2:3], v[18:19]
 ; GFX900-NEXT:    v_mov_b32_e32 v34, 0x7ff80000
 ; GFX900-NEXT:    v_min_f64 v[18:19], v[4:5], v[20:21]
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[6:7], v[4:5], v[20:21]
+; GFX900-NEXT:    v_cmp_o_f64_e64 s[6:7], v[4:5], v[20:21]
 ; GFX900-NEXT:    v_min_f64 v[20:21], v[6:7], v[22:23]
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[8:9], v[6:7], v[22:23]
+; GFX900-NEXT:    v_cmp_o_f64_e64 s[8:9], v[6:7], v[22:23]
 ; GFX900-NEXT:    v_min_f64 v[22:23], v[8:9], v[24:25]
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[10:11], v[8:9], v[24:25]
+; GFX900-NEXT:    v_cmp_o_f64_e64 s[10:11], v[8:9], v[24:25]
 ; GFX900-NEXT:    v_min_f64 v[24:25], v[10:11], v[26:27]
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[12:13], v[10:11], v[26:27]
+; GFX900-NEXT:    v_cmp_o_f64_e64 s[12:13], v[10:11], v[26:27]
 ; GFX900-NEXT:    v_min_f64 v[26:27], v[12:13], v[28:29]
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[14:15], v[12:13], v[28:29]
-; GFX900-NEXT:    v_cndmask_b32_e64 v0, v32, 0, vcc
-; GFX900-NEXT:    v_cndmask_b32_e32 v1, v33, v34, vcc
-; GFX900-NEXT:    v_cndmask_b32_e64 v2, v16, 0, s[4:5]
-; GFX900-NEXT:    v_cndmask_b32_e64 v3, v17, v34, s[4:5]
-; GFX900-NEXT:    v_cndmask_b32_e64 v4, v18, 0, s[6:7]
-; GFX900-NEXT:    v_cndmask_b32_e64 v5, v19, v34, s[6:7]
-; GFX900-NEXT:    v_cndmask_b32_e64 v6, v20, 0, s[8:9]
-; GFX900-NEXT:    v_cndmask_b32_e64 v7, v21, v34, s[8:9]
-; GFX900-NEXT:    v_cndmask_b32_e64 v8, v22, 0, s[10:11]
-; GFX900-NEXT:    v_cndmask_b32_e64 v9, v23, v34, s[10:11]
-; GFX900-NEXT:    v_cndmask_b32_e64 v10, v24, 0, s[12:13]
-; GFX900-NEXT:    v_cndmask_b32_e64 v11, v25, v34, s[12:13]
-; GFX900-NEXT:    v_cndmask_b32_e64 v12, v26, 0, s[14:15]
-; GFX900-NEXT:    v_cndmask_b32_e64 v13, v27, v34, s[14:15]
+; GFX900-NEXT:    v_cmp_o_f64_e64 s[14:15], v[12:13], v[28:29]
+; GFX900-NEXT:    v_cndmask_b32_e32 v0, 0, v32, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v1, v34, v33, vcc
+; GFX900-NEXT:    v_cndmask_b32_e64 v2, 0, v16, s[4:5]
+; GFX900-NEXT:    v_cndmask_b32_e64 v3, v34, v17, s[4:5]
+; GFX900-NEXT:    v_cndmask_b32_e64 v4, 0, v18, s[6:7]
+; GFX900-NEXT:    v_cndmask_b32_e64 v5, v34, v19, s[6:7]
+; GFX900-NEXT:    v_cndmask_b32_e64 v6, 0, v20, s[8:9]
+; GFX900-NEXT:    v_cndmask_b32_e64 v7, v34, v21, s[8:9]
+; GFX900-NEXT:    v_cndmask_b32_e64 v8, 0, v22, s[10:11]
+; GFX900-NEXT:    v_cndmask_b32_e64 v9, v34, v23, s[10:11]
+; GFX900-NEXT:    v_cndmask_b32_e64 v10, 0, v24, s[12:13]
+; GFX900-NEXT:    v_cndmask_b32_e64 v11, v34, v25, s[12:13]
+; GFX900-NEXT:    v_cndmask_b32_e64 v12, 0, v26, s[14:15]
+; GFX900-NEXT:    v_cndmask_b32_e64 v13, v34, v27, s[14:15]
 ; GFX900-NEXT:    s_waitcnt vmcnt(0)
 ; GFX900-NEXT:    v_min_f64 v[16:17], v[14:15], v[30:31]
-; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[14:15], v[30:31]
-; GFX900-NEXT:    v_cndmask_b32_e64 v14, v16, 0, vcc
-; GFX900-NEXT:    v_cndmask_b32_e32 v15, v17, v34, vcc
+; GFX900-NEXT:    v_cmp_o_f64_e32 vcc, v[14:15], v[30:31]
+; GFX900-NEXT:    v_cndmask_b32_e32 v14, 0, v16, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v15, v34, v17, vcc
 ; GFX900-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_minimum_v8f64:
@@ -1865,42 +1865,42 @@ define <8 x double> @v_minimum_v8f64(<8 x double> %src0, <8 x double> %src1) {
 ; GFX950-NEXT:    scratch_load_dword v31, off, s32
 ; GFX950-NEXT:    v_mov_b32_e32 v54, 0x7ff80000
 ; GFX950-NEXT:    v_min_f64 v[32:33], v[0:1], v[16:17]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[16:17]
+; GFX950-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[16:17]
 ; GFX950-NEXT:    v_min_f64 v[34:35], v[2:3], v[18:19]
 ; GFX950-NEXT:    v_min_f64 v[36:37], v[4:5], v[20:21]
-; GFX950-NEXT:    v_cndmask_b32_e64 v0, v32, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v1, v33, v54, vcc
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[2:3], v[18:19]
+; GFX950-NEXT:    v_cndmask_b32_e32 v0, 0, v32, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v1, v54, v33, vcc
+; GFX950-NEXT:    v_cmp_o_f64_e32 vcc, v[2:3], v[18:19]
 ; GFX950-NEXT:    v_min_f64 v[38:39], v[6:7], v[22:23]
 ; GFX950-NEXT:    v_min_f64 v[48:49], v[8:9], v[24:25]
-; GFX950-NEXT:    v_cndmask_b32_e64 v2, v34, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v3, v35, v54, vcc
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[4:5], v[20:21]
+; GFX950-NEXT:    v_cndmask_b32_e32 v2, 0, v34, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v3, v54, v35, vcc
+; GFX950-NEXT:    v_cmp_o_f64_e32 vcc, v[4:5], v[20:21]
 ; GFX950-NEXT:    v_min_f64 v[50:51], v[10:11], v[26:27]
 ; GFX950-NEXT:    v_min_f64 v[52:53], v[12:13], v[28:29]
-; GFX950-NEXT:    v_cndmask_b32_e64 v4, v36, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v5, v37, v54, vcc
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[6:7], v[22:23]
+; GFX950-NEXT:    v_cndmask_b32_e32 v4, 0, v36, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v5, v54, v37, vcc
+; GFX950-NEXT:    v_cmp_o_f64_e32 vcc, v[6:7], v[22:23]
 ; GFX950-NEXT:    s_waitcnt vmcnt(0)
 ; GFX950-NEXT:    v_min_f64 v[16:17], v[14:15], v[30:31]
-; GFX950-NEXT:    v_cndmask_b32_e64 v6, v38, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v7, v39, v54, vcc
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[8:9], v[24:25]
+; GFX950-NEXT:    v_cndmask_b32_e32 v6, 0, v38, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v7, v54, v39, vcc
+; GFX950-NEXT:    v_cmp_o_f64_e32 vcc, v[8:9], v[24:25]
 ; GFX950-NEXT:    s_nop 1
-; GFX950-NEXT:    v_cndmask_b32_e64 v8, v48, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v9, v49, v54, vcc
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[10:11], v[26:27]
+; GFX950-NEXT:    v_cndmask_b32_e32 v8, 0, v48, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v9, v54, v49, vcc
+; GFX950-NEXT:    v_cmp_o_f64_e32 vcc, v[10:11], v[26:27]
 ; GFX950-NEXT:    s_nop 1
-; GFX950-NEXT:    v_cndmask_b32_e64 v10, v50, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v11, v51, v54, vcc
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[12:13], v[28:29]
+; GFX950-NEXT:    v_cndmask_b32_e32 v10, 0, v50, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v11, v54, v51, vcc
+; GFX950-NEXT:    v_cmp_o_f64_e32 vcc, v[12:13], v[28:29]
 ; GFX950-NEXT:    s_nop 1
-; GFX950-NEXT:    v_cndmask_b32_e64 v12, v52, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v13, v53, v54, vcc
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[14:15], v[30:31]
+; GFX950-NEXT:    v_cndmask_b32_e32 v12, 0, v52, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v13, v54, v53, vcc
+; GFX950-NEXT:    v_cmp_o_f64_e32 vcc, v[14:15], v[30:31]
 ; GFX950-NEXT:    s_nop 1
-; GFX950-NEXT:    v_cndmask_b32_e64 v14, v16, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v15, v17, v54, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v14, 0, v16, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v15, v54, v17, vcc
 ; GFX950-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: v_minimum_v8f64:
@@ -1908,38 +1908,38 @@ define <8 x double> @v_minimum_v8f64(<8 x double> %src0, <8 x double> %src1) {
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    buffer_load_dword v31, off, s[0:3], s32
 ; GFX10-NEXT:    v_min_f64 v[32:33], v[0:1], v[16:17]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[16:17]
+; GFX10-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[16:17]
 ; GFX10-NEXT:    v_min_f64 v[16:17], v[2:3], v[18:19]
-; GFX10-NEXT:    v_cmp_u_f64_e64 s4, v[2:3], v[18:19]
+; GFX10-NEXT:    v_cmp_o_f64_e64 s4, v[2:3], v[18:19]
 ; GFX10-NEXT:    v_min_f64 v[18:19], v[4:5], v[20:21]
-; GFX10-NEXT:    v_cmp_u_f64_e64 s5, v[4:5], v[20:21]
+; GFX10-NEXT:    v_cmp_o_f64_e64 s5, v[4:5], v[20:21]
 ; GFX10-NEXT:    v_min_f64 v[20:21], v[6:7], v[22:23]
-; GFX10-NEXT:    v_cmp_u_f64_e64 s6, v[6:7], v[22:23]
+; GFX10-NEXT:    v_cmp_o_f64_e64 s6, v[6:7], v[22:23]
 ; GFX10-NEXT:    v_min_f64 v[22:23], v[8:9], v[24:25]
-; GFX10-NEXT:    v_cmp_u_f64_e64 s7, v[8:9], v[24:25]
+; GFX10-NEXT:    v_cmp_o_f64_e64 s7, v[8:9], v[24:25]
 ; GFX10-NEXT:    v_min_f64 v[24:25], v[10:11], v[26:27]
-; GFX10-NEXT:    v_cmp_u_f64_e64 s8, v[10:11], v[26:27]
+; GFX10-NEXT:    v_cmp_o_f64_e64 s8, v[10:11], v[26:27]
 ; GFX10-NEXT:    v_min_f64 v[26:27], v[12:13], v[28:29]
-; GFX10-NEXT:    v_cmp_u_f64_e64 s9, v[12:13], v[28:29]
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v32, 0, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v33, 0x7ff80000, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v2, v16, 0, s4
-; GFX10-NEXT:    v_cndmask_b32_e64 v3, v17, 0x7ff80000, s4
-; GFX10-NEXT:    v_cndmask_b32_e64 v4, v18, 0, s5
-; GFX10-NEXT:    v_cndmask_b32_e64 v5, v19, 0x7ff80000, s5
-; GFX10-NEXT:    v_cndmask_b32_e64 v6, v20, 0, s6
-; GFX10-NEXT:    v_cndmask_b32_e64 v7, v21, 0x7ff80000, s6
-; GFX10-NEXT:    v_cndmask_b32_e64 v8, v22, 0, s7
-; GFX10-NEXT:    v_cndmask_b32_e64 v9, v23, 0x7ff80000, s7
-; GFX10-NEXT:    v_cndmask_b32_e64 v10, v24, 0, s8
-; GFX10-NEXT:    v_cndmask_b32_e64 v11, v25, 0x7ff80000, s8
-; GFX10-NEXT:    v_cndmask_b32_e64 v12, v26, 0, s9
-; GFX10-NEXT:    v_cndmask_b32_e64 v13, v27, 0x7ff80000, s9
+; GFX10-NEXT:    v_cmp_o_f64_e64 s9, v[12:13], v[28:29]
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0, v32, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v33, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e64 v2, 0, v16, s4
+; GFX10-NEXT:    v_cndmask_b32_e64 v3, 0x7ff80000, v17, s4
+; GFX10-NEXT:    v_cndmask_b32_e64 v4, 0, v18, s5
+; GFX10-NEXT:    v_cndmask_b32_e64 v5, 0x7ff80000, v19, s5
+; GFX10-NEXT:    v_cndmask_b32_e64 v6, 0, v20, s6
+; GFX10-NEXT:    v_cndmask_b32_e64 v7, 0x7ff80000, v21, s6
+; GFX10-NEXT:    v_cndmask_b32_e64 v8, 0, v22, s7
+; GFX10-NEXT:    v_cndmask_b32_e64 v9, 0x7ff80000, v23, s7
+; GFX10-NEXT:    v_cndmask_b32_e64 v10, 0, v24, s8
+; GFX10-NEXT:    v_cndmask_b32_e64 v11, 0x7ff80000, v25, s8
+; GFX10-NEXT:    v_cndmask_b32_e64 v12, 0, v26, s9
+; GFX10-NEXT:    v_cndmask_b32_e64 v13, 0x7ff80000, v27, s9
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    v_min_f64 v[28:29], v[14:15], v[30:31]
-; GFX10-NEXT:    v_cmp_u_f64_e64 s10, v[14:15], v[30:31]
-; GFX10-NEXT:    v_cndmask_b32_e64 v14, v28, 0, s10
-; GFX10-NEXT:    v_cndmask_b32_e64 v15, v29, 0x7ff80000, s10
+; GFX10-NEXT:    v_cmp_o_f64_e64 s10, v[14:15], v[30:31]
+; GFX10-NEXT:    v_cndmask_b32_e64 v14, 0, v28, s10
+; GFX10-NEXT:    v_cndmask_b32_e64 v15, 0x7ff80000, v29, s10
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: v_minimum_v8f64:
@@ -1947,39 +1947,39 @@ define <8 x double> @v_minimum_v8f64(<8 x double> %src0, <8 x double> %src1) {
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NEXT:    scratch_load_b32 v31, off, s32
 ; GFX11-NEXT:    v_min_f64 v[32:33], v[0:1], v[16:17]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[16:17]
+; GFX11-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[16:17]
 ; GFX11-NEXT:    v_min_f64 v[16:17], v[2:3], v[18:19]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s0, v[2:3], v[18:19]
+; GFX11-NEXT:    v_cmp_o_f64_e64 s0, v[2:3], v[18:19]
 ; GFX11-NEXT:    v_min_f64 v[18:19], v[4:5], v[20:21]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s1, v[4:5], v[20:21]
+; GFX11-NEXT:    v_cmp_o_f64_e64 s1, v[4:5], v[20:21]
 ; GFX11-NEXT:    v_min_f64 v[20:21], v[6:7], v[22:23]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s2, v[6:7], v[22:23]
+; GFX11-NEXT:    v_cmp_o_f64_e64 s2, v[6:7], v[22:23]
 ; GFX11-NEXT:    v_min_f64 v[22:23], v[8:9], v[24:25]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s3, v[8:9], v[24:25]
+; GFX11-NEXT:    v_cmp_o_f64_e64 s3, v[8:9], v[24:25]
 ; GFX11-NEXT:    v_min_f64 v[24:25], v[10:11], v[26:27]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s4, v[10:11], v[26:27]
+; GFX11-NEXT:    v_cmp_o_f64_e64 s4, v[10:11], v[26:27]
 ; GFX11-NEXT:    v_min_f64 v[26:27], v[12:13], v[28:29]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s5, v[12:13], v[28:29]
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v32, 0, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v33, 0x7ff80000, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v2, v16, 0, s0
-; GFX11-NEXT:    v_cndmask_b32_e64 v3, v17, 0x7ff80000, s0
-; GFX11-NEXT:    v_cndmask_b32_e64 v4, v18, 0, s1
-; GFX11-NEXT:    v_cndmask_b32_e64 v5, v19, 0x7ff80000, s1
-; GFX11-NEXT:    v_cndmask_b32_e64 v6, v20, 0, s2
-; GFX11-NEXT:    v_cndmask_b32_e64 v7, v21, 0x7ff80000, s2
-; GFX11-NEXT:    v_cndmask_b32_e64 v8, v22, 0, s3
-; GFX11-NEXT:    v_cndmask_b32_e64 v9, v23, 0x7ff80000, s3
-; GFX11-NEXT:    v_cndmask_b32_e64 v10, v24, 0, s4
-; GFX11-NEXT:    v_cndmask_b32_e64 v11, v25, 0x7ff80000, s4
-; GFX11-NEXT:    v_cndmask_b32_e64 v12, v26, 0, s5
-; GFX11-NEXT:    v_cndmask_b32_e64 v13, v27, 0x7ff80000, s5
+; GFX11-NEXT:    v_cmp_o_f64_e64 s5, v[12:13], v[28:29]
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0, v32, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v33, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e64 v2, 0, v16, s0
+; GFX11-NEXT:    v_cndmask_b32_e64 v3, 0x7ff80000, v17, s0
+; GFX11-NEXT:    v_cndmask_b32_e64 v4, 0, v18, s1
+; GFX11-NEXT:    v_cndmask_b32_e64 v5, 0x7ff80000, v19, s1
+; GFX11-NEXT:    v_cndmask_b32_e64 v6, 0, v20, s2
+; GFX11-NEXT:    v_cndmask_b32_e64 v7, 0x7ff80000, v21, s2
+; GFX11-NEXT:    v_cndmask_b32_e64 v8, 0, v22, s3
+; GFX11-NEXT:    v_cndmask_b32_e64 v9, 0x7ff80000, v23, s3
+; GFX11-NEXT:    v_cndmask_b32_e64 v10, 0, v24, s4
+; GFX11-NEXT:    v_cndmask_b32_e64 v11, 0x7ff80000, v25, s4
+; GFX11-NEXT:    v_cndmask_b32_e64 v12, 0, v26, s5
+; GFX11-NEXT:    v_cndmask_b32_e64 v13, 0x7ff80000, v27, s5
 ; GFX11-NEXT:    s_waitcnt vmcnt(0)
 ; GFX11-NEXT:    v_min_f64 v[28:29], v[14:15], v[30:31]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s6, v[14:15], v[30:31]
+; GFX11-NEXT:    v_cmp_o_f64_e64 s6, v[14:15], v[30:31]
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_cndmask_b32_e64 v14, v28, 0, s6
-; GFX11-NEXT:    v_cndmask_b32_e64 v15, v29, 0x7ff80000, s6
+; GFX11-NEXT:    v_cndmask_b32_e64 v14, 0, v28, s6
+; GFX11-NEXT:    v_cndmask_b32_e64 v15, 0x7ff80000, v29, s6
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: v_minimum_v8f64:
@@ -2011,117 +2011,117 @@ define <16 x double> @v_minimum_v16f64(<16 x double> %src0, <16 x double> %src1)
 ; GFX7-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:8
 ; GFX7-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:4
 ; GFX7-NEXT:    s_waitcnt vmcnt(0)
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[31:32]
+; GFX7-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[31:32]
 ; GFX7-NEXT:    v_min_f64 v[0:1], v[0:1], v[31:32]
 ; GFX7-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:16
 ; GFX7-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:12
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
 ; GFX7-NEXT:    s_waitcnt vmcnt(0)
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[31:32]
+; GFX7-NEXT:    v_cmp_o_f64_e64 s[4:5], v[2:3], v[31:32]
 ; GFX7-NEXT:    v_min_f64 v[2:3], v[2:3], v[31:32]
 ; GFX7-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:24
 ; GFX7-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:20
-; GFX7-NEXT:    v_cndmask_b32_e64 v2, v2, 0, s[4:5]
+; GFX7-NEXT:    v_cndmask_b32_e64 v2, 0, v2, s[4:5]
 ; GFX7-NEXT:    s_waitcnt vmcnt(0)
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[6:7], v[4:5], v[31:32]
+; GFX7-NEXT:    v_cmp_o_f64_e64 s[6:7], v[4:5], v[31:32]
 ; GFX7-NEXT:    v_min_f64 v[4:5], v[4:5], v[31:32]
 ; GFX7-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:32
 ; GFX7-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:28
-; GFX7-NEXT:    v_cndmask_b32_e64 v4, v4, 0, s[6:7]
+; GFX7-NEXT:    v_cndmask_b32_e64 v4, 0, v4, s[6:7]
 ; GFX7-NEXT:    s_waitcnt vmcnt(0)
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[8:9], v[6:7], v[31:32]
+; GFX7-NEXT:    v_cmp_o_f64_e64 s[8:9], v[6:7], v[31:32]
 ; GFX7-NEXT:    v_min_f64 v[6:7], v[6:7], v[31:32]
 ; GFX7-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:36
 ; GFX7-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:40
-; GFX7-NEXT:    v_cndmask_b32_e64 v6, v6, 0, s[8:9]
+; GFX7-NEXT:    v_cndmask_b32_e64 v6, 0, v6, s[8:9]
 ; GFX7-NEXT:    s_waitcnt vmcnt(0)
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[10:11], v[8:9], v[31:32]
+; GFX7-NEXT:    v_cmp_o_f64_e64 s[10:11], v[8:9], v[31:32]
 ; GFX7-NEXT:    v_min_f64 v[8:9], v[8:9], v[31:32]
 ; GFX7-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:48
 ; GFX7-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:44
-; GFX7-NEXT:    v_cndmask_b32_e64 v8, v8, 0, s[10:11]
+; GFX7-NEXT:    v_cndmask_b32_e64 v8, 0, v8, s[10:11]
 ; GFX7-NEXT:    s_waitcnt vmcnt(0)
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[12:13], v[10:11], v[31:32]
+; GFX7-NEXT:    v_cmp_o_f64_e64 s[12:13], v[10:11], v[31:32]
 ; GFX7-NEXT:    v_min_f64 v[10:11], v[10:11], v[31:32]
 ; GFX7-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:56
 ; GFX7-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:52
-; GFX7-NEXT:    v_cndmask_b32_e64 v10, v10, 0, s[12:13]
+; GFX7-NEXT:    v_cndmask_b32_e64 v10, 0, v10, s[12:13]
 ; GFX7-NEXT:    s_waitcnt vmcnt(0)
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[14:15], v[12:13], v[31:32]
+; GFX7-NEXT:    v_cmp_o_f64_e64 s[14:15], v[12:13], v[31:32]
 ; GFX7-NEXT:    v_min_f64 v[12:13], v[12:13], v[31:32]
 ; GFX7-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:64
 ; GFX7-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:60
-; GFX7-NEXT:    v_cndmask_b32_e64 v12, v12, 0, s[14:15]
+; GFX7-NEXT:    v_cndmask_b32_e64 v12, 0, v12, s[14:15]
 ; GFX7-NEXT:    s_waitcnt vmcnt(0)
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[16:17], v[14:15], v[31:32]
+; GFX7-NEXT:    v_cmp_o_f64_e64 s[16:17], v[14:15], v[31:32]
 ; GFX7-NEXT:    v_min_f64 v[14:15], v[14:15], v[31:32]
 ; GFX7-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:68
 ; GFX7-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:72
-; GFX7-NEXT:    v_cndmask_b32_e64 v14, v14, 0, s[16:17]
+; GFX7-NEXT:    v_cndmask_b32_e64 v14, 0, v14, s[16:17]
 ; GFX7-NEXT:    s_waitcnt vmcnt(0)
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[18:19], v[16:17], v[31:32]
+; GFX7-NEXT:    v_cmp_o_f64_e64 s[18:19], v[16:17], v[31:32]
 ; GFX7-NEXT:    v_min_f64 v[16:17], v[16:17], v[31:32]
 ; GFX7-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:80
 ; GFX7-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:76
-; GFX7-NEXT:    v_cndmask_b32_e64 v16, v16, 0, s[18:19]
+; GFX7-NEXT:    v_cndmask_b32_e64 v16, 0, v16, s[18:19]
 ; GFX7-NEXT:    s_waitcnt vmcnt(0)
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[20:21], v[18:19], v[31:32]
+; GFX7-NEXT:    v_cmp_o_f64_e64 s[20:21], v[18:19], v[31:32]
 ; GFX7-NEXT:    v_min_f64 v[18:19], v[18:19], v[31:32]
 ; GFX7-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:88
 ; GFX7-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:84
-; GFX7-NEXT:    v_cndmask_b32_e64 v18, v18, 0, s[20:21]
+; GFX7-NEXT:    v_cndmask_b32_e64 v18, 0, v18, s[20:21]
 ; GFX7-NEXT:    s_waitcnt vmcnt(0)
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[22:23], v[20:21], v[31:32]
+; GFX7-NEXT:    v_cmp_o_f64_e64 s[22:23], v[20:21], v[31:32]
 ; GFX7-NEXT:    v_min_f64 v[20:21], v[20:21], v[31:32]
 ; GFX7-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:96
 ; GFX7-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:92
-; GFX7-NEXT:    v_cndmask_b32_e64 v20, v20, 0, s[22:23]
+; GFX7-NEXT:    v_cndmask_b32_e64 v20, 0, v20, s[22:23]
 ; GFX7-NEXT:    s_waitcnt vmcnt(0)
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[24:25], v[22:23], v[31:32]
+; GFX7-NEXT:    v_cmp_o_f64_e64 s[24:25], v[22:23], v[31:32]
 ; GFX7-NEXT:    v_min_f64 v[22:23], v[22:23], v[31:32]
 ; GFX7-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:100
 ; GFX7-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:104
-; GFX7-NEXT:    v_cndmask_b32_e64 v22, v22, 0, s[24:25]
+; GFX7-NEXT:    v_cndmask_b32_e64 v22, 0, v22, s[24:25]
 ; GFX7-NEXT:    s_waitcnt vmcnt(0)
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[26:27], v[24:25], v[31:32]
+; GFX7-NEXT:    v_cmp_o_f64_e64 s[26:27], v[24:25], v[31:32]
 ; GFX7-NEXT:    v_min_f64 v[24:25], v[24:25], v[31:32]
 ; GFX7-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:112
 ; GFX7-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:108
-; GFX7-NEXT:    v_cndmask_b32_e64 v24, v24, 0, s[26:27]
+; GFX7-NEXT:    v_cndmask_b32_e64 v24, 0, v24, s[26:27]
 ; GFX7-NEXT:    s_waitcnt vmcnt(0)
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[28:29], v[26:27], v[31:32]
+; GFX7-NEXT:    v_cmp_o_f64_e64 s[28:29], v[26:27], v[31:32]
 ; GFX7-NEXT:    v_min_f64 v[26:27], v[26:27], v[31:32]
 ; GFX7-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:120
 ; GFX7-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:116
-; GFX7-NEXT:    v_cndmask_b32_e64 v26, v26, 0, s[28:29]
+; GFX7-NEXT:    v_cndmask_b32_e64 v26, 0, v26, s[28:29]
 ; GFX7-NEXT:    s_waitcnt vmcnt(0)
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[40:41], v[28:29], v[31:32]
+; GFX7-NEXT:    v_cmp_o_f64_e64 s[40:41], v[28:29], v[31:32]
 ; GFX7-NEXT:    v_min_f64 v[28:29], v[28:29], v[31:32]
 ; GFX7-NEXT:    buffer_load_dword v31, off, s[0:3], s32
 ; GFX7-NEXT:    buffer_load_dword v33, off, s[0:3], s32 offset:128
 ; GFX7-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:124
-; GFX7-NEXT:    v_cndmask_b32_e64 v28, v28, 0, s[40:41]
+; GFX7-NEXT:    v_cndmask_b32_e64 v28, 0, v28, s[40:41]
 ; GFX7-NEXT:    s_waitcnt vmcnt(0)
-; GFX7-NEXT:    v_cmp_u_f64_e64 s[42:43], v[30:31], v[32:33]
+; GFX7-NEXT:    v_cmp_o_f64_e64 s[42:43], v[30:31], v[32:33]
 ; GFX7-NEXT:    v_min_f64 v[30:31], v[30:31], v[32:33]
 ; GFX7-NEXT:    v_mov_b32_e32 v32, 0x7ff80000
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v32, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v3, v3, v32, s[4:5]
-; GFX7-NEXT:    v_cndmask_b32_e64 v5, v5, v32, s[6:7]
-; GFX7-NEXT:    v_cndmask_b32_e64 v7, v7, v32, s[8:9]
-; GFX7-NEXT:    v_cndmask_b32_e64 v9, v9, v32, s[10:11]
-; GFX7-NEXT:    v_cndmask_b32_e64 v11, v11, v32, s[12:13]
-; GFX7-NEXT:    v_cndmask_b32_e64 v13, v13, v32, s[14:15]
-; GFX7-NEXT:    v_cndmask_b32_e64 v15, v15, v32, s[16:17]
-; GFX7-NEXT:    v_cndmask_b32_e64 v17, v17, v32, s[18:19]
-; GFX7-NEXT:    v_cndmask_b32_e64 v19, v19, v32, s[20:21]
-; GFX7-NEXT:    v_cndmask_b32_e64 v21, v21, v32, s[22:23]
-; GFX7-NEXT:    v_cndmask_b32_e64 v23, v23, v32, s[24:25]
-; GFX7-NEXT:    v_cndmask_b32_e64 v25, v25, v32, s[26:27]
-; GFX7-NEXT:    v_cndmask_b32_e64 v27, v27, v32, s[28:29]
-; GFX7-NEXT:    v_cndmask_b32_e64 v29, v29, v32, s[40:41]
-; GFX7-NEXT:    v_cndmask_b32_e64 v31, v31, v32, s[42:43]
-; GFX7-NEXT:    v_cndmask_b32_e64 v30, v30, 0, s[42:43]
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v32, v1, vcc
+; GFX7-NEXT:    v_cndmask_b32_e64 v3, v32, v3, s[4:5]
+; GFX7-NEXT:    v_cndmask_b32_e64 v5, v32, v5, s[6:7]
+; GFX7-NEXT:    v_cndmask_b32_e64 v7, v32, v7, s[8:9]
+; GFX7-NEXT:    v_cndmask_b32_e64 v9, v32, v9, s[10:11]
+; GFX7-NEXT:    v_cndmask_b32_e64 v11, v32, v11, s[12:13]
+; GFX7-NEXT:    v_cndmask_b32_e64 v13, v32, v13, s[14:15]
+; GFX7-NEXT:    v_cndmask_b32_e64 v15, v32, v15, s[16:17]
+; GFX7-NEXT:    v_cndmask_b32_e64 v17, v32, v17, s[18:19]
+; GFX7-NEXT:    v_cndmask_b32_e64 v19, v32, v19, s[20:21]
+; GFX7-NEXT:    v_cndmask_b32_e64 v21, v32, v21, s[22:23]
+; GFX7-NEXT:    v_cndmask_b32_e64 v23, v32, v23, s[24:25]
+; GFX7-NEXT:    v_cndmask_b32_e64 v25, v32, v25, s[26:27]
+; GFX7-NEXT:    v_cndmask_b32_e64 v27, v32, v27, s[28:29]
+; GFX7-NEXT:    v_cndmask_b32_e64 v29, v32, v29, s[40:41]
+; GFX7-NEXT:    v_cndmask_b32_e64 v31, v32, v31, s[42:43]
+; GFX7-NEXT:    v_cndmask_b32_e64 v30, 0, v30, s[42:43]
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_minimum_v16f64:
@@ -2130,117 +2130,117 @@ define <16 x double> @v_minimum_v16f64(<16 x double> %src0, <16 x double> %src1)
 ; GFX8-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:8
 ; GFX8-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:4
 ; GFX8-NEXT:    s_waitcnt vmcnt(0)
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[31:32]
+; GFX8-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[31:32]
 ; GFX8-NEXT:    v_min_f64 v[0:1], v[0:1], v[31:32]
 ; GFX8-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:16
 ; GFX8-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:12
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
 ; GFX8-NEXT:    s_waitcnt vmcnt(0)
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[31:32]
+; GFX8-NEXT:    v_cmp_o_f64_e64 s[4:5], v[2:3], v[31:32]
 ; GFX8-NEXT:    v_min_f64 v[2:3], v[2:3], v[31:32]
 ; GFX8-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:24
 ; GFX8-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:20
-; GFX8-NEXT:    v_cndmask_b32_e64 v2, v2, 0, s[4:5]
+; GFX8-NEXT:    v_cndmask_b32_e64 v2, 0, v2, s[4:5]
 ; GFX8-NEXT:    s_waitcnt vmcnt(0)
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[6:7], v[4:5], v[31:32]
+; GFX8-NEXT:    v_cmp_o_f64_e64 s[6:7], v[4:5], v[31:32]
 ; GFX8-NEXT:    v_min_f64 v[4:5], v[4:5], v[31:32]
 ; GFX8-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:32
 ; GFX8-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:28
-; GFX8-NEXT:    v_cndmask_b32_e64 v4, v4, 0, s[6:7]
+; GFX8-NEXT:    v_cndmask_b32_e64 v4, 0, v4, s[6:7]
 ; GFX8-NEXT:    s_waitcnt vmcnt(0)
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[8:9], v[6:7], v[31:32]
+; GFX8-NEXT:    v_cmp_o_f64_e64 s[8:9], v[6:7], v[31:32]
 ; GFX8-NEXT:    v_min_f64 v[6:7], v[6:7], v[31:32]
 ; GFX8-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:36
 ; GFX8-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:40
-; GFX8-NEXT:    v_cndmask_b32_e64 v6, v6, 0, s[8:9]
+; GFX8-NEXT:    v_cndmask_b32_e64 v6, 0, v6, s[8:9]
 ; GFX8-NEXT:    s_waitcnt vmcnt(0)
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[10:11], v[8:9], v[31:32]
+; GFX8-NEXT:    v_cmp_o_f64_e64 s[10:11], v[8:9], v[31:32]
 ; GFX8-NEXT:    v_min_f64 v[8:9], v[8:9], v[31:32]
 ; GFX8-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:48
 ; GFX8-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:44
-; GFX8-NEXT:    v_cndmask_b32_e64 v8, v8, 0, s[10:11]
+; GFX8-NEXT:    v_cndmask_b32_e64 v8, 0, v8, s[10:11]
 ; GFX8-NEXT:    s_waitcnt vmcnt(0)
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[12:13], v[10:11], v[31:32]
+; GFX8-NEXT:    v_cmp_o_f64_e64 s[12:13], v[10:11], v[31:32]
 ; GFX8-NEXT:    v_min_f64 v[10:11], v[10:11], v[31:32]
 ; GFX8-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:56
 ; GFX8-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:52
-; GFX8-NEXT:    v_cndmask_b32_e64 v10, v10, 0, s[12:13]
+; GFX8-NEXT:    v_cndmask_b32_e64 v10, 0, v10, s[12:13]
 ; GFX8-NEXT:    s_waitcnt vmcnt(0)
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[14:15], v[12:13], v[31:32]
+; GFX8-NEXT:    v_cmp_o_f64_e64 s[14:15], v[12:13], v[31:32]
 ; GFX8-NEXT:    v_min_f64 v[12:13], v[12:13], v[31:32]
 ; GFX8-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:64
 ; GFX8-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:60
-; GFX8-NEXT:    v_cndmask_b32_e64 v12, v12, 0, s[14:15]
+; GFX8-NEXT:    v_cndmask_b32_e64 v12, 0, v12, s[14:15]
 ; GFX8-NEXT:    s_waitcnt vmcnt(0)
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[16:17], v[14:15], v[31:32]
+; GFX8-NEXT:    v_cmp_o_f64_e64 s[16:17], v[14:15], v[31:32]
 ; GFX8-NEXT:    v_min_f64 v[14:15], v[14:15], v[31:32]
 ; GFX8-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:68
 ; GFX8-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:72
-; GFX8-NEXT:    v_cndmask_b32_e64 v14, v14, 0, s[16:17]
+; GFX8-NEXT:    v_cndmask_b32_e64 v14, 0, v14, s[16:17]
 ; GFX8-NEXT:    s_waitcnt vmcnt(0)
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[18:19], v[16:17], v[31:32]
+; GFX8-NEXT:    v_cmp_o_f64_e64 s[18:19], v[16:17], v[31:32]
 ; GFX8-NEXT:    v_min_f64 v[16:17], v[16:17], v[31:32]
 ; GFX8-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:80
 ; GFX8-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:76
-; GFX8-NEXT:    v_cndmask_b32_e64 v16, v16, 0, s[18:19]
+; GFX8-NEXT:    v_cndmask_b32_e64 v16, 0, v16, s[18:19]
 ; GFX8-NEXT:    s_waitcnt vmcnt(0)
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[20:21], v[18:19], v[31:32]
+; GFX8-NEXT:    v_cmp_o_f64_e64 s[20:21], v[18:19], v[31:32]
 ; GFX8-NEXT:    v_min_f64 v[18:19], v[18:19], v[31:32]
 ; GFX8-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:88
 ; GFX8-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:84
-; GFX8-NEXT:    v_cndmask_b32_e64 v18, v18, 0, s[20:21]
+; GFX8-NEXT:    v_cndmask_b32_e64 v18, 0, v18, s[20:21]
 ; GFX8-NEXT:    s_waitcnt vmcnt(0)
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[22:23], v[20:21], v[31:32]
+; GFX8-NEXT:    v_cmp_o_f64_e64 s[22:23], v[20:21], v[31:32]
 ; GFX8-NEXT:    v_min_f64 v[20:21], v[20:21], v[31:32]
 ; GFX8-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:96
 ; GFX8-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:92
-; GFX8-NEXT:    v_cndmask_b32_e64 v20, v20, 0, s[22:23]
+; GFX8-NEXT:    v_cndmask_b32_e64 v20, 0, v20, s[22:23]
 ; GFX8-NEXT:    s_waitcnt vmcnt(0)
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[24:25], v[22:23], v[31:32]
+; GFX8-NEXT:    v_cmp_o_f64_e64 s[24:25], v[22:23], v[31:32]
 ; GFX8-NEXT:    v_min_f64 v[22:23], v[22:23], v[31:32]
 ; GFX8-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:100
 ; GFX8-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:104
-; GFX8-NEXT:    v_cndmask_b32_e64 v22, v22, 0, s[24:25]
+; GFX8-NEXT:    v_cndmask_b32_e64 v22, 0, v22, s[24:25]
 ; GFX8-NEXT:    s_waitcnt vmcnt(0)
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[26:27], v[24:25], v[31:32]
+; GFX8-NEXT:    v_cmp_o_f64_e64 s[26:27], v[24:25], v[31:32]
 ; GFX8-NEXT:    v_min_f64 v[24:25], v[24:25], v[31:32]
 ; GFX8-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:112
 ; GFX8-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:108
-; GFX8-NEXT:    v_cndmask_b32_e64 v24, v24, 0, s[26:27]
+; GFX8-NEXT:    v_cndmask_b32_e64 v24, 0, v24, s[26:27]
 ; GFX8-NEXT:    s_waitcnt vmcnt(0)
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[28:29], v[26:27], v[31:32]
+; GFX8-NEXT:    v_cmp_o_f64_e64 s[28:29], v[26:27], v[31:32]
 ; GFX8-NEXT:    v_min_f64 v[26:27], v[26:27], v[31:32]
 ; GFX8-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:120
 ; GFX8-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:116
-; GFX8-NEXT:    v_cndmask_b32_e64 v26, v26, 0, s[28:29]
+; GFX8-NEXT:    v_cndmask_b32_e64 v26, 0, v26, s[28:29]
 ; GFX8-NEXT:    s_waitcnt vmcnt(0)
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[40:41], v[28:29], v[31:32]
+; GFX8-NEXT:    v_cmp_o_f64_e64 s[40:41], v[28:29], v[31:32]
 ; GFX8-NEXT:    v_min_f64 v[28:29], v[28:29], v[31:32]
 ; GFX8-NEXT:    buffer_load_dword v31, off, s[0:3], s32
 ; GFX8-NEXT:    buffer_load_dword v33, off, s[0:3], s32 offset:128
 ; GFX8-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:124
-; GFX8-NEXT:    v_cndmask_b32_e64 v28, v28, 0, s[40:41]
+; GFX8-NEXT:    v_cndmask_b32_e64 v28, 0, v28, s[40:41]
 ; GFX8-NEXT:    s_waitcnt vmcnt(0)
-; GFX8-NEXT:    v_cmp_u_f64_e64 s[42:43], v[30:31], v[32:33]
+; GFX8-NEXT:    v_cmp_o_f64_e64 s[42:43], v[30:31], v[32:33]
 ; GFX8-NEXT:    v_min_f64 v[30:31], v[30:31], v[32:33]
 ; GFX8-NEXT:    v_mov_b32_e32 v32, 0x7ff80000
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v32, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v3, v3, v32, s[4:5]
-; GFX8-NEXT:    v_cndmask_b32_e64 v5, v5, v32, s[6:7]
-; GFX8-NEXT:    v_cndmask_b32_e64 v7, v7, v32, s[8:9]
-; GFX8-NEXT:    v_cndmask_b32_e64 v9, v9, v32, s[10:11]
-; GFX8-NEXT:    v_cndmask_b32_e64 v11, v11, v32, s[12:13]
-; GFX8-NEXT:    v_cndmask_b32_e64 v13, v13, v32, s[14:15]
-; GFX8-NEXT:    v_cndmask_b32_e64 v15, v15, v32, s[16:17]
-; GFX8-NEXT:    v_cndmask_b32_e64 v17, v17, v32, s[18:19]
-; GFX8-NEXT:    v_cndmask_b32_e64 v19, v19, v32, s[20:21]
-; GFX8-NEXT:    v_cndmask_b32_e64 v21, v21, v32, s[22:23]
-; GFX8-NEXT:    v_cndmask_b32_e64 v23, v23, v32, s[24:25]
-; GFX8-NEXT:    v_cndmask_b32_e64 v25, v25, v32, s[26:27]
-; GFX8-NEXT:    v_cndmask_b32_e64 v27, v27, v32, s[28:29]
-; GFX8-NEXT:    v_cndmask_b32_e64 v29, v29, v32, s[40:41]
-; GFX8-NEXT:    v_cndmask_b32_e64 v31, v31, v32, s[42:43]
-; GFX8-NEXT:    v_cndmask_b32_e64 v30, v30, 0, s[42:43]
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v32, v1, vcc
+; GFX8-NEXT:    v_cndmask_b32_e64 v3, v32, v3, s[4:5]
+; GFX8-NEXT:    v_cndmask_b32_e64 v5, v32, v5, s[6:7]
+; GFX8-NEXT:    v_cndmask_b32_e64 v7, v32, v7, s[8:9]
+; GFX8-NEXT:    v_cndmask_b32_e64 v9, v32, v9, s[10:11]
+; GFX8-NEXT:    v_cndmask_b32_e64 v11, v32, v11, s[12:13]
+; GFX8-NEXT:    v_cndmask_b32_e64 v13, v32, v13, s[14:15]
+; GFX8-NEXT:    v_cndmask_b32_e64 v15, v32, v15, s[16:17]
+; GFX8-NEXT:    v_cndmask_b32_e64 v17, v32, v17, s[18:19]
+; GFX8-NEXT:    v_cndmask_b32_e64 v19, v32, v19, s[20:21]
+; GFX8-NEXT:    v_cndmask_b32_e64 v21, v32, v21, s[22:23]
+; GFX8-NEXT:    v_cndmask_b32_e64 v23, v32, v23, s[24:25]
+; GFX8-NEXT:    v_cndmask_b32_e64 v25, v32, v25, s[26:27]
+; GFX8-NEXT:    v_cndmask_b32_e64 v27, v32, v27, s[28:29]
+; GFX8-NEXT:    v_cndmask_b32_e64 v29, v32, v29, s[40:41]
+; GFX8-NEXT:    v_cndmask_b32_e64 v31, v32, v31, s[42:43]
+; GFX8-NEXT:    v_cndmask_b32_e64 v30, 0, v30, s[42:43]
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX900-LABEL: v_minimum_v16f64:
@@ -2249,117 +2249,117 @@ define <16 x double> @v_minimum_v16f64(<16 x double> %src0, <16 x double> %src1)
 ; GFX900-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:8
 ; GFX900-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:4
 ; GFX900-NEXT:    s_waitcnt vmcnt(0)
-; GFX900-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[31:32]
+; GFX900-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[31:32]
 ; GFX900-NEXT:    v_min_f64 v[0:1], v[0:1], v[31:32]
 ; GFX900-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:16
 ; GFX900-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:12
-; GFX900-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
 ; GFX900-NEXT:    s_waitcnt vmcnt(0)
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[4:5], v[2:3], v[31:32]
+; GFX900-NEXT:    v_cmp_o_f64_e64 s[4:5], v[2:3], v[31:32]
 ; GFX900-NEXT:    v_min_f64 v[2:3], v[2:3], v[31:32]
 ; GFX900-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:24
 ; GFX900-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:20
-; GFX900-NEXT:    v_cndmask_b32_e64 v2, v2, 0, s[4:5]
+; GFX900-NEXT:    v_cndmask_b32_e64 v2, 0, v2, s[4:5]
 ; GFX900-NEXT:    s_waitcnt vmcnt(0)
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[6:7], v[4:5], v[31:32]
+; GFX900-NEXT:    v_cmp_o_f64_e64 s[6:7], v[4:5], v[31:32]
 ; GFX900-NEXT:    v_min_f64 v[4:5], v[4:5], v[31:32]
 ; GFX900-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:32
 ; GFX900-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:28
-; GFX900-NEXT:    v_cndmask_b32_e64 v4, v4, 0, s[6:7]
+; GFX900-NEXT:    v_cndmask_b32_e64 v4, 0, v4, s[6:7]
 ; GFX900-NEXT:    s_waitcnt vmcnt(0)
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[8:9], v[6:7], v[31:32]
+; GFX900-NEXT:    v_cmp_o_f64_e64 s[8:9], v[6:7], v[31:32]
 ; GFX900-NEXT:    v_min_f64 v[6:7], v[6:7], v[31:32]
 ; GFX900-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:36
 ; GFX900-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:40
-; GFX900-NEXT:    v_cndmask_b32_e64 v6, v6, 0, s[8:9]
+; GFX900-NEXT:    v_cndmask_b32_e64 v6, 0, v6, s[8:9]
 ; GFX900-NEXT:    s_waitcnt vmcnt(0)
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[10:11], v[8:9], v[31:32]
+; GFX900-NEXT:    v_cmp_o_f64_e64 s[10:11], v[8:9], v[31:32]
 ; GFX900-NEXT:    v_min_f64 v[8:9], v[8:9], v[31:32]
 ; GFX900-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:48
 ; GFX900-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:44
-; GFX900-NEXT:    v_cndmask_b32_e64 v8, v8, 0, s[10:11]
+; GFX900-NEXT:    v_cndmask_b32_e64 v8, 0, v8, s[10:11]
 ; GFX900-NEXT:    s_waitcnt vmcnt(0)
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[12:13], v[10:11], v[31:32]
+; GFX900-NEXT:    v_cmp_o_f64_e64 s[12:13], v[10:11], v[31:32]
 ; GFX900-NEXT:    v_min_f64 v[10:11], v[10:11], v[31:32]
 ; GFX900-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:56
 ; GFX900-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:52
-; GFX900-NEXT:    v_cndmask_b32_e64 v10, v10, 0, s[12:13]
+; GFX900-NEXT:    v_cndmask_b32_e64 v10, 0, v10, s[12:13]
 ; GFX900-NEXT:    s_waitcnt vmcnt(0)
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[14:15], v[12:13], v[31:32]
+; GFX900-NEXT:    v_cmp_o_f64_e64 s[14:15], v[12:13], v[31:32]
 ; GFX900-NEXT:    v_min_f64 v[12:13], v[12:13], v[31:32]
 ; GFX900-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:64
 ; GFX900-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:60
-; GFX900-NEXT:    v_cndmask_b32_e64 v12, v12, 0, s[14:15]
+; GFX900-NEXT:    v_cndmask_b32_e64 v12, 0, v12, s[14:15]
 ; GFX900-NEXT:    s_waitcnt vmcnt(0)
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[16:17], v[14:15], v[31:32]
+; GFX900-NEXT:    v_cmp_o_f64_e64 s[16:17], v[14:15], v[31:32]
 ; GFX900-NEXT:    v_min_f64 v[14:15], v[14:15], v[31:32]
 ; GFX900-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:68
 ; GFX900-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:72
-; GFX900-NEXT:    v_cndmask_b32_e64 v14, v14, 0, s[16:17]
+; GFX900-NEXT:    v_cndmask_b32_e64 v14, 0, v14, s[16:17]
 ; GFX900-NEXT:    s_waitcnt vmcnt(0)
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[18:19], v[16:17], v[31:32]
+; GFX900-NEXT:    v_cmp_o_f64_e64 s[18:19], v[16:17], v[31:32]
 ; GFX900-NEXT:    v_min_f64 v[16:17], v[16:17], v[31:32]
 ; GFX900-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:80
 ; GFX900-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:76
-; GFX900-NEXT:    v_cndmask_b32_e64 v16, v16, 0, s[18:19]
+; GFX900-NEXT:    v_cndmask_b32_e64 v16, 0, v16, s[18:19]
 ; GFX900-NEXT:    s_waitcnt vmcnt(0)
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[20:21], v[18:19], v[31:32]
+; GFX900-NEXT:    v_cmp_o_f64_e64 s[20:21], v[18:19], v[31:32]
 ; GFX900-NEXT:    v_min_f64 v[18:19], v[18:19], v[31:32]
 ; GFX900-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:88
 ; GFX900-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:84
-; GFX900-NEXT:    v_cndmask_b32_e64 v18, v18, 0, s[20:21]
+; GFX900-NEXT:    v_cndmask_b32_e64 v18, 0, v18, s[20:21]
 ; GFX900-NEXT:    s_waitcnt vmcnt(0)
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[22:23], v[20:21], v[31:32]
+; GFX900-NEXT:    v_cmp_o_f64_e64 s[22:23], v[20:21], v[31:32]
 ; GFX900-NEXT:    v_min_f64 v[20:21], v[20:21], v[31:32]
 ; GFX900-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:96
 ; GFX900-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:92
-; GFX900-NEXT:    v_cndmask_b32_e64 v20, v20, 0, s[22:23]
+; GFX900-NEXT:    v_cndmask_b32_e64 v20, 0, v20, s[22:23]
 ; GFX900-NEXT:    s_waitcnt vmcnt(0)
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[24:25], v[22:23], v[31:32]
+; GFX900-NEXT:    v_cmp_o_f64_e64 s[24:25], v[22:23], v[31:32]
 ; GFX900-NEXT:    v_min_f64 v[22:23], v[22:23], v[31:32]
 ; GFX900-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:100
 ; GFX900-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:104
-; GFX900-NEXT:    v_cndmask_b32_e64 v22, v22, 0, s[24:25]
+; GFX900-NEXT:    v_cndmask_b32_e64 v22, 0, v22, s[24:25]
 ; GFX900-NEXT:    s_waitcnt vmcnt(0)
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[26:27], v[24:25], v[31:32]
+; GFX900-NEXT:    v_cmp_o_f64_e64 s[26:27], v[24:25], v[31:32]
 ; GFX900-NEXT:    v_min_f64 v[24:25], v[24:25], v[31:32]
 ; GFX900-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:112
 ; GFX900-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:108
-; GFX900-NEXT:    v_cndmask_b32_e64 v24, v24, 0, s[26:27]
+; GFX900-NEXT:    v_cndmask_b32_e64 v24, 0, v24, s[26:27]
 ; GFX900-NEXT:    s_waitcnt vmcnt(0)
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[28:29], v[26:27], v[31:32]
+; GFX900-NEXT:    v_cmp_o_f64_e64 s[28:29], v[26:27], v[31:32]
 ; GFX900-NEXT:    v_min_f64 v[26:27], v[26:27], v[31:32]
 ; GFX900-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:120
 ; GFX900-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:116
-; GFX900-NEXT:    v_cndmask_b32_e64 v26, v26, 0, s[28:29]
+; GFX900-NEXT:    v_cndmask_b32_e64 v26, 0, v26, s[28:29]
 ; GFX900-NEXT:    s_waitcnt vmcnt(0)
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[40:41], v[28:29], v[31:32]
+; GFX900-NEXT:    v_cmp_o_f64_e64 s[40:41], v[28:29], v[31:32]
 ; GFX900-NEXT:    v_min_f64 v[28:29], v[28:29], v[31:32]
 ; GFX900-NEXT:    buffer_load_dword v31, off, s[0:3], s32
 ; GFX900-NEXT:    buffer_load_dword v33, off, s[0:3], s32 offset:128
 ; GFX900-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:124
-; GFX900-NEXT:    v_cndmask_b32_e64 v28, v28, 0, s[40:41]
+; GFX900-NEXT:    v_cndmask_b32_e64 v28, 0, v28, s[40:41]
 ; GFX900-NEXT:    s_waitcnt vmcnt(0)
-; GFX900-NEXT:    v_cmp_u_f64_e64 s[42:43], v[30:31], v[32:33]
+; GFX900-NEXT:    v_cmp_o_f64_e64 s[42:43], v[30:31], v[32:33]
 ; GFX900-NEXT:    v_min_f64 v[30:31], v[30:31], v[32:33]
 ; GFX900-NEXT:    v_mov_b32_e32 v32, 0x7ff80000
-; GFX900-NEXT:    v_cndmask_b32_e32 v1, v1, v32, vcc
-; GFX900-NEXT:    v_cndmask_b32_e64 v3, v3, v32, s[4:5]
-; GFX900-NEXT:    v_cndmask_b32_e64 v5, v5, v32, s[6:7]
-; GFX900-NEXT:    v_cndmask_b32_e64 v7, v7, v32, s[8:9]
-; GFX900-NEXT:    v_cndmask_b32_e64 v9, v9, v32, s[10:11]
-; GFX900-NEXT:    v_cndmask_b32_e64 v11, v11, v32, s[12:13]
-; GFX900-NEXT:    v_cndmask_b32_e64 v13, v13, v32, s[14:15]
-; GFX900-NEXT:    v_cndmask_b32_e64 v15, v15, v32, s[16:17]
-; GFX900-NEXT:    v_cndmask_b32_e64 v17, v17, v32, s[18:19]
-; GFX900-NEXT:    v_cndmask_b32_e64 v19, v19, v32, s[20:21]
-; GFX900-NEXT:    v_cndmask_b32_e64 v21, v21, v32, s[22:23]
-; GFX900-NEXT:    v_cndmask_b32_e64 v23, v23, v32, s[24:25]
-; GFX900-NEXT:    v_cndmask_b32_e64 v25, v25, v32, s[26:27]
-; GFX900-NEXT:    v_cndmask_b32_e64 v27, v27, v32, s[28:29]
-; GFX900-NEXT:    v_cndmask_b32_e64 v29, v29, v32, s[40:41]
-; GFX900-NEXT:    v_cndmask_b32_e64 v31, v31, v32, s[42:43]
-; GFX900-NEXT:    v_cndmask_b32_e64 v30, v30, 0, s[42:43]
+; GFX900-NEXT:    v_cndmask_b32_e32 v1, v32, v1, vcc
+; GFX900-NEXT:    v_cndmask_b32_e64 v3, v32, v3, s[4:5]
+; GFX900-NEXT:    v_cndmask_b32_e64 v5, v32, v5, s[6:7]
+; GFX900-NEXT:    v_cndmask_b32_e64 v7, v32, v7, s[8:9]
+; GFX900-NEXT:    v_cndmask_b32_e64 v9, v32, v9, s[10:11]
+; GFX900-NEXT:    v_cndmask_b32_e64 v11, v32, v11, s[12:13]
+; GFX900-NEXT:    v_cndmask_b32_e64 v13, v32, v13, s[14:15]
+; GFX900-NEXT:    v_cndmask_b32_e64 v15, v32, v15, s[16:17]
+; GFX900-NEXT:    v_cndmask_b32_e64 v17, v32, v17, s[18:19]
+; GFX900-NEXT:    v_cndmask_b32_e64 v19, v32, v19, s[20:21]
+; GFX900-NEXT:    v_cndmask_b32_e64 v21, v32, v21, s[22:23]
+; GFX900-NEXT:    v_cndmask_b32_e64 v23, v32, v23, s[24:25]
+; GFX900-NEXT:    v_cndmask_b32_e64 v25, v32, v25, s[26:27]
+; GFX900-NEXT:    v_cndmask_b32_e64 v27, v32, v27, s[28:29]
+; GFX900-NEXT:    v_cndmask_b32_e64 v29, v32, v29, s[40:41]
+; GFX900-NEXT:    v_cndmask_b32_e64 v31, v32, v31, s[42:43]
+; GFX900-NEXT:    v_cndmask_b32_e64 v30, 0, v30, s[42:43]
 ; GFX900-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX950-LABEL: v_minimum_v16f64:
@@ -2410,107 +2410,107 @@ define <16 x double> @v_minimum_v16f64(<16 x double> %src0, <16 x double> %src1)
 ; GFX950-NEXT:    v_accvgpr_write_b32 a15, v63 ; Reload Reuse
 ; GFX950-NEXT:    s_waitcnt vmcnt(25)
 ; GFX950-NEXT:    v_min_f64 v[58:59], v[0:1], v[32:33]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[32:33]
+; GFX950-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[32:33]
 ; GFX950-NEXT:    scratch_load_dword v33, off, s32 offset:112
 ; GFX950-NEXT:    scratch_load_dword v32, off, s32 offset:108
 ; GFX950-NEXT:    s_waitcnt vmcnt(25)
 ; GFX950-NEXT:    v_min_f64 v[60:61], v[2:3], v[36:37]
-; GFX950-NEXT:    v_cmp_u_f64_e64 s[0:1], v[2:3], v[36:37]
+; GFX950-NEXT:    v_cmp_o_f64_e64 s[0:1], v[2:3], v[36:37]
 ; GFX950-NEXT:    scratch_load_dword v37, off, s32 offset:120
 ; GFX950-NEXT:    scratch_load_dword v36, off, s32 offset:116
 ; GFX950-NEXT:    s_waitcnt vmcnt(25)
 ; GFX950-NEXT:    v_min_f64 v[62:63], v[4:5], v[38:39]
-; GFX950-NEXT:    v_cmp_u_f64_e64 s[2:3], v[4:5], v[38:39]
+; GFX950-NEXT:    v_cmp_o_f64_e64 s[2:3], v[4:5], v[38:39]
 ; GFX950-NEXT:    scratch_load_dword v39, off, s32 offset:128
 ; GFX950-NEXT:    scratch_load_dword v38, off, s32 offset:124
 ; GFX950-NEXT:    v_mov_b32_e32 v2, 0x7ff80000
 ; GFX950-NEXT:    s_waitcnt vmcnt(25)
 ; GFX950-NEXT:    v_min_f64 v[0:1], v[6:7], v[56:57]
-; GFX950-NEXT:    v_cmp_u_f64_e64 s[4:5], v[6:7], v[56:57]
+; GFX950-NEXT:    v_cmp_o_f64_e64 s[4:5], v[6:7], v[56:57]
 ; GFX950-NEXT:    s_waitcnt vmcnt(23)
 ; GFX950-NEXT:    v_min_f64 v[56:57], v[8:9], v[46:47]
-; GFX950-NEXT:    v_cndmask_b32_e64 v58, v58, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v59, v59, v2, vcc
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[8:9], v[46:47]
-; GFX950-NEXT:    v_cndmask_b32_e64 v6, v0, 0, s[4:5]
-; GFX950-NEXT:    v_cndmask_b32_e64 v7, v1, v2, s[4:5]
-; GFX950-NEXT:    v_cndmask_b32_e64 v8, v56, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v9, v57, v2, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v58, 0, v58, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v59, v2, v59, vcc
+; GFX950-NEXT:    v_cmp_o_f64_e32 vcc, v[8:9], v[46:47]
+; GFX950-NEXT:    v_cndmask_b32_e64 v6, 0, v0, s[4:5]
+; GFX950-NEXT:    v_cndmask_b32_e64 v7, v2, v1, s[4:5]
+; GFX950-NEXT:    v_cndmask_b32_e32 v8, 0, v56, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v9, v2, v57, vcc
 ; GFX950-NEXT:    s_waitcnt vmcnt(21)
 ; GFX950-NEXT:    v_min_f64 v[0:1], v[10:11], v[44:45]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[10:11], v[44:45]
-; GFX950-NEXT:    v_cndmask_b32_e64 v60, v60, 0, s[0:1]
-; GFX950-NEXT:    v_cndmask_b32_e64 v3, v61, v2, s[0:1]
-; GFX950-NEXT:    v_cndmask_b32_e64 v10, v0, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v11, v1, v2, vcc
+; GFX950-NEXT:    v_cmp_o_f64_e32 vcc, v[10:11], v[44:45]
+; GFX950-NEXT:    v_cndmask_b32_e64 v60, 0, v60, s[0:1]
+; GFX950-NEXT:    v_cndmask_b32_e64 v3, v2, v61, s[0:1]
+; GFX950-NEXT:    v_cndmask_b32_e32 v10, 0, v0, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v11, v2, v1, vcc
 ; GFX950-NEXT:    s_waitcnt vmcnt(19)
 ; GFX950-NEXT:    v_min_f64 v[0:1], v[12:13], v[42:43]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[12:13], v[42:43]
-; GFX950-NEXT:    v_cndmask_b32_e64 v4, v62, 0, s[2:3]
-; GFX950-NEXT:    v_cndmask_b32_e64 v5, v63, v2, s[2:3]
-; GFX950-NEXT:    v_cndmask_b32_e64 v12, v0, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v13, v1, v2, vcc
+; GFX950-NEXT:    v_cmp_o_f64_e32 vcc, v[12:13], v[42:43]
+; GFX950-NEXT:    v_cndmask_b32_e64 v4, 0, v62, s[2:3]
+; GFX950-NEXT:    v_cndmask_b32_e64 v5, v2, v63, s[2:3]
+; GFX950-NEXT:    v_cndmask_b32_e32 v12, 0, v0, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v13, v2, v1, vcc
 ; GFX950-NEXT:    s_waitcnt vmcnt(17)
 ; GFX950-NEXT:    v_min_f64 v[0:1], v[14:15], v[40:41]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[14:15], v[40:41]
+; GFX950-NEXT:    v_cmp_o_f64_e32 vcc, v[14:15], v[40:41]
 ; GFX950-NEXT:    v_accvgpr_read_b32 v63, a15 ; Reload Reuse
 ; GFX950-NEXT:    v_accvgpr_read_b32 v62, a14 ; Reload Reuse
-; GFX950-NEXT:    v_cndmask_b32_e64 v14, v0, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v15, v1, v2, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v14, 0, v0, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v15, v2, v1, vcc
 ; GFX950-NEXT:    s_waitcnt vmcnt(15)
 ; GFX950-NEXT:    v_min_f64 v[0:1], v[16:17], v[54:55]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[16:17], v[54:55]
+; GFX950-NEXT:    v_cmp_o_f64_e32 vcc, v[16:17], v[54:55]
 ; GFX950-NEXT:    v_accvgpr_read_b32 v61, a13 ; Reload Reuse
 ; GFX950-NEXT:    v_accvgpr_read_b32 v57, a9 ; Reload Reuse
-; GFX950-NEXT:    v_cndmask_b32_e64 v16, v0, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v17, v1, v2, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v16, 0, v0, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v17, v2, v1, vcc
 ; GFX950-NEXT:    s_waitcnt vmcnt(13)
 ; GFX950-NEXT:    v_min_f64 v[0:1], v[18:19], v[52:53]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[18:19], v[52:53]
+; GFX950-NEXT:    v_cmp_o_f64_e32 vcc, v[18:19], v[52:53]
 ; GFX950-NEXT:    v_accvgpr_read_b32 v56, a8 ; Reload Reuse
 ; GFX950-NEXT:    v_accvgpr_read_b32 v47, a7 ; Reload Reuse
-; GFX950-NEXT:    v_cndmask_b32_e64 v18, v0, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v19, v1, v2, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v18, 0, v0, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v19, v2, v1, vcc
 ; GFX950-NEXT:    s_waitcnt vmcnt(11)
 ; GFX950-NEXT:    v_min_f64 v[0:1], v[20:21], v[50:51]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[20:21], v[50:51]
+; GFX950-NEXT:    v_cmp_o_f64_e32 vcc, v[20:21], v[50:51]
 ; GFX950-NEXT:    v_accvgpr_read_b32 v46, a6 ; Reload Reuse
 ; GFX950-NEXT:    v_accvgpr_read_b32 v45, a5 ; Reload Reuse
-; GFX950-NEXT:    v_cndmask_b32_e64 v20, v0, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v21, v1, v2, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v20, 0, v0, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v21, v2, v1, vcc
 ; GFX950-NEXT:    s_waitcnt vmcnt(9)
 ; GFX950-NEXT:    v_min_f64 v[0:1], v[22:23], v[48:49]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[22:23], v[48:49]
+; GFX950-NEXT:    v_cmp_o_f64_e32 vcc, v[22:23], v[48:49]
 ; GFX950-NEXT:    v_accvgpr_read_b32 v44, a4 ; Reload Reuse
 ; GFX950-NEXT:    v_accvgpr_read_b32 v43, a3 ; Reload Reuse
-; GFX950-NEXT:    v_cndmask_b32_e64 v22, v0, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v23, v1, v2, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v22, 0, v0, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v23, v2, v1, vcc
 ; GFX950-NEXT:    s_waitcnt vmcnt(6)
 ; GFX950-NEXT:    v_min_f64 v[0:1], v[24:25], v[34:35]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[24:25], v[34:35]
+; GFX950-NEXT:    v_cmp_o_f64_e32 vcc, v[24:25], v[34:35]
 ; GFX950-NEXT:    v_accvgpr_read_b32 v42, a2 ; Reload Reuse
 ; GFX950-NEXT:    v_accvgpr_read_b32 v41, a1 ; Reload Reuse
-; GFX950-NEXT:    v_cndmask_b32_e64 v24, v0, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v25, v1, v2, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v24, 0, v0, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v25, v2, v1, vcc
 ; GFX950-NEXT:    v_accvgpr_read_b32 v40, a0 ; Reload Reuse
 ; GFX950-NEXT:    s_waitcnt vmcnt(4)
 ; GFX950-NEXT:    v_min_f64 v[0:1], v[26:27], v[32:33]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[26:27], v[32:33]
+; GFX950-NEXT:    v_cmp_o_f64_e32 vcc, v[26:27], v[32:33]
 ; GFX950-NEXT:    s_nop 1
-; GFX950-NEXT:    v_cndmask_b32_e64 v26, v0, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v27, v1, v2, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v26, 0, v0, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v27, v2, v1, vcc
 ; GFX950-NEXT:    s_waitcnt vmcnt(2)
 ; GFX950-NEXT:    v_min_f64 v[0:1], v[28:29], v[36:37]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[28:29], v[36:37]
+; GFX950-NEXT:    v_cmp_o_f64_e32 vcc, v[28:29], v[36:37]
 ; GFX950-NEXT:    s_nop 1
-; GFX950-NEXT:    v_cndmask_b32_e64 v28, v0, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v29, v1, v2, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v28, 0, v0, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v29, v2, v1, vcc
 ; GFX950-NEXT:    s_waitcnt vmcnt(0)
 ; GFX950-NEXT:    v_min_f64 v[0:1], v[30:31], v[38:39]
-; GFX950-NEXT:    v_cmp_u_f64_e32 vcc, v[30:31], v[38:39]
+; GFX950-NEXT:    v_cmp_o_f64_e32 vcc, v[30:31], v[38:39]
 ; GFX950-NEXT:    s_nop 1
-; GFX950-NEXT:    v_cndmask_b32_e64 v30, v0, 0, vcc
-; GFX950-NEXT:    v_cndmask_b32_e32 v31, v1, v2, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v30, 0, v0, vcc
+; GFX950-NEXT:    v_cndmask_b32_e32 v31, v2, v1, vcc
 ; GFX950-NEXT:    v_mov_b32_e32 v0, v58
 ; GFX950-NEXT:    v_mov_b32_e32 v1, v59
 ; GFX950-NEXT:    v_mov_b32_e32 v2, v60
@@ -2550,92 +2550,92 @@ define <16 x double> @v_minimum_v16f64(<16 x double> %src0, <16 x double> %src1)
 ; GFX10-NEXT:    buffer_load_dword v51, off, s[0:3], s32 offset:72
 ; GFX10-NEXT:    s_waitcnt vmcnt(23)
 ; GFX10-NEXT:    v_min_f64 v[82:83], v[0:1], v[31:32]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[31:32]
-; GFX10-NEXT:    s_waitcnt vmcnt(21)
+; GFX10-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[31:32]
+; GFX10-NEXT:    s_waitcnt vmcnt(22)
 ; GFX10-NEXT:    v_min_f64 v[84:85], v[2:3], v[33:34]
-; GFX10-NEXT:    v_cmp_u_f64_e64 s4, v[2:3], v[33:34]
-; GFX10-NEXT:    s_waitcnt vmcnt(19)
+; GFX10-NEXT:    v_cmp_o_f64_e64 s4, v[2:3], v[33:34]
+; GFX10-NEXT:    s_clause 0x3
+; GFX10-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:120
+; GFX10-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:116
+; GFX10-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:112
+; GFX10-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:108
+; GFX10-NEXT:    s_waitcnt vmcnt(24)
 ; GFX10-NEXT:    v_min_f64 v[32:33], v[4:5], v[35:36]
-; GFX10-NEXT:    v_cmp_u_f64_e64 s5, v[4:5], v[35:36]
-; GFX10-NEXT:    s_clause 0x7
-; GFX10-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:112
-; GFX10-NEXT:    buffer_load_dword v67, off, s[0:3], s32 offset:104
-; GFX10-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:108
-; GFX10-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:120
-; GFX10-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:116
+; GFX10-NEXT:    v_cmp_o_f64_e64 s5, v[4:5], v[35:36]
+; GFX10-NEXT:    s_clause 0x2
 ; GFX10-NEXT:    buffer_load_dword v31, off, s[0:3], s32
 ; GFX10-NEXT:    buffer_load_dword v5, off, s[0:3], s32 offset:128
 ; GFX10-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:124
 ; GFX10-NEXT:    s_waitcnt vmcnt(24)
 ; GFX10-NEXT:    v_min_f64 v[34:35], v[6:7], v[48:49]
-; GFX10-NEXT:    v_cmp_u_f64_e64 s6, v[6:7], v[48:49]
+; GFX10-NEXT:    v_cmp_o_f64_e64 s6, v[6:7], v[48:49]
 ; GFX10-NEXT:    s_waitcnt vmcnt(21)
-; GFX10-NEXT:    v_cmp_u_f64_e64 s10, v[14:15], v[52:53]
+; GFX10-NEXT:    v_cmp_o_f64_e64 s10, v[14:15], v[52:53]
 ; GFX10-NEXT:    s_waitcnt vmcnt(19)
-; GFX10-NEXT:    v_cmp_u_f64_e64 s9, v[12:13], v[54:55]
+; GFX10-NEXT:    v_cmp_o_f64_e64 s9, v[12:13], v[54:55]
 ; GFX10-NEXT:    s_waitcnt vmcnt(17)
-; GFX10-NEXT:    v_cmp_u_f64_e64 s8, v[10:11], v[64:65]
+; GFX10-NEXT:    v_cmp_o_f64_e64 s8, v[10:11], v[64:65]
 ; GFX10-NEXT:    s_waitcnt vmcnt(16)
 ; GFX10-NEXT:    v_min_f64 v[48:49], v[8:9], v[37:38]
-; GFX10-NEXT:    v_cmp_u_f64_e64 s7, v[8:9], v[37:38]
+; GFX10-NEXT:    v_cmp_o_f64_e64 s7, v[8:9], v[37:38]
 ; GFX10-NEXT:    v_min_f64 v[36:37], v[10:11], v[64:65]
 ; GFX10-NEXT:    v_min_f64 v[38:39], v[12:13], v[54:55]
 ; GFX10-NEXT:    v_min_f64 v[54:55], v[14:15], v[52:53]
 ; GFX10-NEXT:    s_waitcnt vmcnt(11)
 ; GFX10-NEXT:    v_min_f64 v[64:65], v[20:21], v[70:71]
-; GFX10-NEXT:    v_cmp_u_f64_e64 s13, v[20:21], v[70:71]
+; GFX10-NEXT:    v_cmp_o_f64_e64 s13, v[20:21], v[70:71]
 ; GFX10-NEXT:    s_waitcnt vmcnt(9)
-; GFX10-NEXT:    v_cmp_u_f64_e64 s12, v[18:19], v[80:81]
+; GFX10-NEXT:    v_cmp_o_f64_e64 s12, v[18:19], v[80:81]
 ; GFX10-NEXT:    s_waitcnt vmcnt(8)
 ; GFX10-NEXT:    v_min_f64 v[52:53], v[16:17], v[50:51]
-; GFX10-NEXT:    v_cmp_u_f64_e64 s11, v[16:17], v[50:51]
+; GFX10-NEXT:    v_cmp_o_f64_e64 s11, v[16:17], v[50:51]
 ; GFX10-NEXT:    v_min_f64 v[50:51], v[18:19], v[80:81]
 ; GFX10-NEXT:    v_min_f64 v[70:71], v[22:23], v[68:69]
-; GFX10-NEXT:    v_cmp_u_f64_e64 s14, v[22:23], v[68:69]
-; GFX10-NEXT:    v_cndmask_b32_e64 v6, v34, 0, s6
-; GFX10-NEXT:    v_cndmask_b32_e64 v7, v35, 0x7ff80000, s6
-; GFX10-NEXT:    v_cndmask_b32_e64 v8, v48, 0, s7
-; GFX10-NEXT:    v_cndmask_b32_e64 v9, v49, 0x7ff80000, s7
-; GFX10-NEXT:    v_cndmask_b32_e64 v10, v36, 0, s8
-; GFX10-NEXT:    v_cndmask_b32_e64 v11, v37, 0x7ff80000, s8
-; GFX10-NEXT:    v_cndmask_b32_e64 v12, v38, 0, s9
-; GFX10-NEXT:    v_cndmask_b32_e64 v13, v39, 0x7ff80000, s9
-; GFX10-NEXT:    v_cndmask_b32_e64 v14, v54, 0, s10
-; GFX10-NEXT:    v_cndmask_b32_e64 v15, v55, 0x7ff80000, s10
-; GFX10-NEXT:    v_cndmask_b32_e64 v16, v52, 0, s11
-; GFX10-NEXT:    v_cndmask_b32_e64 v17, v53, 0x7ff80000, s11
-; GFX10-NEXT:    v_cndmask_b32_e64 v18, v50, 0, s12
-; GFX10-NEXT:    v_cndmask_b32_e64 v19, v51, 0x7ff80000, s12
-; GFX10-NEXT:    v_cndmask_b32_e64 v20, v64, 0, s13
-; GFX10-NEXT:    v_cndmask_b32_e64 v21, v65, 0x7ff80000, s13
-; GFX10-NEXT:    v_cndmask_b32_e64 v22, v70, 0, s14
-; GFX10-NEXT:    v_cndmask_b32_e64 v23, v71, 0x7ff80000, s14
-; GFX10-NEXT:    s_waitcnt vmcnt(6)
+; GFX10-NEXT:    v_cmp_o_f64_e64 s14, v[22:23], v[68:69]
+; GFX10-NEXT:    s_waitcnt vmcnt(7)
 ; GFX10-NEXT:    v_min_f64 v[68:69], v[24:25], v[66:67]
-; GFX10-NEXT:    v_cmp_u_f64_e64 s15, v[24:25], v[66:67]
+; GFX10-NEXT:    v_cmp_o_f64_e64 s15, v[24:25], v[66:67]
+; GFX10-NEXT:    v_cndmask_b32_e64 v6, 0, v34, s6
+; GFX10-NEXT:    v_cndmask_b32_e64 v7, 0x7ff80000, v35, s6
+; GFX10-NEXT:    v_cndmask_b32_e64 v8, 0, v48, s7
+; GFX10-NEXT:    v_cndmask_b32_e64 v9, 0x7ff80000, v49, s7
+; GFX10-NEXT:    v_cndmask_b32_e64 v10, 0, v36, s8
+; GFX10-NEXT:    v_cndmask_b32_e64 v11, 0x7ff80000, v37, s8
+; GFX10-NEXT:    v_cndmask_b32_e64 v12, 0, v38, s9
+; GFX10-NEXT:    v_cndmask_b32_e64 v13, 0x7ff80000, v39, s9
+; GFX10-NEXT:    v_cndmask_b32_e64 v14, 0, v54, s10
+; GFX10-NEXT:    v_cndmask_b32_e64 v15, 0x7ff80000, v55, s10
+; GFX10-NEXT:    v_cndmask_b32_e64 v16, 0, v52, s11
+; GFX10-NEXT:    v_cndmask_b32_e64 v17, 0x7ff80000, v53, s11
+; GFX10-NEXT:    v_cndmask_b32_e64 v18, 0, v50, s12
+; GFX10-NEXT:    v_cndmask_b32_e64 v19, 0x7ff80000, v51, s12
+; GFX10-NEXT:    v_cndmask_b32_e64 v20, 0, v64, s13
+; GFX10-NEXT:    v_cndmask_b32_e64 v21, 0x7ff80000, v65, s13
+; GFX10-NEXT:    v_cndmask_b32_e64 v22, 0, v70, s14
+; GFX10-NEXT:    v_cndmask_b32_e64 v23, 0x7ff80000, v71, s14
+; GFX10-NEXT:    v_cndmask_b32_e64 v24, 0, v68, s15
+; GFX10-NEXT:    v_cndmask_b32_e64 v25, 0x7ff80000, v69, s15
 ; GFX10-NEXT:    s_waitcnt vmcnt(5)
-; GFX10-NEXT:    v_min_f64 v[66:67], v[26:27], v[0:1]
-; GFX10-NEXT:    v_cmp_u_f64_e64 s16, v[26:27], v[0:1]
+; GFX10-NEXT:    v_min_f64 v[80:81], v[28:29], v[0:1]
+; GFX10-NEXT:    v_cmp_o_f64_e64 s17, v[28:29], v[0:1]
 ; GFX10-NEXT:    s_waitcnt vmcnt(3)
-; GFX10-NEXT:    v_min_f64 v[80:81], v[28:29], v[2:3]
-; GFX10-NEXT:    v_cmp_u_f64_e64 s17, v[28:29], v[2:3]
+; GFX10-NEXT:    v_min_f64 v[66:67], v[26:27], v[2:3]
+; GFX10-NEXT:    v_cmp_o_f64_e64 s16, v[26:27], v[2:3]
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0, v82, vcc_lo
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    v_min_f64 v[86:87], v[30:31], v[4:5]
-; GFX10-NEXT:    v_cmp_u_f64_e64 s18, v[30:31], v[4:5]
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v82, 0, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v83, 0x7ff80000, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v2, v84, 0, s4
-; GFX10-NEXT:    v_cndmask_b32_e64 v3, v85, 0x7ff80000, s4
-; GFX10-NEXT:    v_cndmask_b32_e64 v4, v32, 0, s5
-; GFX10-NEXT:    v_cndmask_b32_e64 v5, v33, 0x7ff80000, s5
-; GFX10-NEXT:    v_cndmask_b32_e64 v24, v68, 0, s15
-; GFX10-NEXT:    v_cndmask_b32_e64 v25, v69, 0x7ff80000, s15
-; GFX10-NEXT:    v_cndmask_b32_e64 v26, v66, 0, s16
-; GFX10-NEXT:    v_cndmask_b32_e64 v27, v67, 0x7ff80000, s16
-; GFX10-NEXT:    v_cndmask_b32_e64 v28, v80, 0, s17
-; GFX10-NEXT:    v_cndmask_b32_e64 v29, v81, 0x7ff80000, s17
-; GFX10-NEXT:    v_cndmask_b32_e64 v30, v86, 0, s18
-; GFX10-NEXT:    v_cndmask_b32_e64 v31, v87, 0x7ff80000, s18
+; GFX10-NEXT:    v_cmp_o_f64_e64 s18, v[30:31], v[4:5]
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v83, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e64 v2, 0, v84, s4
+; GFX10-NEXT:    v_cndmask_b32_e64 v3, 0x7ff80000, v85, s4
+; GFX10-NEXT:    v_cndmask_b32_e64 v4, 0, v32, s5
+; GFX10-NEXT:    v_cndmask_b32_e64 v5, 0x7ff80000, v33, s5
+; GFX10-NEXT:    v_cndmask_b32_e64 v28, 0, v80, s17
+; GFX10-NEXT:    v_cndmask_b32_e64 v29, 0x7ff80000, v81, s17
+; GFX10-NEXT:    v_cndmask_b32_e64 v26, 0, v66, s16
+; GFX10-NEXT:    v_cndmask_b32_e64 v27, 0x7ff80000, v67, s16
+; GFX10-NEXT:    v_cndmask_b32_e64 v30, 0, v86, s18
+; GFX10-NEXT:    v_cndmask_b32_e64 v31, 0x7ff80000, v87, s18
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: v_minimum_v16f64:
@@ -2677,84 +2677,84 @@ define <16 x double> @v_minimum_v16f64(<16 x double> %src0, <16 x double> %src1)
 ; GFX11-NEXT:    scratch_load_b32 v86, off, s32 offset:124
 ; GFX11-NEXT:    s_waitcnt vmcnt(30)
 ; GFX11-NEXT:    v_min_f64 v[96:97], v[0:1], v[32:33]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[32:33]
+; GFX11-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[32:33]
 ; GFX11-NEXT:    s_waitcnt vmcnt(28)
 ; GFX11-NEXT:    v_min_f64 v[32:33], v[2:3], v[34:35]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s0, v[2:3], v[34:35]
+; GFX11-NEXT:    v_cmp_o_f64_e64 s0, v[2:3], v[34:35]
 ; GFX11-NEXT:    s_waitcnt vmcnt(26)
 ; GFX11-NEXT:    v_min_f64 v[34:35], v[4:5], v[36:37]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s1, v[4:5], v[36:37]
+; GFX11-NEXT:    v_cmp_o_f64_e64 s1, v[4:5], v[36:37]
 ; GFX11-NEXT:    s_waitcnt vmcnt(24)
 ; GFX11-NEXT:    v_min_f64 v[36:37], v[6:7], v[38:39]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s2, v[6:7], v[38:39]
+; GFX11-NEXT:    v_cmp_o_f64_e64 s2, v[6:7], v[38:39]
 ; GFX11-NEXT:    s_waitcnt vmcnt(22)
 ; GFX11-NEXT:    v_min_f64 v[38:39], v[8:9], v[48:49]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s3, v[8:9], v[48:49]
+; GFX11-NEXT:    v_cmp_o_f64_e64 s3, v[8:9], v[48:49]
 ; GFX11-NEXT:    s_waitcnt vmcnt(20)
 ; GFX11-NEXT:    v_min_f64 v[48:49], v[10:11], v[50:51]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s4, v[10:11], v[50:51]
+; GFX11-NEXT:    v_cmp_o_f64_e64 s4, v[10:11], v[50:51]
 ; GFX11-NEXT:    s_waitcnt vmcnt(18)
 ; GFX11-NEXT:    v_min_f64 v[50:51], v[12:13], v[52:53]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s5, v[12:13], v[52:53]
+; GFX11-NEXT:    v_cmp_o_f64_e64 s5, v[12:13], v[52:53]
 ; GFX11-NEXT:    s_waitcnt vmcnt(16)
 ; GFX11-NEXT:    v_min_f64 v[52:53], v[14:15], v[54:55]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s6, v[14:15], v[54:55]
+; GFX11-NEXT:    v_cmp_o_f64_e64 s6, v[14:15], v[54:55]
 ; GFX11-NEXT:    s_waitcnt vmcnt(14)
 ; GFX11-NEXT:    v_min_f64 v[54:55], v[16:17], v[64:65]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s7, v[16:17], v[64:65]
+; GFX11-NEXT:    v_cmp_o_f64_e64 s7, v[16:17], v[64:65]
 ; GFX11-NEXT:    s_waitcnt vmcnt(12)
 ; GFX11-NEXT:    v_min_f64 v[64:65], v[18:19], v[66:67]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s8, v[18:19], v[66:67]
+; GFX11-NEXT:    v_cmp_o_f64_e64 s8, v[18:19], v[66:67]
 ; GFX11-NEXT:    s_waitcnt vmcnt(10)
 ; GFX11-NEXT:    v_min_f64 v[66:67], v[20:21], v[68:69]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s9, v[20:21], v[68:69]
+; GFX11-NEXT:    v_cmp_o_f64_e64 s9, v[20:21], v[68:69]
 ; GFX11-NEXT:    s_waitcnt vmcnt(8)
 ; GFX11-NEXT:    v_min_f64 v[68:69], v[22:23], v[70:71]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s10, v[22:23], v[70:71]
+; GFX11-NEXT:    v_cmp_o_f64_e64 s10, v[22:23], v[70:71]
 ; GFX11-NEXT:    s_waitcnt vmcnt(6)
 ; GFX11-NEXT:    v_min_f64 v[70:71], v[24:25], v[80:81]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s11, v[24:25], v[80:81]
+; GFX11-NEXT:    v_cmp_o_f64_e64 s11, v[24:25], v[80:81]
 ; GFX11-NEXT:    s_waitcnt vmcnt(4)
 ; GFX11-NEXT:    v_min_f64 v[80:81], v[26:27], v[82:83]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s12, v[26:27], v[82:83]
+; GFX11-NEXT:    v_cmp_o_f64_e64 s12, v[26:27], v[82:83]
 ; GFX11-NEXT:    s_waitcnt vmcnt(2)
 ; GFX11-NEXT:    v_min_f64 v[82:83], v[28:29], v[84:85]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s13, v[28:29], v[84:85]
+; GFX11-NEXT:    v_cmp_o_f64_e64 s13, v[28:29], v[84:85]
 ; GFX11-NEXT:    s_waitcnt vmcnt(0)
 ; GFX11-NEXT:    v_min_f64 v[84:85], v[30:31], v[86:87]
-; GFX11-NEXT:    v_cmp_u_f64_e64 s14, v[30:31], v[86:87]
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v96, 0, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v97, 0x7ff80000, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v2, v32, 0, s0
-; GFX11-NEXT:    v_cndmask_b32_e64 v3, v33, 0x7ff80000, s0
-; GFX11-NEXT:    v_cndmask_b32_e64 v4, v34, 0, s1
-; GFX11-NEXT:    v_cndmask_b32_e64 v5, v35, 0x7ff80000, s1
-; GFX11-NEXT:    v_cndmask_b32_e64 v6, v36, 0, s2
-; GFX11-NEXT:    v_cndmask_b32_e64 v7, v37, 0x7ff80000, s2
-; GFX11-NEXT:    v_cndmask_b32_e64 v8, v38, 0, s3
-; GFX11-NEXT:    v_cndmask_b32_e64 v9, v39, 0x7ff80000, s3
-; GFX11-NEXT:    v_cndmask_b32_e64 v10, v48, 0, s4
-; GFX11-NEXT:    v_cndmask_b32_e64 v11, v49, 0x7ff80000, s4
-; GFX11-NEXT:    v_cndmask_b32_e64 v12, v50, 0, s5
-; GFX11-NEXT:    v_cndmask_b32_e64 v13, v51, 0x7ff80000, s5
-; GFX11-NEXT:    v_cndmask_b32_e64 v14, v52, 0, s6
-; GFX11-NEXT:    v_cndmask_b32_e64 v15, v53, 0x7ff80000, s6
-; GFX11-NEXT:    v_cndmask_b32_e64 v16, v54, 0, s7
-; GFX11-NEXT:    v_cndmask_b32_e64 v17, v55, 0x7ff80000, s7
-; GFX11-NEXT:    v_cndmask_b32_e64 v18, v64, 0, s8
-; GFX11-NEXT:    v_cndmask_b32_e64 v19, v65, 0x7ff80000, s8
-; GFX11-NEXT:    v_cndmask_b32_e64 v20, v66, 0, s9
-; GFX11-NEXT:    v_cndmask_b32_e64 v21, v67, 0x7ff80000, s9
-; GFX11-NEXT:    v_cndmask_b32_e64 v22, v68, 0, s10
-; GFX11-NEXT:    v_cndmask_b32_e64 v23, v69, 0x7ff80000, s10
-; GFX11-NEXT:    v_cndmask_b32_e64 v24, v70, 0, s11
-; GFX11-NEXT:    v_cndmask_b32_e64 v25, v71, 0x7ff80000, s11
-; GFX11-NEXT:    v_cndmask_b32_e64 v26, v80, 0, s12
-; GFX11-NEXT:    v_cndmask_b32_e64 v27, v81, 0x7ff80000, s12
-; GFX11-NEXT:    v_cndmask_b32_e64 v28, v82, 0, s13
-; GFX11-NEXT:    v_cndmask_b32_e64 v29, v83, 0x7ff80000, s13
-; GFX11-NEXT:    v_cndmask_b32_e64 v30, v84, 0, s14
-; GFX11-NEXT:    v_cndmask_b32_e64 v31, v85, 0x7ff80000, s14
+; GFX11-NEXT:    v_cmp_o_f64_e64 s14, v[30:31], v[86:87]
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0, v96, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v97, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e64 v2, 0, v32, s0
+; GFX11-NEXT:    v_cndmask_b32_e64 v3, 0x7ff80000, v33, s0
+; GFX11-NEXT:    v_cndmask_b32_e64 v4, 0, v34, s1
+; GFX11-NEXT:    v_cndmask_b32_e64 v5, 0x7ff80000, v35, s1
+; GFX11-NEXT:    v_cndmask_b32_e64 v6, 0, v36, s2
+; GFX11-NEXT:    v_cndmask_b32_e64 v7, 0x7ff80000, v37, s2
+; GFX11-NEXT:    v_cndmask_b32_e64 v8, 0, v38, s3
+; GFX11-NEXT:    v_cndmask_b32_e64 v9, 0x7ff80000, v39, s3
+; GFX11-NEXT:    v_cndmask_b32_e64 v10, 0, v48, s4
+; GFX11-NEXT:    v_cndmask_b32_e64 v11, 0x7ff80000, v49, s4
+; GFX11-NEXT:    v_cndmask_b32_e64 v12, 0, v50, s5
+; GFX11-NEXT:    v_cndmask_b32_e64 v13, 0x7ff80000, v51, s5
+; GFX11-NEXT:    v_cndmask_b32_e64 v14, 0, v52, s6
+; GFX11-NEXT:    v_cndmask_b32_e64 v15, 0x7ff80000, v53, s6
+; GFX11-NEXT:    v_cndmask_b32_e64 v16, 0, v54, s7
+; GFX11-NEXT:    v_cndmask_b32_e64 v17, 0x7ff80000, v55, s7
+; GFX11-NEXT:    v_cndmask_b32_e64 v18, 0, v64, s8
+; GFX11-NEXT:    v_cndmask_b32_e64 v19, 0x7ff80000, v65, s8
+; GFX11-NEXT:    v_cndmask_b32_e64 v20, 0, v66, s9
+; GFX11-NEXT:    v_cndmask_b32_e64 v21, 0x7ff80000, v67, s9
+; GFX11-NEXT:    v_cndmask_b32_e64 v22, 0, v68, s10
+; GFX11-NEXT:    v_cndmask_b32_e64 v23, 0x7ff80000, v69, s10
+; GFX11-NEXT:    v_cndmask_b32_e64 v24, 0, v70, s11
+; GFX11-NEXT:    v_cndmask_b32_e64 v25, 0x7ff80000, v71, s11
+; GFX11-NEXT:    v_cndmask_b32_e64 v26, 0, v80, s12
+; GFX11-NEXT:    v_cndmask_b32_e64 v27, 0x7ff80000, v81, s12
+; GFX11-NEXT:    v_cndmask_b32_e64 v28, 0, v82, s13
+; GFX11-NEXT:    v_cndmask_b32_e64 v29, 0x7ff80000, v83, s13
+; GFX11-NEXT:    v_cndmask_b32_e64 v30, 0, v84, s14
+; GFX11-NEXT:    v_cndmask_b32_e64 v31, 0x7ff80000, v85, s14
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: v_minimum_v16f64:
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.round.f64.ll b/llvm/test/CodeGen/AMDGPU/llvm.round.f64.ll
index af914bd4043cf..696832ddc6d27 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.round.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.round.f64.ll
@@ -87,10 +87,11 @@ define amdgpu_kernel void @v_round_f64(ptr addrspace(1) %out, ptr addrspace(1) %
 ; SI-NEXT:    v_not_b32_e32 v5, v5
 ; SI-NEXT:    v_not_b32_e32 v4, v4
 ; SI-NEXT:    v_and_b32_e32 v5, v3, v5
-; SI-NEXT:    v_and_b32_e32 v4, v2, v4
 ; SI-NEXT:    v_cmp_gt_i32_e32 vcc, 0, v6
+; SI-NEXT:    v_and_b32_e32 v4, v2, v4
 ; SI-NEXT:    v_cndmask_b32_e32 v5, v5, v7, vcc
-; SI-NEXT:    v_cndmask_b32_e64 v4, v4, 0, vcc
+; SI-NEXT:    v_cmp_lt_i32_e32 vcc, -1, v6
+; SI-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
 ; SI-NEXT:    v_cmp_lt_i32_e32 vcc, 51, v6
 ; SI-NEXT:    v_cndmask_b32_e32 v5, v5, v3, vcc
 ; SI-NEXT:    v_cndmask_b32_e32 v4, v4, v2, vcc
diff --git a/llvm/test/CodeGen/AMDGPU/shrink-cndmask.ll b/llvm/test/CodeGen/AMDGPU/shrink-cndmask.ll
index 12ccdfff07c6f..1703dd8ace0dc 100644
--- a/llvm/test/CodeGen/AMDGPU/shrink-cndmask.ll
+++ b/llvm/test/CodeGen/AMDGPU/shrink-cndmask.ll
@@ -40,9 +40,8 @@ define amdgpu_cs void @test_i32_sle(i32 %a, i32 %p, i32 %q, ptr addrspace(1) %ou
 define amdgpu_cs void @test_i32_sgt(i32 %a, i32 %p, i32 %q, ptr addrspace(1) %out) {
 ; GCN-LABEL: test_i32_sgt:
 ; GCN:       ; %bb.0: ; %.entry
-; GCN-NEXT:    v_cmp_gt_i32_e32 vcc_lo, 2, v0
-; GCN-NEXT:    v_cndmask_b32_e64 v0, v1, 0, vcc_lo
-; GCN-NEXT:    v_cndmask_b32_e64 v1, v2, 0, vcc_lo
+; GCN-NEXT:    v_cmp_lt_i32_e32 vcc_lo, 1, v0
+; GCN-NEXT:    v_dual_cndmask_b32 v0, 0, v1 :: v_dual_cndmask_b32 v1, 0, v2
 ; GCN-NEXT:    global_store_b64 v[3:4], v[0:1], off
 ; GCN-NEXT:    s_endpgm
 .entry:
@@ -58,9 +57,8 @@ define amdgpu_cs void @test_i32_sgt(i32 %a, i32 %p, i32 %q, ptr addrspace(1) %ou
 define amdgpu_cs void @test_i32_slt(i32 %a, i32 %p, i32 %q, ptr addrspace(1) %out) {
 ; GCN-LABEL: test_i32_slt:
 ; GCN:       ; %bb.0: ; %.entry
-; GCN-NEXT:    v_cmp_lt_i32_e32 vcc_lo, 2, v0
-; GCN-NEXT:    v_cndmask_b32_e64 v0, v1, 0, vcc_lo
-; GCN-NEXT:    v_cndmask_b32_e64 v1, v2, 0, vcc_lo
+; GCN-NEXT:    v_cmp_gt_i32_e32 vcc_lo, 3, v0
+; GCN-NEXT:    v_dual_cndmask_b32 v0, 0, v1 :: v_dual_cndmask_b32 v1, 0, v2
 ; GCN-NEXT:    global_store_b64 v[3:4], v[0:1], off
 ; GCN-NEXT:    s_endpgm
 .entry:
@@ -113,11 +111,9 @@ define amdgpu_cs void @test_i64_sle(i64 %a, i64 %p, i64 %q, ptr addrspace(1) %ou
 define amdgpu_cs void @test_i64_sgt(i64 %a, i64 %p, i64 %q, ptr addrspace(1) %out) {
 ; GCN-LABEL: test_i64_sgt:
 ; GCN:       ; %bb.0: ; %.entry
-; GCN-NEXT:    v_cmp_gt_i64_e32 vcc_lo, 2, v[0:1]
-; GCN-NEXT:    v_cndmask_b32_e64 v1, v3, 0, vcc_lo
-; GCN-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
-; GCN-NEXT:    v_cndmask_b32_e64 v3, v5, 0, vcc_lo
-; GCN-NEXT:    v_cndmask_b32_e64 v2, v4, 0, vcc_lo
+; GCN-NEXT:    v_cmp_lt_i64_e32 vcc_lo, 1, v[0:1]
+; GCN-NEXT:    v_dual_cndmask_b32 v1, 0, v3 :: v_dual_cndmask_b32 v0, 0, v2
+; GCN-NEXT:    v_dual_cndmask_b32 v3, 0, v5 :: v_dual_cndmask_b32 v2, 0, v4
 ; GCN-NEXT:    global_store_b128 v[6:7], v[0:3], off
 ; GCN-NEXT:    s_endpgm
 .entry:
@@ -133,11 +129,9 @@ define amdgpu_cs void @test_i64_sgt(i64 %a, i64 %p, i64 %q, ptr addrspace(1) %ou
 define amdgpu_cs void @test_i64_slt(i64 %a, i64 %p, i64 %q, ptr addrspace(1) %out) {
 ; GCN-LABEL: test_i64_slt:
 ; GCN:       ; %bb.0: ; %.entry
-; GCN-NEXT:    v_cmp_lt_i64_e32 vcc_lo, 2, v[0:1]
-; GCN-NEXT:    v_cndmask_b32_e64 v1, v3, 0, vcc_lo
-; GCN-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
-; GCN-NEXT:    v_cndmask_b32_e64 v3, v5, 0, vcc_lo
-; GCN-NEXT:    v_cndmask_b32_e64 v2, v4, 0, vcc_lo
+; GCN-NEXT:    v_cmp_gt_i64_e32 vcc_lo, 3, v[0:1]
+; GCN-NEXT:    v_dual_cndmask_b32 v1, 0, v3 :: v_dual_cndmask_b32 v0, 0, v2
+; GCN-NEXT:    v_dual_cndmask_b32 v3, 0, v5 :: v_dual_cndmask_b32 v2, 0, v4
 ; GCN-NEXT:    global_store_b128 v[6:7], v[0:3], off
 ; GCN-NEXT:    s_endpgm
 .entry:
@@ -154,9 +148,8 @@ define amdgpu_cs void @test_i64_slt(i64 %a, i64 %p, i64 %q, ptr addrspace(1) %ou
 define amdgpu_cs void @test_u32_eq(i32 %a, i32 %p, i32 %q, ptr addrspace(1) %out) {
 ; GCN-LABEL: test_u32_eq:
 ; GCN:       ; %bb.0: ; %.entry
-; GCN-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GCN-NEXT:    v_cndmask_b32_e64 v0, v1, 0, vcc_lo
-; GCN-NEXT:    v_cndmask_b32_e64 v1, v2, 0, vcc_lo
+; GCN-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 1, v0
+; GCN-NEXT:    v_dual_cndmask_b32 v0, 0, v1 :: v_dual_cndmask_b32 v1, 0, v2
 ; GCN-NEXT:    global_store_b64 v[3:4], v[0:1], off
 ; GCN-NEXT:    s_endpgm
 .entry:
@@ -191,10 +184,12 @@ define amdgpu_cs void @test_mixed(i32 %a, i32 %p, i32 %q, i32 %r, i32 %s, ptr ad
 ; GCN-LABEL: test_mixed:
 ; GCN:       ; %bb.0: ; %.entry
 ; GCN-NEXT:    v_cmp_eq_u32_e32 vcc_lo, -1, v0
+; GCN-NEXT:    v_cmp_ne_u32_e64 s0, -1, v0
 ; GCN-NEXT:    v_cndmask_b32_e64 v0, v1, 0, vcc_lo
 ; GCN-NEXT:    v_cndmask_b32_e32 v1, 0, v2, vcc_lo
-; GCN-NEXT:    v_cndmask_b32_e64 v2, v3, 0, vcc_lo
-; GCN-NEXT:    v_cndmask_b32_e64 v3, v4, 0, vcc_lo
+; GCN-NEXT:    s_delay_alu instid0(VALU_DEP_3)
+; GCN-NEXT:    v_cndmask_b32_e64 v2, 0, v3, s0
+; GCN-NEXT:    v_cndmask_b32_e64 v3, 0, v4, s0
 ; GCN-NEXT:    global_store_b128 v[5:6], v[0:3], off
 ; GCN-NEXT:    s_endpgm
 .entry:
@@ -235,9 +230,8 @@ define amdgpu_cs void @test_sgpr(i32 %a, i32 %p, i32 inreg %q, i32 inreg %r, ptr
 define amdgpu_cs void @test_u32_ne(i32 %a, i32 %p, i32 %q, ptr addrspace(1) %out) {
 ; GCN-LABEL: test_u32_ne:
 ; GCN:       ; %bb.0: ; %.entry
-; GCN-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 1, v0
-; GCN-NEXT:    v_cndmask_b32_e64 v0, v1, 0, vcc_lo
-; GCN-NEXT:    v_cndmask_b32_e64 v1, v2, 0, vcc_lo
+; GCN-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
+; GCN-NEXT:    v_dual_cndmask_b32 v0, 0, v1 :: v_dual_cndmask_b32 v1, 0, v2
 ; GCN-NEXT:    global_store_b64 v[3:4], v[0:1], off
 ; GCN-NEXT:    s_endpgm
 .entry:
@@ -287,9 +281,8 @@ define amdgpu_cs void @test_u32_ule(i32 %a, i32 %p, i32 %q, ptr addrspace(1) %ou
 define amdgpu_cs void @test_u32_ugt(i32 %a, i32 %p, i32 %q, ptr addrspace(1) %out) {
 ; GCN-LABEL: test_u32_ugt:
 ; GCN:       ; %bb.0: ; %.entry
-; GCN-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 2, v0
-; GCN-NEXT:    v_cndmask_b32_e64 v0, v1, 0, vcc_lo
-; GCN-NEXT:    v_cndmask_b32_e64 v1, v2, 0, vcc_lo
+; GCN-NEXT:    v_cmp_lt_u32_e32 vcc_lo, 1, v0
+; GCN-NEXT:    v_dual_cndmask_b32 v0, 0, v1 :: v_dual_cndmask_b32 v1, 0, v2
 ; GCN-NEXT:    global_store_b64 v[3:4], v[0:1], off
 ; GCN-NEXT:    s_endpgm
 .entry:
@@ -305,9 +298,8 @@ define amdgpu_cs void @test_u32_ugt(i32 %a, i32 %p, i32 %q, ptr addrspace(1) %ou
 define amdgpu_cs void @test_u32_ult(i32 %a, i32 %p, i32 %q, ptr addrspace(1) %out) {
 ; GCN-LABEL: test_u32_ult:
 ; GCN:       ; %bb.0: ; %.entry
-; GCN-NEXT:    v_cmp_lt_u32_e32 vcc_lo, 2, v0
-; GCN-NEXT:    v_cndmask_b32_e64 v0, v1, 0, vcc_lo
-; GCN-NEXT:    v_cndmask_b32_e64 v1, v2, 0, vcc_lo
+; GCN-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 3, v0
+; GCN-NEXT:    v_dual_cndmask_b32 v0, 0, v1 :: v_dual_cndmask_b32 v1, 0, v2
 ; GCN-NEXT:    global_store_b64 v[3:4], v[0:1], off
 ; GCN-NEXT:    s_endpgm
 .entry:
@@ -324,11 +316,9 @@ define amdgpu_cs void @test_u32_ult(i32 %a, i32 %p, i32 %q, ptr addrspace(1) %ou
 define amdgpu_cs void @test_u64_eq(i64 %a, i64 %p, i64 %q, ptr addrspace(1) %out) {
 ; GCN-LABEL: test_u64_eq:
 ; GCN:       ; %bb.0: ; %.entry
-; GCN-NEXT:    v_cmp_eq_u64_e32 vcc_lo, 1, v[0:1]
-; GCN-NEXT:    v_cndmask_b32_e64 v1, v3, 0, vcc_lo
-; GCN-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
-; GCN-NEXT:    v_cndmask_b32_e64 v3, v5, 0, vcc_lo
-; GCN-NEXT:    v_cndmask_b32_e64 v2, v4, 0, vcc_lo
+; GCN-NEXT:    v_cmp_ne_u64_e32 vcc_lo, 1, v[0:1]
+; GCN-NEXT:    v_dual_cndmask_b32 v1, 0, v3 :: v_dual_cndmask_b32 v0, 0, v2
+; GCN-NEXT:    v_dual_cndmask_b32 v3, 0, v5 :: v_dual_cndmask_b32 v2, 0, v4
 ; GCN-NEXT:    global_store_b128 v[6:7], v[0:3], off
 ; GCN-NEXT:    s_endpgm
 .entry:
@@ -344,11 +334,9 @@ define amdgpu_cs void @test_u64_eq(i64 %a, i64 %p, i64 %q, ptr addrspace(1) %out
 define amdgpu_cs void @test_u64_ne(i64 %a, i64 %p, i64 %q, ptr addrspace(1) %out) {
 ; GCN-LABEL: test_u64_ne:
 ; GCN:       ; %bb.0: ; %.entry
-; GCN-NEXT:    v_cmp_ne_u64_e32 vcc_lo, 1, v[0:1]
-; GCN-NEXT:    v_cndmask_b32_e64 v1, v3, 0, vcc_lo
-; GCN-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
-; GCN-NEXT:    v_cndmask_b32_e64 v3, v5, 0, vcc_lo
-; GCN-NEXT:    v_cndmask_b32_e64 v2, v4, 0, vcc_lo
+; GCN-NEXT:    v_cmp_eq_u64_e32 vcc_lo, 1, v[0:1]
+; GCN-NEXT:    v_dual_cndmask_b32 v1, 0, v3 :: v_dual_cndmask_b32 v0, 0, v2
+; GCN-NEXT:    v_dual_cndmask_b32 v3, 0, v5 :: v_dual_cndmask_b32 v2, 0, v4
 ; GCN-NEXT:    global_store_b128 v[6:7], v[0:3], off
 ; GCN-NEXT:    s_endpgm
 .entry:
@@ -400,11 +388,9 @@ define amdgpu_cs void @test_u64_ule(i64 %a, i64 %p, i64 %q, ptr addrspace(1) %ou
 define amdgpu_cs void @test_u64_ugt(i64 %a, i64 %p, i64 %q, ptr addrspace(1) %out) {
 ; GCN-LABEL: test_u64_ugt:
 ; GCN:       ; %bb.0: ; %.entry
-; GCN-NEXT:    v_cmp_gt_u64_e32 vcc_lo, 2, v[0:1]
-; GCN-NEXT:    v_cndmask_b32_e64 v1, v3, 0, vcc_lo
-; GCN-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
-; GCN-NEXT:    v_cndmask_b32_e64 v3, v5, 0, vcc_lo
-; GCN-NEXT:    v_cndmask_b32_e64 v2, v4, 0, vcc_lo
+; GCN-NEXT:    v_cmp_lt_u64_e32 vcc_lo, 1, v[0:1]
+; GCN-NEXT:    v_dual_cndmask_b32 v1, 0, v3 :: v_dual_cndmask_b32 v0, 0, v2
+; GCN-NEXT:    v_dual_cndmask_b32 v3, 0, v5 :: v_dual_cndmask_b32 v2, 0, v4
 ; GCN-NEXT:    global_store_b128 v[6:7], v[0:3], off
 ; GCN-NEXT:    s_endpgm
 .entry:
@@ -420,11 +406,9 @@ define amdgpu_cs void @test_u64_ugt(i64 %a, i64 %p, i64 %q, ptr addrspace(1) %ou
 define amdgpu_cs void @test_u64_ult(i64 %a, i64 %p, i64 %q, ptr addrspace(1) %out) {
 ; GCN-LABEL: test_u64_ult:
 ; GCN:       ; %bb.0: ; %.entry
-; GCN-NEXT:    v_cmp_lt_u64_e32 vcc_lo, 2, v[0:1]
-; GCN-NEXT:    v_cndmask_b32_e64 v1, v3, 0, vcc_lo
-; GCN-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
-; GCN-NEXT:    v_cndmask_b32_e64 v3, v5, 0, vcc_lo
-; GCN-NEXT:    v_cndmask_b32_e64 v2, v4, 0, vcc_lo
+; GCN-NEXT:    v_cmp_gt_u64_e32 vcc_lo, 3, v[0:1]
+; GCN-NEXT:    v_dual_cndmask_b32 v1, 0, v3 :: v_dual_cndmask_b32 v0, 0, v2
+; GCN-NEXT:    v_dual_cndmask_b32 v3, 0, v5 :: v_dual_cndmask_b32 v2, 0, v4
 ; GCN-NEXT:    global_store_b128 v[6:7], v[0:3], off
 ; GCN-NEXT:    s_endpgm
 .entry:
@@ -441,9 +425,8 @@ define amdgpu_cs void @test_u64_ult(i64 %a, i64 %p, i64 %q, ptr addrspace(1) %ou
 define amdgpu_cs void @test_f32_oeq(float %a, float %p, float %q, ptr addrspace(1) %out) {
 ; GCN-LABEL: test_f32_oeq:
 ; GCN:       ; %bb.0: ; %.entry
-; GCN-NEXT:    v_cmp_eq_f32_e32 vcc_lo, 2.0, v0
-; GCN-NEXT:    v_cndmask_b32_e64 v0, v1, 0, vcc_lo
-; GCN-NEXT:    v_cndmask_b32_e64 v1, v2, 0, vcc_lo
+; GCN-NEXT:    v_cmp_neq_f32_e32 vcc_lo, 2.0, v0
+; GCN-NEXT:    v_dual_cndmask_b32 v0, 0, v1 :: v_dual_cndmask_b32 v1, 0, v2
 ; GCN-NEXT:    global_store_b64 v[3:4], v[0:1], off
 ; GCN-NEXT:    s_endpgm
 .entry:
@@ -479,9 +462,8 @@ define amdgpu_cs void @test_f32_negative_modifiers(float %a, float %p, float %q,
 define amdgpu_cs void @test_f32_one(float %a, float %p, float %q, ptr addrspace(1) %out) {
 ; GCN-LABEL: test_f32_one:
 ; GCN:       ; %bb.0: ; %.entry
-; GCN-NEXT:    v_cmp_lg_f32_e32 vcc_lo, 2.0, v0
-; GCN-NEXT:    v_cndmask_b32_e64 v0, v1, 0, vcc_lo
-; GCN-NEXT:    v_cndmask_b32_e64 v1, v2, 0, vcc_lo
+; GCN-NEXT:    v_cmp_nlg_f32_e32 vcc_lo, 2.0, v0
+; GCN-NEXT:    v_dual_cndmask_b32 v0, 0, v1 :: v_dual_cndmask_b32 v1, 0, v2
 ; GCN-NEXT:    global_store_b64 v[3:4], v[0:1], off
 ; GCN-NEXT:    s_endpgm
 .entry:
@@ -497,9 +479,8 @@ define amdgpu_cs void @test_f32_one(float %a, float %p, float %q, ptr addrspace(
 define amdgpu_cs void @test_f32_ord(float %a, float %p, float %q, ptr addrspace(1) %out) {
 ; GCN-LABEL: test_f32_ord:
 ; GCN:       ; %bb.0: ; %.entry
-; GCN-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v0
-; GCN-NEXT:    v_cndmask_b32_e64 v0, v1, 0, vcc_lo
-; GCN-NEXT:    v_cndmask_b32_e64 v1, v2, 0, vcc_lo
+; GCN-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GCN-NEXT:    v_dual_cndmask_b32 v0, 0, v1 :: v_dual_cndmask_b32 v1, 0, v2
 ; GCN-NEXT:    global_store_b64 v[3:4], v[0:1], off
 ; GCN-NEXT:    s_endpgm
 .entry:
@@ -515,9 +496,8 @@ define amdgpu_cs void @test_f32_ord(float %a, float %p, float %q, ptr addrspace(
 define amdgpu_cs void @test_f32_uno(float %a, float %p, float %q, ptr addrspace(1) %out) {
 ; GCN-LABEL: test_f32_uno:
 ; GCN:       ; %bb.0: ; %.entry
-; GCN-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
-; GCN-NEXT:    v_cndmask_b32_e64 v0, v1, 0, vcc_lo
-; GCN-NEXT:    v_cndmask_b32_e64 v1, v2, 0, vcc_lo
+; GCN-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v0
+; GCN-NEXT:    v_dual_cndmask_b32 v0, 0, v1 :: v_dual_cndmask_b32 v1, 0, v2
 ; GCN-NEXT:    global_store_b64 v[3:4], v[0:1], off
 ; GCN-NEXT:    s_endpgm
 .entry:
@@ -533,9 +513,8 @@ define amdgpu_cs void @test_f32_uno(float %a, float %p, float %q, ptr addrspace(
 define amdgpu_cs void @test_f32_oge(float %a, float %p, float %q, ptr addrspace(1) %out) {
 ; GCN-LABEL: test_f32_oge:
 ; GCN:       ; %bb.0: ; %.entry
-; GCN-NEXT:    v_cmp_ge_f32_e32 vcc_lo, 2.0, v0
-; GCN-NEXT:    v_cndmask_b32_e64 v0, v1, 0, vcc_lo
-; GCN-NEXT:    v_cndmask_b32_e64 v1, v2, 0, vcc_lo
+; GCN-NEXT:    v_cmp_nge_f32_e32 vcc_lo, 2.0, v0
+; GCN-NEXT:    v_dual_cndmask_b32 v0, 0, v1 :: v_dual_cndmask_b32 v1, 0, v2
 ; GCN-NEXT:    global_store_b64 v[3:4], v[0:1], off
 ; GCN-NEXT:    s_endpgm
 .entry:
@@ -551,9 +530,8 @@ define amdgpu_cs void @test_f32_oge(float %a, float %p, float %q, ptr addrspace(
 define amdgpu_cs void @test_f32_ole(float %a, float %p, float %q, ptr addrspace(1) %out) {
 ; GCN-LABEL: test_f32_ole:
 ; GCN:       ; %bb.0: ; %.entry
-; GCN-NEXT:    v_cmp_le_f32_e32 vcc_lo, 2.0, v0
-; GCN-NEXT:    v_cndmask_b32_e64 v0, v1, 0, vcc_lo
-; GCN-NEXT:    v_cndmask_b32_e64 v1, v2, 0, vcc_lo
+; GCN-NEXT:    v_cmp_nle_f32_e32 vcc_lo, 2.0, v0
+; GCN-NEXT:    v_dual_cndmask_b32 v0, 0, v1 :: v_dual_cndmask_b32 v1, 0, v2
 ; GCN-NEXT:    global_store_b64 v[3:4], v[0:1], off
 ; GCN-NEXT:    s_endpgm
 .entry:
@@ -569,9 +547,8 @@ define amdgpu_cs void @test_f32_ole(float %a, float %p, float %q, ptr addrspace(
 define amdgpu_cs void @test_f32_ogt(float %a, float %p, float %q, ptr addrspace(1) %out) {
 ; GCN-LABEL: test_f32_ogt:
 ; GCN:       ; %bb.0: ; %.entry
-; GCN-NEXT:    v_cmp_gt_f32_e32 vcc_lo, 2.0, v0
-; GCN-NEXT:    v_cndmask_b32_e64 v0, v1, 0, vcc_lo
-; GCN-NEXT:    v_cndmask_b32_e64 v1, v2, 0, vcc_lo
+; GCN-NEXT:    v_cmp_ngt_f32_e32 vcc_lo, 2.0, v0
+; GCN-NEXT:    v_dual_cndmask_b32 v0, 0, v1 :: v_dual_cndmask_b32 v1, 0, v2
 ; GCN-NEXT:    global_store_b64 v[3:4], v[0:1], off
 ; GCN-NEXT:    s_endpgm
 .entry:
@@ -587,9 +564,8 @@ define amdgpu_cs void @test_f32_ogt(float %a, float %p, float %q, ptr addrspace(
 define amdgpu_cs void @test_f32_olt(float %a, float %p, float %q, ptr addrspace(1) %out) {
 ; GCN-LABEL: test_f32_olt:
 ; GCN:       ; %bb.0: ; %.entry
-; GCN-NEXT:    v_cmp_lt_f32_e32 vcc_lo, 2.0, v0
-; GCN-NEXT:    v_cndmask_b32_e64 v0, v1, 0, vcc_lo
-; GCN-NEXT:    v_cndmask_b32_e64 v1, v2, 0, vcc_lo
+; GCN-NEXT:    v_cmp_nlt_f32_e32 vcc_lo, 2.0, v0
+; GCN-NEXT:    v_dual_cndmask_b32 v0, 0, v1 :: v_dual_cndmask_b32 v1, 0, v2
 ; GCN-NEXT:    global_store_b64 v[3:4], v[0:1], off
 ; GCN-NEXT:    s_endpgm
 .entry:
@@ -606,11 +582,9 @@ define amdgpu_cs void @test_f32_olt(float %a, float %p, float %q, ptr addrspace(
 define amdgpu_cs void @test_f64_oeq(double %a, double %p, double %q, ptr addrspace(1) %out) {
 ; GCN-LABEL: test_f64_oeq:
 ; GCN:       ; %bb.0: ; %.entry
-; GCN-NEXT:    v_cmp_eq_f64_e32 vcc_lo, 2.0, v[0:1]
-; GCN-NEXT:    v_cndmask_b32_e64 v1, v3, 0, vcc_lo
-; GCN-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
-; GCN-NEXT:    v_cndmask_b32_e64 v3, v5, 0, vcc_lo
-; GCN-NEXT:    v_cndmask_b32_e64 v2, v4, 0, vcc_lo
+; GCN-NEXT:    v_cmp_neq_f64_e32 vcc_lo, 2.0, v[0:1]
+; GCN-NEXT:    v_dual_cndmask_b32 v1, 0, v3 :: v_dual_cndmask_b32 v0, 0, v2
+; GCN-NEXT:    v_dual_cndmask_b32 v3, 0, v5 :: v_dual_cndmask_b32 v2, 0, v4
 ; GCN-NEXT:    global_store_b128 v[6:7], v[0:3], off
 ; GCN-NEXT:    s_endpgm
 .entry:
@@ -626,11 +600,9 @@ define amdgpu_cs void @test_f64_oeq(double %a, double %p, double %q, ptr addrspa
 define amdgpu_cs void @test_f64_one(double %a, double %p, double %q, ptr addrspace(1) %out) {
 ; GCN-LABEL: test_f64_one:
 ; GCN:       ; %bb.0: ; %.entry
-; GCN-NEXT:    v_cmp_lg_f64_e32 vcc_lo, 2.0, v[0:1]
-; GCN-NEXT:    v_cndmask_b32_e64 v1, v3, 0, vcc_lo
-; GCN-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
-; GCN-NEXT:    v_cndmask_b32_e64 v3, v5, 0, vcc_lo
-; GCN-NEXT:    v_cndmask_b32_e64 v2, v4, 0, vcc_lo
+; GCN-NEXT:    v_cmp_nlg_f64_e32 vcc_lo, 2.0, v[0:1]
+; GCN-NEXT:    v_dual_cndmask_b32 v1, 0, v3 :: v_dual_cndmask_b32 v0, 0, v2
+; GCN-NEXT:    v_dual_cndmask_b32 v3, 0, v5 :: v_dual_cndmask_b32 v2, 0, v4
 ; GCN-NEXT:    global_store_b128 v[6:7], v[0:3], off
 ; GCN-NEXT:    s_endpgm
 .entry:
@@ -646,11 +618,9 @@ define amdgpu_cs void @test_f64_one(double %a, double %p, double %q, ptr addrspa
 define amdgpu_cs void @test_f64_oge(double %a, double %p, double %q, ptr addrspace(1) %out) {
 ; GCN-LABEL: test_f64_oge:
 ; GCN:       ; %bb.0: ; %.entry
-; GCN-NEXT:    v_cmp_ge_f64_e32 vcc_lo, 2.0, v[0:1]
-; GCN-NEXT:    v_cndmask_b32_e64 v1, v3, 0, vcc_lo
-; GCN-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
-; GCN-NEXT:    v_cndmask_b32_e64 v3, v5, 0, vcc_lo
-; GCN-NEXT:    v_cndmask_b32_e64 v2, v4, 0, vcc_lo
+; GCN-NEXT:    v_cmp_nge_f64_e32 vcc_lo, 2.0, v[0:1]
+; GCN-NEXT:    v_dual_cndmask_b32 v1, 0, v3 :: v_dual_cndmask_b32 v0, 0, v2
+; GCN-NEXT:    v_dual_cndmask_b32 v3, 0, v5 :: v_dual_cndmask_b32 v2, 0, v4
 ; GCN-NEXT:    global_store_b128 v[6:7], v[0:3], off
 ; GCN-NEXT:    s_endpgm
 .entry:
@@ -666,11 +636,9 @@ define amdgpu_cs void @test_f64_oge(double %a, double %p, double %q, ptr addrspa
 define amdgpu_cs void @test_f64_ole(double %a, double %p, double %q, ptr addrspace(1) %out) {
 ; GCN-LABEL: test_f64_ole:
 ; GCN:       ; %bb.0: ; %.entry
-; GCN-NEXT:    v_cmp_le_f64_e32 vcc_lo, 2.0, v[0:1]
-; GCN-NEXT:    v_cndmask_b32_e64 v1, v3, 0, vcc_lo
-; GCN-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
-; GCN-NEXT:    v_cndmask_b32_e64 v3, v5, 0, vcc_lo
-; GCN-NEXT:    v_cndmask_b32_e64 v2, v4, 0, vcc_lo
+; GCN-NEXT:    v_cmp_nle_f64_e32 vcc_lo, 2.0, v[0:1]
+; GCN-NEXT:    v_dual_cndmask_b32 v1, 0, v3 :: v_dual_cndmask_b32 v0, 0, v2
+; GCN-NEXT:    v_dual_cndmask_b32 v3, 0, v5 :: v_dual_cndmask_b32 v2, 0, v4
 ; GCN-NEXT:    global_store_b128 v[6:7], v[0:3], off
 ; GCN-NEXT:    s_endpgm
 .entry:
@@ -686,11 +654,9 @@ define amdgpu_cs void @test_f64_ole(double %a, double %p, double %q, ptr addrspa
 define amdgpu_cs void @test_f64_ogt(double %a, double %p, double %q, ptr addrspace(1) %out) {
 ; GCN-LABEL: test_f64_ogt:
 ; GCN:       ; %bb.0: ; %.entry
-; GCN-NEXT:    v_cmp_gt_f64_e32 vcc_lo, 2.0, v[0:1]
-; GCN-NEXT:    v_cndmask_b32_e64 v1, v3, 0, vcc_lo
-; GCN-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
-; GCN-NEXT:    v_cndmask_b32_e64 v3, v5, 0, vcc_lo
-; GCN-NEXT:    v_cndmask_b32_e64 v2, v4, 0, vcc_lo
+; GCN-NEXT:    v_cmp_ngt_f64_e32 vcc_lo, 2.0, v[0:1]
+; GCN-NEXT:    v_dual_cndmask_b32 v1, 0, v3 :: v_dual_cndmask_b32 v0, 0, v2
+; GCN-NEXT:    v_dual_cndmask_b32 v3, 0, v5 :: v_dual_cndmask_b32 v2, 0, v4
 ; GCN-NEXT:    global_store_b128 v[6:7], v[0:3], off
 ; GCN-NEXT:    s_endpgm
 .entry:
@@ -706,11 +672,9 @@ define amdgpu_cs void @test_f64_ogt(double %a, double %p, double %q, ptr addrspa
 define amdgpu_cs void @test_f64_olt(double %a, double %p, double %q, ptr addrspace(1) %out) {
 ; GCN-LABEL: test_f64_olt:
 ; GCN:       ; %bb.0: ; %.entry
-; GCN-NEXT:    v_cmp_lt_f64_e32 vcc_lo, 2.0, v[0:1]
-; GCN-NEXT:    v_cndmask_b32_e64 v1, v3, 0, vcc_lo
-; GCN-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
-; GCN-NEXT:    v_cndmask_b32_e64 v3, v5, 0, vcc_lo
-; GCN-NEXT:    v_cndmask_b32_e64 v2, v4, 0, vcc_lo
+; GCN-NEXT:    v_cmp_nlt_f64_e32 vcc_lo, 2.0, v[0:1]
+; GCN-NEXT:    v_dual_cndmask_b32 v1, 0, v3 :: v_dual_cndmask_b32 v0, 0, v2
+; GCN-NEXT:    v_dual_cndmask_b32 v3, 0, v5 :: v_dual_cndmask_b32 v2, 0, v4
 ; GCN-NEXT:    global_store_b128 v[6:7], v[0:3], off
 ; GCN-NEXT:    s_endpgm
 .entry:
@@ -726,11 +690,9 @@ define amdgpu_cs void @test_f64_olt(double %a, double %p, double %q, ptr addrspa
 define amdgpu_cs void @test_f64_ord(double %a, double %p, double %q, ptr addrspace(1) %out) {
 ; GCN-LABEL: test_f64_ord:
 ; GCN:       ; %bb.0: ; %.entry
-; GCN-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[0:1]
-; GCN-NEXT:    v_cndmask_b32_e64 v1, v3, 0, vcc_lo
-; GCN-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
-; GCN-NEXT:    v_cndmask_b32_e64 v3, v5, 0, vcc_lo
-; GCN-NEXT:    v_cndmask_b32_e64 v2, v4, 0, vcc_lo
+; GCN-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GCN-NEXT:    v_dual_cndmask_b32 v1, 0, v3 :: v_dual_cndmask_b32 v0, 0, v2
+; GCN-NEXT:    v_dual_cndmask_b32 v3, 0, v5 :: v_dual_cndmask_b32 v2, 0, v4
 ; GCN-NEXT:    global_store_b128 v[6:7], v[0:3], off
 ; GCN-NEXT:    s_endpgm
 .entry:
@@ -746,11 +708,9 @@ define amdgpu_cs void @test_f64_ord(double %a, double %p, double %q, ptr addrspa
 define amdgpu_cs void @test_f64_uno(double %a, double %p, double %q, ptr addrspace(1) %out) {
 ; GCN-LABEL: test_f64_uno:
 ; GCN:       ; %bb.0: ; %.entry
-; GCN-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1]
-; GCN-NEXT:    v_cndmask_b32_e64 v1, v3, 0, vcc_lo
-; GCN-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
-; GCN-NEXT:    v_cndmask_b32_e64 v3, v5, 0, vcc_lo
-; GCN-NEXT:    v_cndmask_b32_e64 v2, v4, 0, vcc_lo
+; GCN-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[0:1]
+; GCN-NEXT:    v_dual_cndmask_b32 v1, 0, v3 :: v_dual_cndmask_b32 v0, 0, v2
+; GCN-NEXT:    v_dual_cndmask_b32 v3, 0, v5 :: v_dual_cndmask_b32 v2, 0, v4
 ; GCN-NEXT:    global_store_b128 v[6:7], v[0:3], off
 ; GCN-NEXT:    s_endpgm
 .entry:
diff --git a/llvm/test/CodeGen/AMDGPU/uaddsat.ll b/llvm/test/CodeGen/AMDGPU/uaddsat.ll
index 79adc9ead62e1..867ad96fd5ed0 100644
--- a/llvm/test/CodeGen/AMDGPU/uaddsat.ll
+++ b/llvm/test/CodeGen/AMDGPU/uaddsat.ll
@@ -696,9 +696,9 @@ define i64 @v_uaddsat_i64(i64 %lhs, i64 %rhs) {
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX6-NEXT:    v_add_i32_e32 v2, vcc, v0, v2
 ; GFX6-NEXT:    v_addc_u32_e32 v3, vcc, v1, v3, vcc
-; GFX6-NEXT:    v_cmp_lt_u64_e32 vcc, v[2:3], v[0:1]
-; GFX6-NEXT:    v_cndmask_b32_e64 v0, v2, -1, vcc
-; GFX6-NEXT:    v_cndmask_b32_e64 v1, v3, -1, vcc
+; GFX6-NEXT:    v_cmp_ge_u64_e32 vcc, v[2:3], v[0:1]
+; GFX6-NEXT:    v_cndmask_b32_e32 v0, -1, v2, vcc
+; GFX6-NEXT:    v_cndmask_b32_e32 v1, -1, v3, vcc
 ; GFX6-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_uaddsat_i64:
@@ -706,9 +706,9 @@ define i64 @v_uaddsat_i64(i64 %lhs, i64 %rhs) {
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_add_u32_e32 v2, vcc, v0, v2
 ; GFX8-NEXT:    v_addc_u32_e32 v3, vcc, v1, v3, vcc
-; GFX8-NEXT:    v_cmp_lt_u64_e32 vcc, v[2:3], v[0:1]
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v2, -1, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v1, v3, -1, vcc
+; GFX8-NEXT:    v_cmp_ge_u64_e32 vcc, v[2:3], v[0:1]
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, -1, v2, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, -1, v3, vcc
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX9-LABEL: v_uaddsat_i64:
@@ -716,9 +716,9 @@ define i64 @v_uaddsat_i64(i64 %lhs, i64 %rhs) {
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    v_add_co_u32_e32 v2, vcc, v0, v2
 ; GFX9-NEXT:    v_addc_co_u32_e32 v3, vcc, v1, v3, vcc
-; GFX9-NEXT:    v_cmp_lt_u64_e32 vcc, v[2:3], v[0:1]
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, -1, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v1, v3, -1, vcc
+; GFX9-NEXT:    v_cmp_ge_u64_e32 vcc, v[2:3], v[0:1]
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, -1, v2, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, -1, v3, vcc
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: v_uaddsat_i64:
@@ -726,9 +726,9 @@ define i64 @v_uaddsat_i64(i64 %lhs, i64 %rhs) {
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    v_add_co_u32 v2, vcc_lo, v0, v2
 ; GFX10-NEXT:    v_add_co_ci_u32_e32 v3, vcc_lo, v1, v3, vcc_lo
-; GFX10-NEXT:    v_cmp_lt_u64_e32 vcc_lo, v[2:3], v[0:1]
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v2, -1, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v3, -1, vcc_lo
+; GFX10-NEXT:    v_cmp_ge_u64_e32 vcc_lo, v[2:3], v[0:1]
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, -1, v2, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, -1, v3, vcc_lo
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: v_uaddsat_i64:
@@ -737,9 +737,8 @@ define i64 @v_uaddsat_i64(i64 %lhs, i64 %rhs) {
 ; GFX11-NEXT:    v_add_co_u32 v2, vcc_lo, v0, v2
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX11-NEXT:    v_add_co_ci_u32_e64 v3, null, v1, v3, vcc_lo
-; GFX11-NEXT:    v_cmp_lt_u64_e32 vcc_lo, v[2:3], v[0:1]
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v2, -1, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v3, -1, vcc_lo
+; GFX11-NEXT:    v_cmp_ge_u64_e32 vcc_lo, v[2:3], v[0:1]
+; GFX11-NEXT:    v_dual_cndmask_b32 v0, -1, v2 :: v_dual_cndmask_b32 v1, -1, v3
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
   %result = call i64 @llvm.uadd.sat.i64(i64 %lhs, i64 %rhs)
   ret i64 %result
diff --git a/llvm/test/CodeGen/AMDGPU/usubsat.ll b/llvm/test/CodeGen/AMDGPU/usubsat.ll
index 90491a07289a0..9e2d1eac3277c 100644
--- a/llvm/test/CodeGen/AMDGPU/usubsat.ll
+++ b/llvm/test/CodeGen/AMDGPU/usubsat.ll
@@ -732,9 +732,9 @@ define i64 @v_usubsat_i64(i64 %lhs, i64 %rhs) {
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, v0, v2
 ; GFX6-NEXT:    v_subb_u32_e32 v3, vcc, v1, v3, vcc
-; GFX6-NEXT:    v_cmp_gt_u64_e32 vcc, v[2:3], v[0:1]
-; GFX6-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX6-NEXT:    v_cndmask_b32_e64 v1, v3, 0, vcc
+; GFX6-NEXT:    v_cmp_le_u64_e32 vcc, v[2:3], v[0:1]
+; GFX6-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
+; GFX6-NEXT:    v_cndmask_b32_e32 v1, 0, v3, vcc
 ; GFX6-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_usubsat_i64:
@@ -742,9 +742,9 @@ define i64 @v_usubsat_i64(i64 %lhs, i64 %rhs) {
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_sub_u32_e32 v2, vcc, v0, v2
 ; GFX8-NEXT:    v_subb_u32_e32 v3, vcc, v1, v3, vcc
-; GFX8-NEXT:    v_cmp_gt_u64_e32 vcc, v[2:3], v[0:1]
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v1, v3, 0, vcc
+; GFX8-NEXT:    v_cmp_le_u64_e32 vcc, v[2:3], v[0:1]
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, 0, v3, vcc
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX9-LABEL: v_usubsat_i64:
@@ -752,9 +752,9 @@ define i64 @v_usubsat_i64(i64 %lhs, i64 %rhs) {
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    v_sub_co_u32_e32 v2, vcc, v0, v2
 ; GFX9-NEXT:    v_subb_co_u32_e32 v3, vcc, v1, v3, vcc
-; GFX9-NEXT:    v_cmp_gt_u64_e32 vcc, v[2:3], v[0:1]
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v1, v3, 0, vcc
+; GFX9-NEXT:    v_cmp_le_u64_e32 vcc, v[2:3], v[0:1]
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, 0, v3, vcc
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: v_usubsat_i64:
@@ -762,9 +762,9 @@ define i64 @v_usubsat_i64(i64 %lhs, i64 %rhs) {
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    v_sub_co_u32 v2, vcc_lo, v0, v2
 ; GFX10-NEXT:    v_sub_co_ci_u32_e32 v3, vcc_lo, v1, v3, vcc_lo
-; GFX10-NEXT:    v_cmp_gt_u64_e32 vcc_lo, v[2:3], v[0:1]
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v3, 0, vcc_lo
+; GFX10-NEXT:    v_cmp_le_u64_e32 vcc_lo, v[2:3], v[0:1]
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0, v3, vcc_lo
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: v_usubsat_i64:
@@ -772,9 +772,8 @@ define i64 @v_usubsat_i64(i64 %lhs, i64 %rhs) {
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NEXT:    v_sub_co_u32 v2, vcc_lo, v0, v2
 ; GFX11-NEXT:    v_sub_co_ci_u32_e64 v3, null, v1, v3, vcc_lo
-; GFX11-NEXT:    v_cmp_gt_u64_e32 vcc_lo, v[2:3], v[0:1]
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v3, 0, vcc_lo
+; GFX11-NEXT:    v_cmp_le_u64_e32 vcc_lo, v[2:3], v[0:1]
+; GFX11-NEXT:    v_dual_cndmask_b32 v0, 0, v2 :: v_dual_cndmask_b32 v1, 0, v3
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
   %result = call i64 @llvm.usub.sat.i64(i64 %lhs, i64 %rhs)
   ret i64 %result
diff --git a/llvm/test/CodeGen/AMDGPU/v_cndmask.ll b/llvm/test/CodeGen/AMDGPU/v_cndmask.ll
index a41063f467d01..1ef4164a93b9f 100644
--- a/llvm/test/CodeGen/AMDGPU/v_cndmask.ll
+++ b/llvm/test/CodeGen/AMDGPU/v_cndmask.ll
@@ -2043,9 +2043,9 @@ define amdgpu_kernel void @fcmp_k0_vgprX_select_k1_vgprZ_f32_cond_use_x2(ptr add
 ; SI-NEXT:    buffer_load_dword v3, v[0:1], s[4:7], 0 addr64 glc
 ; SI-NEXT:    s_waitcnt vmcnt(0)
 ; SI-NEXT:    s_mov_b64 s[2:3], s[10:11]
-; SI-NEXT:    v_cmp_nle_f32_e32 vcc, 4.0, v2
-; SI-NEXT:    v_cndmask_b32_e64 v2, v3, -1.0, vcc
-; SI-NEXT:    v_cndmask_b32_e64 v3, v3, -2.0, vcc
+; SI-NEXT:    v_cmp_le_f32_e32 vcc, 4.0, v2
+; SI-NEXT:    v_cndmask_b32_e32 v2, -1.0, v3, vcc
+; SI-NEXT:    v_cndmask_b32_e32 v3, -2.0, v3, vcc
 ; SI-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
 ; SI-NEXT:    s_waitcnt vmcnt(0)
 ; SI-NEXT:    buffer_store_dword v3, v[0:1], s[0:3], 0 addr64
@@ -2071,9 +2071,9 @@ define amdgpu_kernel void @fcmp_k0_vgprX_select_k1_vgprZ_f32_cond_use_x2(ptr add
 ; VI-NEXT:    v_mov_b32_e32 v1, s1
 ; VI-NEXT:    v_add_u32_e32 v0, vcc, s0, v4
 ; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
-; VI-NEXT:    v_cmp_nle_f32_e32 vcc, 4.0, v5
-; VI-NEXT:    v_cndmask_b32_e64 v3, v2, -1.0, vcc
-; VI-NEXT:    v_cndmask_b32_e64 v2, v2, -2.0, vcc
+; VI-NEXT:    v_cmp_le_f32_e32 vcc, 4.0, v5
+; VI-NEXT:    v_cndmask_b32_e32 v3, -1.0, v2, vcc
+; VI-NEXT:    v_cndmask_b32_e32 v2, -2.0, v2, vcc
 ; VI-NEXT:    flat_store_dword v[0:1], v3
 ; VI-NEXT:    s_waitcnt vmcnt(0)
 ; VI-NEXT:    flat_store_dword v[0:1], v2
@@ -2091,9 +2091,9 @@ define amdgpu_kernel void @fcmp_k0_vgprX_select_k1_vgprZ_f32_cond_use_x2(ptr add
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    global_load_dword v2, v0, s[6:7] glc dlc
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
-; GFX10-NEXT:    v_cmp_nle_f32_e32 vcc, 4.0, v1
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v2, -1.0, vcc
-; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, -2.0, vcc
+; GFX10-NEXT:    v_cmp_le_f32_e32 vcc, 4.0, v1
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, -1.0, v2, vcc
+; GFX10-NEXT:    v_cndmask_b32_e32 v2, -2.0, v2, vcc
 ; GFX10-NEXT:    global_store_dword v0, v1, s[0:1]
 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX10-NEXT:    global_store_dword v0, v2, s[0:1]
@@ -2113,9 +2113,9 @@ define amdgpu_kernel void @fcmp_k0_vgprX_select_k1_vgprZ_f32_cond_use_x2(ptr add
 ; GFX11-NEXT:    s_waitcnt vmcnt(0)
 ; GFX11-NEXT:    global_load_b32 v2, v0, s[4:5] glc dlc
 ; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    v_cmp_nle_f32_e32 vcc, 4.0, v1
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v2, -1.0, vcc
-; GFX11-NEXT:    v_cndmask_b32_e64 v2, v2, -2.0, vcc
+; GFX11-NEXT:    v_cmp_le_f32_e32 vcc, 4.0, v1
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, -1.0, v2, vcc
+; GFX11-NEXT:    v_cndmask_b32_e32 v2, -2.0, v2, vcc
 ; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1] dlc
 ; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX11-NEXT:    global_store_b32 v0, v2, s[0:1] dlc
@@ -2135,9 +2135,9 @@ define amdgpu_kernel void @fcmp_k0_vgprX_select_k1_vgprZ_f32_cond_use_x2(ptr add
 ; GFX12-NEXT:    s_wait_loadcnt 0x0
 ; GFX12-NEXT:    global_load_b32 v2, v0, s[4:5] scope:SCOPE_SYS
 ; GFX12-NEXT:    s_wait_loadcnt 0x0
-; GFX12-NEXT:    v_cmp_nle_f32_e32 vcc, 4.0, v1
-; GFX12-NEXT:    v_cndmask_b32_e64 v1, v2, -1.0, vcc
-; GFX12-NEXT:    v_cndmask_b32_e64 v2, v2, -2.0, vcc
+; GFX12-NEXT:    v_cmp_le_f32_e32 vcc, 4.0, v1
+; GFX12-NEXT:    v_cndmask_b32_e32 v1, -1.0, v2, vcc
+; GFX12-NEXT:    v_cndmask_b32_e32 v2, -2.0, v2, vcc
 ; GFX12-NEXT:    global_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS
 ; GFX12-NEXT:    s_wait_storecnt 0x0
 ; GFX12-NEXT:    global_store_b32 v0, v2, s[0:1] scope:SCOPE_SYS
diff --git a/llvm/test/CodeGen/AMDGPU/vector-reduce-fmaximum.ll b/llvm/test/CodeGen/AMDGPU/vector-reduce-fmaximum.ll
index 41fad10051dac..6c71ae5c4d0e8 100644
--- a/llvm/test/CodeGen/AMDGPU/vector-reduce-fmaximum.ll
+++ b/llvm/test/CodeGen/AMDGPU/vector-reduce-fmaximum.ll
@@ -1729,50 +1729,50 @@ define double @test_vector_reduce_fmaximum_v2double(<2 x double> %v) {
 ; GFX7:       ; %bb.0: ; %entry
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX7-NEXT:    v_max_f64 v[4:5], v[0:1], v[2:3]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX7-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[2:3]
 ; GFX7-NEXT:    v_mov_b32_e32 v1, 0x7ff80000
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: test_vector_reduce_fmaximum_v2double:
 ; GFX8:       ; %bb.0: ; %entry
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_max_f64 v[4:5], v[0:1], v[2:3]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX8-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[2:3]
 ; GFX8-NEXT:    v_mov_b32_e32 v1, 0x7ff80000
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX9-LABEL: test_vector_reduce_fmaximum_v2double:
 ; GFX9:       ; %bb.0: ; %entry
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    v_max_f64 v[4:5], v[0:1], v[2:3]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX9-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[2:3]
 ; GFX9-NEXT:    v_mov_b32_e32 v1, 0x7ff80000
 ; GFX9-NEXT:    s_nop 0
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: test_vector_reduce_fmaximum_v2double:
 ; GFX10:       ; %bb.0: ; %entry
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    v_max_f64 v[4:5], v[0:1], v[2:3]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[2:3]
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v5, 0x7ff80000, vcc_lo
+; GFX10-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[2:3]
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v5, vcc_lo
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: test_vector_reduce_fmaximum_v2double:
 ; GFX11:       ; %bb.0: ; %entry
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NEXT:    v_max_f64 v[4:5], v[0:1], v[2:3]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[2:3]
+; GFX11-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[2:3]
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v5, 0x7ff80000, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v5, vcc_lo
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: test_vector_reduce_fmaximum_v2double:
@@ -1794,28 +1794,28 @@ define double @test_vector_reduce_fmaximum_v3double(<3 x double> %v) {
 ; GFX7:       ; %bb.0: ; %entry
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX7-NEXT:    v_max_f64 v[6:7], v[0:1], v[2:3]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX7-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[2:3]
 ; GFX7-NEXT:    v_mov_b32_e32 v8, 0x7ff80000
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v7, v8, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v6, 0, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v8, v7, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, 0, v6, vcc
 ; GFX7-NEXT:    v_max_f64 v[2:3], v[0:1], v[4:5]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v3, v8, vcc
+; GFX7-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[4:5]
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v8, v3, vcc
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: test_vector_reduce_fmaximum_v3double:
 ; GFX8:       ; %bb.0: ; %entry
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_max_f64 v[6:7], v[0:1], v[2:3]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX8-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[2:3]
 ; GFX8-NEXT:    v_mov_b32_e32 v8, 0x7ff80000
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v7, v8, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v6, 0, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v8, v7, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, 0, v6, vcc
 ; GFX8-NEXT:    v_max_f64 v[2:3], v[0:1], v[4:5]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v8, vcc
+; GFX8-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[4:5]
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v8, v3, vcc
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX9-LABEL: test_vector_reduce_fmaximum_v3double:
@@ -1823,44 +1823,44 @@ define double @test_vector_reduce_fmaximum_v3double(<3 x double> %v) {
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    v_max_f64 v[6:7], v[0:1], v[2:3]
 ; GFX9-NEXT:    v_mov_b32_e32 v8, 0x7ff80000
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX9-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[2:3]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v7, v8, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v6, 0, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v8, v7, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v6, vcc
 ; GFX9-NEXT:    v_max_f64 v[2:3], v[0:1], v[4:5]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX9-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[4:5]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v8, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v8, v3, vcc
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: test_vector_reduce_fmaximum_v3double:
 ; GFX10:       ; %bb.0: ; %entry
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    v_max_f64 v[6:7], v[0:1], v[2:3]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[2:3]
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v7, 0x7ff80000, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v6, 0, vcc_lo
+; GFX10-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[2:3]
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v7, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0, v6, vcc_lo
 ; GFX10-NEXT:    v_max_f64 v[2:3], v[0:1], v[4:5]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[4:5]
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
+; GFX10-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[4:5]
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: test_vector_reduce_fmaximum_v3double:
 ; GFX11:       ; %bb.0: ; %entry
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NEXT:    v_max_f64 v[6:7], v[0:1], v[2:3]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[2:3]
+; GFX11-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[2:3]
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v7, 0x7ff80000, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v6, 0, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v7, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0, v6, vcc_lo
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
 ; GFX11-NEXT:    v_max_f64 v[2:3], v[0:1], v[4:5]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[4:5]
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GFX11-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[4:5]
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc_lo
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3)
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: test_vector_reduce_fmaximum_v3double:
@@ -1884,36 +1884,36 @@ define double @test_vector_reduce_fmaximum_v4double(<4 x double> %v) {
 ; GFX7:       ; %bb.0: ; %entry
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX7-NEXT:    v_max_f64 v[8:9], v[0:1], v[2:3]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX7-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[2:3]
 ; GFX7-NEXT:    v_mov_b32_e32 v10, 0x7ff80000
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v9, v10, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v8, 0, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v10, v9, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, 0, v8, vcc
 ; GFX7-NEXT:    v_max_f64 v[2:3], v[0:1], v[4:5]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v3, v10, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX7-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[4:5]
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v10, v3, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX7-NEXT:    v_max_f64 v[2:3], v[0:1], v[6:7]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v3, v10, vcc
+; GFX7-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[6:7]
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v10, v3, vcc
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: test_vector_reduce_fmaximum_v4double:
 ; GFX8:       ; %bb.0: ; %entry
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_max_f64 v[8:9], v[0:1], v[2:3]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX8-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[2:3]
 ; GFX8-NEXT:    v_mov_b32_e32 v10, 0x7ff80000
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v9, v10, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v8, 0, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v10, v9, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, 0, v8, vcc
 ; GFX8-NEXT:    v_max_f64 v[2:3], v[0:1], v[4:5]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v10, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX8-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[4:5]
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v10, v3, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX8-NEXT:    v_max_f64 v[2:3], v[0:1], v[6:7]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v10, vcc
+; GFX8-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[6:7]
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v10, v3, vcc
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX9-LABEL: test_vector_reduce_fmaximum_v4double:
@@ -1921,58 +1921,58 @@ define double @test_vector_reduce_fmaximum_v4double(<4 x double> %v) {
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    v_max_f64 v[8:9], v[0:1], v[2:3]
 ; GFX9-NEXT:    v_mov_b32_e32 v10, 0x7ff80000
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX9-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[2:3]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v9, v10, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v8, 0, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v10, v9, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v8, vcc
 ; GFX9-NEXT:    v_max_f64 v[2:3], v[0:1], v[4:5]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX9-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[4:5]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v10, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v10, v3, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX9-NEXT:    v_max_f64 v[2:3], v[0:1], v[6:7]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
+; GFX9-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[6:7]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v10, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v10, v3, vcc
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: test_vector_reduce_fmaximum_v4double:
 ; GFX10:       ; %bb.0: ; %entry
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    v_max_f64 v[8:9], v[0:1], v[2:3]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[2:3]
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v9, 0x7ff80000, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v8, 0, vcc_lo
+; GFX10-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[2:3]
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v9, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0, v8, vcc_lo
 ; GFX10-NEXT:    v_max_f64 v[2:3], v[0:1], v[4:5]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[4:5]
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GFX10-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[4:5]
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc_lo
 ; GFX10-NEXT:    v_max_f64 v[2:3], v[0:1], v[6:7]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[6:7]
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
+; GFX10-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[6:7]
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: test_vector_reduce_fmaximum_v4double:
 ; GFX11:       ; %bb.0: ; %entry
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NEXT:    v_max_f64 v[8:9], v[0:1], v[2:3]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[2:3]
+; GFX11-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[2:3]
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v9, 0x7ff80000, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v8, 0, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v9, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0, v8, vcc_lo
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
 ; GFX11-NEXT:    v_max_f64 v[2:3], v[0:1], v[4:5]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[4:5]
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
+; GFX11-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[4:5]
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc_lo
 ; GFX11-NEXT:    v_max_f64 v[2:3], v[0:1], v[6:7]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[6:7]
+; GFX11-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[6:7]
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: test_vector_reduce_fmaximum_v4double:
@@ -1997,68 +1997,68 @@ define double @test_vector_reduce_fmaximum_v8double(<8 x double> %v) {
 ; GFX7:       ; %bb.0: ; %entry
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX7-NEXT:    v_max_f64 v[16:17], v[0:1], v[2:3]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX7-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[2:3]
 ; GFX7-NEXT:    v_mov_b32_e32 v18, 0x7ff80000
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v17, v18, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v16, 0, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v18, v17, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, 0, v16, vcc
 ; GFX7-NEXT:    v_max_f64 v[2:3], v[0:1], v[4:5]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v3, v18, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX7-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[4:5]
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v18, v3, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX7-NEXT:    v_max_f64 v[2:3], v[0:1], v[6:7]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v3, v18, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX7-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[6:7]
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v18, v3, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX7-NEXT:    v_max_f64 v[2:3], v[0:1], v[8:9]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v3, v18, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX7-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[8:9]
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v18, v3, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX7-NEXT:    v_max_f64 v[2:3], v[0:1], v[10:11]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[10:11]
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v3, v18, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX7-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[10:11]
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v18, v3, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX7-NEXT:    v_max_f64 v[2:3], v[0:1], v[12:13]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[12:13]
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v3, v18, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX7-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[12:13]
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v18, v3, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX7-NEXT:    v_max_f64 v[2:3], v[0:1], v[14:15]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[14:15]
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v3, v18, vcc
+; GFX7-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[14:15]
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v18, v3, vcc
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: test_vector_reduce_fmaximum_v8double:
 ; GFX8:       ; %bb.0: ; %entry
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_max_f64 v[16:17], v[0:1], v[2:3]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX8-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[2:3]
 ; GFX8-NEXT:    v_mov_b32_e32 v18, 0x7ff80000
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v17, v18, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v16, 0, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v18, v17, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, 0, v16, vcc
 ; GFX8-NEXT:    v_max_f64 v[2:3], v[0:1], v[4:5]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v18, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX8-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[4:5]
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v18, v3, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX8-NEXT:    v_max_f64 v[2:3], v[0:1], v[6:7]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v18, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX8-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[6:7]
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v18, v3, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX8-NEXT:    v_max_f64 v[2:3], v[0:1], v[8:9]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v18, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX8-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[8:9]
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v18, v3, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX8-NEXT:    v_max_f64 v[2:3], v[0:1], v[10:11]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[10:11]
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v18, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX8-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[10:11]
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v18, v3, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX8-NEXT:    v_max_f64 v[2:3], v[0:1], v[12:13]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[12:13]
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v18, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX8-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[12:13]
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v18, v3, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX8-NEXT:    v_max_f64 v[2:3], v[0:1], v[14:15]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[14:15]
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v18, vcc
+; GFX8-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[14:15]
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v18, v3, vcc
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX9-LABEL: test_vector_reduce_fmaximum_v8double:
@@ -2066,116 +2066,116 @@ define double @test_vector_reduce_fmaximum_v8double(<8 x double> %v) {
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    v_max_f64 v[16:17], v[0:1], v[2:3]
 ; GFX9-NEXT:    v_mov_b32_e32 v18, 0x7ff80000
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX9-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[2:3]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v17, v18, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v16, 0, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v18, v17, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v16, vcc
 ; GFX9-NEXT:    v_max_f64 v[2:3], v[0:1], v[4:5]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX9-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[4:5]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v18, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v18, v3, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX9-NEXT:    v_max_f64 v[2:3], v[0:1], v[6:7]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
+; GFX9-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[6:7]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v18, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v18, v3, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX9-NEXT:    v_max_f64 v[2:3], v[0:1], v[8:9]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
+; GFX9-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[8:9]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v18, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v18, v3, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX9-NEXT:    v_max_f64 v[2:3], v[0:1], v[10:11]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[10:11]
+; GFX9-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[10:11]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v18, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v18, v3, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX9-NEXT:    v_max_f64 v[2:3], v[0:1], v[12:13]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[12:13]
+; GFX9-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[12:13]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v18, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v18, v3, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX9-NEXT:    v_max_f64 v[2:3], v[0:1], v[14:15]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[14:15]
+; GFX9-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[14:15]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v18, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v18, v3, vcc
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: test_vector_reduce_fmaximum_v8double:
 ; GFX10:       ; %bb.0: ; %entry
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    v_max_f64 v[16:17], v[0:1], v[2:3]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[2:3]
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v17, 0x7ff80000, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v16, 0, vcc_lo
+; GFX10-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[2:3]
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v17, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0, v16, vcc_lo
 ; GFX10-NEXT:    v_max_f64 v[2:3], v[0:1], v[4:5]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[4:5]
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GFX10-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[4:5]
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc_lo
 ; GFX10-NEXT:    v_max_f64 v[2:3], v[0:1], v[6:7]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[6:7]
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GFX10-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[6:7]
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc_lo
 ; GFX10-NEXT:    v_max_f64 v[2:3], v[0:1], v[8:9]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[8:9]
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GFX10-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[8:9]
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc_lo
 ; GFX10-NEXT:    v_max_f64 v[2:3], v[0:1], v[10:11]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[10:11]
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GFX10-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[10:11]
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc_lo
 ; GFX10-NEXT:    v_max_f64 v[2:3], v[0:1], v[12:13]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[12:13]
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GFX10-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[12:13]
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc_lo
 ; GFX10-NEXT:    v_max_f64 v[2:3], v[0:1], v[14:15]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[14:15]
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
+; GFX10-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[14:15]
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: test_vector_reduce_fmaximum_v8double:
 ; GFX11:       ; %bb.0: ; %entry
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NEXT:    v_max_f64 v[16:17], v[0:1], v[2:3]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[2:3]
+; GFX11-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[2:3]
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v17, 0x7ff80000, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v16, 0, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v17, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0, v16, vcc_lo
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
 ; GFX11-NEXT:    v_max_f64 v[2:3], v[0:1], v[4:5]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[4:5]
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
+; GFX11-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[4:5]
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc_lo
 ; GFX11-NEXT:    v_max_f64 v[2:3], v[0:1], v[6:7]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[6:7]
+; GFX11-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[6:7]
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc_lo
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
 ; GFX11-NEXT:    v_max_f64 v[2:3], v[0:1], v[8:9]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[8:9]
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
+; GFX11-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[8:9]
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc_lo
 ; GFX11-NEXT:    v_max_f64 v[2:3], v[0:1], v[10:11]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[10:11]
+; GFX11-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[10:11]
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc_lo
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
 ; GFX11-NEXT:    v_max_f64 v[2:3], v[0:1], v[12:13]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[12:13]
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
+; GFX11-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[12:13]
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc_lo
 ; GFX11-NEXT:    v_max_f64 v[2:3], v[0:1], v[14:15]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[14:15]
+; GFX11-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[14:15]
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: test_vector_reduce_fmaximum_v8double:
@@ -2206,136 +2206,136 @@ define double @test_vector_reduce_fmaximum_v16double(<16 x double> %v) {
 ; GFX7:       ; %bb.0: ; %entry
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX7-NEXT:    v_max_f64 v[31:32], v[0:1], v[2:3]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX7-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[2:3]
 ; GFX7-NEXT:    v_mov_b32_e32 v33, 0x7ff80000
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v32, v33, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v31, 0, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v33, v32, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, 0, v31, vcc
 ; GFX7-NEXT:    v_max_f64 v[2:3], v[0:1], v[4:5]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX7-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[4:5]
 ; GFX7-NEXT:    buffer_load_dword v31, off, s[0:3], s32
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v33, v3, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX7-NEXT:    v_max_f64 v[2:3], v[0:1], v[6:7]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX7-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[6:7]
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v33, v3, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX7-NEXT:    v_max_f64 v[2:3], v[0:1], v[8:9]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX7-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[8:9]
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v33, v3, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX7-NEXT:    v_max_f64 v[2:3], v[0:1], v[10:11]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[10:11]
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX7-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[10:11]
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v33, v3, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX7-NEXT:    v_max_f64 v[2:3], v[0:1], v[12:13]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[12:13]
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX7-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[12:13]
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v33, v3, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX7-NEXT:    v_max_f64 v[2:3], v[0:1], v[14:15]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[14:15]
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX7-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[14:15]
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v33, v3, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX7-NEXT:    v_max_f64 v[2:3], v[0:1], v[16:17]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[16:17]
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX7-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[16:17]
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v33, v3, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX7-NEXT:    v_max_f64 v[2:3], v[0:1], v[18:19]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[18:19]
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX7-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[18:19]
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v33, v3, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX7-NEXT:    v_max_f64 v[2:3], v[0:1], v[20:21]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[20:21]
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX7-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[20:21]
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v33, v3, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX7-NEXT:    v_max_f64 v[2:3], v[0:1], v[22:23]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[22:23]
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX7-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[22:23]
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v33, v3, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX7-NEXT:    v_max_f64 v[2:3], v[0:1], v[24:25]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[24:25]
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX7-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[24:25]
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v33, v3, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX7-NEXT:    v_max_f64 v[2:3], v[0:1], v[26:27]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[26:27]
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX7-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[26:27]
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v33, v3, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX7-NEXT:    v_max_f64 v[2:3], v[0:1], v[28:29]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[28:29]
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX7-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[28:29]
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v33, v3, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX7-NEXT:    s_waitcnt vmcnt(0)
 ; GFX7-NEXT:    v_max_f64 v[2:3], v[0:1], v[30:31]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[30:31]
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v3, v33, vcc
+; GFX7-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[30:31]
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v33, v3, vcc
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: test_vector_reduce_fmaximum_v16double:
 ; GFX8:       ; %bb.0: ; %entry
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_max_f64 v[31:32], v[0:1], v[2:3]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX8-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[2:3]
 ; GFX8-NEXT:    v_mov_b32_e32 v33, 0x7ff80000
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v32, v33, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v31, 0, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v33, v32, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, 0, v31, vcc
 ; GFX8-NEXT:    v_max_f64 v[2:3], v[0:1], v[4:5]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX8-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[4:5]
 ; GFX8-NEXT:    buffer_load_dword v31, off, s[0:3], s32
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v33, v3, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX8-NEXT:    v_max_f64 v[2:3], v[0:1], v[6:7]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX8-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[6:7]
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v33, v3, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX8-NEXT:    v_max_f64 v[2:3], v[0:1], v[8:9]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX8-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[8:9]
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v33, v3, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX8-NEXT:    v_max_f64 v[2:3], v[0:1], v[10:11]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[10:11]
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX8-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[10:11]
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v33, v3, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX8-NEXT:    v_max_f64 v[2:3], v[0:1], v[12:13]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[12:13]
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX8-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[12:13]
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v33, v3, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX8-NEXT:    v_max_f64 v[2:3], v[0:1], v[14:15]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[14:15]
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX8-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[14:15]
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v33, v3, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX8-NEXT:    v_max_f64 v[2:3], v[0:1], v[16:17]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[16:17]
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX8-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[16:17]
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v33, v3, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX8-NEXT:    v_max_f64 v[2:3], v[0:1], v[18:19]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[18:19]
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX8-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[18:19]
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v33, v3, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX8-NEXT:    v_max_f64 v[2:3], v[0:1], v[20:21]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[20:21]
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX8-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[20:21]
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v33, v3, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX8-NEXT:    v_max_f64 v[2:3], v[0:1], v[22:23]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[22:23]
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX8-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[22:23]
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v33, v3, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX8-NEXT:    v_max_f64 v[2:3], v[0:1], v[24:25]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[24:25]
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX8-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[24:25]
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v33, v3, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX8-NEXT:    v_max_f64 v[2:3], v[0:1], v[26:27]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[26:27]
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX8-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[26:27]
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v33, v3, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX8-NEXT:    v_max_f64 v[2:3], v[0:1], v[28:29]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[28:29]
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX8-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[28:29]
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v33, v3, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX8-NEXT:    s_waitcnt vmcnt(0)
 ; GFX8-NEXT:    v_max_f64 v[2:3], v[0:1], v[30:31]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[30:31]
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v33, vcc
+; GFX8-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[30:31]
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v33, v3, vcc
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX9-LABEL: test_vector_reduce_fmaximum_v16double:
@@ -2344,237 +2344,237 @@ define double @test_vector_reduce_fmaximum_v16double(<16 x double> %v) {
 ; GFX9-NEXT:    scratch_load_dword v31, off, s32
 ; GFX9-NEXT:    v_max_f64 v[32:33], v[0:1], v[2:3]
 ; GFX9-NEXT:    v_mov_b32_e32 v34, 0x7ff80000
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX9-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[2:3]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v33, v34, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v32, 0, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v34, v33, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v32, vcc
 ; GFX9-NEXT:    v_max_f64 v[2:3], v[0:1], v[4:5]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX9-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[4:5]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v34, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v34, v3, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX9-NEXT:    v_max_f64 v[2:3], v[0:1], v[6:7]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
+; GFX9-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[6:7]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v34, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v34, v3, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX9-NEXT:    v_max_f64 v[2:3], v[0:1], v[8:9]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
+; GFX9-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[8:9]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v34, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v34, v3, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX9-NEXT:    v_max_f64 v[2:3], v[0:1], v[10:11]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[10:11]
+; GFX9-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[10:11]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v34, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v34, v3, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX9-NEXT:    v_max_f64 v[2:3], v[0:1], v[12:13]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[12:13]
+; GFX9-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[12:13]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v34, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v34, v3, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX9-NEXT:    v_max_f64 v[2:3], v[0:1], v[14:15]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[14:15]
+; GFX9-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[14:15]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v34, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v34, v3, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX9-NEXT:    v_max_f64 v[2:3], v[0:1], v[16:17]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[16:17]
+; GFX9-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[16:17]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v34, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v34, v3, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX9-NEXT:    v_max_f64 v[2:3], v[0:1], v[18:19]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[18:19]
+; GFX9-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[18:19]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v34, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v34, v3, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX9-NEXT:    v_max_f64 v[2:3], v[0:1], v[20:21]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[20:21]
+; GFX9-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[20:21]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v34, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v34, v3, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX9-NEXT:    v_max_f64 v[2:3], v[0:1], v[22:23]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[22:23]
+; GFX9-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[22:23]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v34, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v34, v3, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX9-NEXT:    v_max_f64 v[2:3], v[0:1], v[24:25]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[24:25]
+; GFX9-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[24:25]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v34, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v34, v3, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX9-NEXT:    v_max_f64 v[2:3], v[0:1], v[26:27]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[26:27]
+; GFX9-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[26:27]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v34, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v34, v3, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX9-NEXT:    v_max_f64 v[2:3], v[0:1], v[28:29]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[28:29]
+; GFX9-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[28:29]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v34, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v34, v3, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-NEXT:    v_max_f64 v[2:3], v[0:1], v[30:31]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[30:31]
+; GFX9-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[30:31]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v34, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v34, v3, vcc
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: test_vector_reduce_fmaximum_v16double:
 ; GFX10:       ; %bb.0: ; %entry
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    v_max_f64 v[31:32], v[0:1], v[2:3]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[2:3]
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v32, 0x7ff80000, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v31, 0, vcc_lo
+; GFX10-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[2:3]
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v32, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0, v31, vcc_lo
 ; GFX10-NEXT:    buffer_load_dword v31, off, s[0:3], s32
 ; GFX10-NEXT:    v_max_f64 v[2:3], v[0:1], v[4:5]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[4:5]
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GFX10-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[4:5]
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc_lo
 ; GFX10-NEXT:    v_max_f64 v[2:3], v[0:1], v[6:7]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[6:7]
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GFX10-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[6:7]
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc_lo
 ; GFX10-NEXT:    v_max_f64 v[2:3], v[0:1], v[8:9]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[8:9]
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GFX10-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[8:9]
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc_lo
 ; GFX10-NEXT:    v_max_f64 v[2:3], v[0:1], v[10:11]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[10:11]
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GFX10-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[10:11]
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc_lo
 ; GFX10-NEXT:    v_max_f64 v[2:3], v[0:1], v[12:13]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[12:13]
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GFX10-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[12:13]
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc_lo
 ; GFX10-NEXT:    v_max_f64 v[2:3], v[0:1], v[14:15]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[14:15]
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GFX10-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[14:15]
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc_lo
 ; GFX10-NEXT:    v_max_f64 v[2:3], v[0:1], v[16:17]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[16:17]
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GFX10-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[16:17]
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc_lo
 ; GFX10-NEXT:    v_max_f64 v[2:3], v[0:1], v[18:19]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[18:19]
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GFX10-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[18:19]
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc_lo
 ; GFX10-NEXT:    v_max_f64 v[2:3], v[0:1], v[20:21]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[20:21]
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GFX10-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[20:21]
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc_lo
 ; GFX10-NEXT:    v_max_f64 v[2:3], v[0:1], v[22:23]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[22:23]
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GFX10-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[22:23]
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc_lo
 ; GFX10-NEXT:    v_max_f64 v[2:3], v[0:1], v[24:25]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[24:25]
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GFX10-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[24:25]
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc_lo
 ; GFX10-NEXT:    v_max_f64 v[2:3], v[0:1], v[26:27]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[26:27]
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GFX10-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[26:27]
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc_lo
 ; GFX10-NEXT:    v_max_f64 v[2:3], v[0:1], v[28:29]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[28:29]
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GFX10-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[28:29]
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc_lo
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    v_max_f64 v[2:3], v[0:1], v[30:31]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[30:31]
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
+; GFX10-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[30:31]
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: test_vector_reduce_fmaximum_v16double:
 ; GFX11:       ; %bb.0: ; %entry
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NEXT:    v_max_f64 v[31:32], v[0:1], v[2:3]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[2:3]
+; GFX11-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[2:3]
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v32, 0x7ff80000, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v31, 0, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v32, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0, v31, vcc_lo
 ; GFX11-NEXT:    scratch_load_b32 v31, off, s32
 ; GFX11-NEXT:    v_max_f64 v[2:3], v[0:1], v[4:5]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[4:5]
+; GFX11-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[4:5]
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc_lo
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
 ; GFX11-NEXT:    v_max_f64 v[2:3], v[0:1], v[6:7]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[6:7]
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
+; GFX11-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[6:7]
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc_lo
 ; GFX11-NEXT:    v_max_f64 v[2:3], v[0:1], v[8:9]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[8:9]
+; GFX11-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[8:9]
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc_lo
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
 ; GFX11-NEXT:    v_max_f64 v[2:3], v[0:1], v[10:11]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[10:11]
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
+; GFX11-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[10:11]
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc_lo
 ; GFX11-NEXT:    v_max_f64 v[2:3], v[0:1], v[12:13]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[12:13]
+; GFX11-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[12:13]
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc_lo
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
 ; GFX11-NEXT:    v_max_f64 v[2:3], v[0:1], v[14:15]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[14:15]
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
+; GFX11-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[14:15]
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc_lo
 ; GFX11-NEXT:    v_max_f64 v[2:3], v[0:1], v[16:17]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[16:17]
+; GFX11-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[16:17]
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc_lo
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
 ; GFX11-NEXT:    v_max_f64 v[2:3], v[0:1], v[18:19]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[18:19]
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
+; GFX11-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[18:19]
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc_lo
 ; GFX11-NEXT:    v_max_f64 v[2:3], v[0:1], v[20:21]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[20:21]
+; GFX11-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[20:21]
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc_lo
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
 ; GFX11-NEXT:    v_max_f64 v[2:3], v[0:1], v[22:23]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[22:23]
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
+; GFX11-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[22:23]
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc_lo
 ; GFX11-NEXT:    v_max_f64 v[2:3], v[0:1], v[24:25]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[24:25]
+; GFX11-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[24:25]
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc_lo
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
 ; GFX11-NEXT:    v_max_f64 v[2:3], v[0:1], v[26:27]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[26:27]
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
+; GFX11-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[26:27]
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc_lo
 ; GFX11-NEXT:    v_max_f64 v[2:3], v[0:1], v[28:29]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[28:29]
+; GFX11-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[28:29]
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc_lo
 ; GFX11-NEXT:    s_waitcnt vmcnt(0)
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
 ; GFX11-NEXT:    v_max_f64 v[2:3], v[0:1], v[30:31]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[30:31]
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GFX11-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[30:31]
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc_lo
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3)
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: test_vector_reduce_fmaximum_v16double:
diff --git a/llvm/test/CodeGen/AMDGPU/vector-reduce-fminimum.ll b/llvm/test/CodeGen/AMDGPU/vector-reduce-fminimum.ll
index 61819a85dd82c..c36400fb65988 100644
--- a/llvm/test/CodeGen/AMDGPU/vector-reduce-fminimum.ll
+++ b/llvm/test/CodeGen/AMDGPU/vector-reduce-fminimum.ll
@@ -1978,50 +1978,50 @@ define double @test_vector_reduce_fminimum_v2double(<2 x double> %v) {
 ; GFX7:       ; %bb.0: ; %entry
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX7-NEXT:    v_min_f64 v[4:5], v[0:1], v[2:3]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX7-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[2:3]
 ; GFX7-NEXT:    v_mov_b32_e32 v1, 0x7ff80000
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: test_vector_reduce_fminimum_v2double:
 ; GFX8:       ; %bb.0: ; %entry
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_min_f64 v[4:5], v[0:1], v[2:3]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX8-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[2:3]
 ; GFX8-NEXT:    v_mov_b32_e32 v1, 0x7ff80000
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX9-LABEL: test_vector_reduce_fminimum_v2double:
 ; GFX9:       ; %bb.0: ; %entry
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    v_min_f64 v[4:5], v[0:1], v[2:3]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX9-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[2:3]
 ; GFX9-NEXT:    v_mov_b32_e32 v1, 0x7ff80000
 ; GFX9-NEXT:    s_nop 0
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: test_vector_reduce_fminimum_v2double:
 ; GFX10:       ; %bb.0: ; %entry
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    v_min_f64 v[4:5], v[0:1], v[2:3]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[2:3]
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v5, 0x7ff80000, vcc_lo
+; GFX10-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[2:3]
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v5, vcc_lo
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: test_vector_reduce_fminimum_v2double:
 ; GFX11:       ; %bb.0: ; %entry
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NEXT:    v_min_f64 v[4:5], v[0:1], v[2:3]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[2:3]
+; GFX11-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[2:3]
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v5, 0x7ff80000, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v5, vcc_lo
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: test_vector_reduce_fminimum_v2double:
@@ -2043,28 +2043,28 @@ define double @test_vector_reduce_fminimum_v3double(<3 x double> %v) {
 ; GFX7:       ; %bb.0: ; %entry
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX7-NEXT:    v_min_f64 v[6:7], v[0:1], v[2:3]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX7-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[2:3]
 ; GFX7-NEXT:    v_mov_b32_e32 v8, 0x7ff80000
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v7, v8, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v6, 0, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v8, v7, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, 0, v6, vcc
 ; GFX7-NEXT:    v_min_f64 v[2:3], v[0:1], v[4:5]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v3, v8, vcc
+; GFX7-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[4:5]
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v8, v3, vcc
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: test_vector_reduce_fminimum_v3double:
 ; GFX8:       ; %bb.0: ; %entry
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_min_f64 v[6:7], v[0:1], v[2:3]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX8-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[2:3]
 ; GFX8-NEXT:    v_mov_b32_e32 v8, 0x7ff80000
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v7, v8, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v6, 0, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v8, v7, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, 0, v6, vcc
 ; GFX8-NEXT:    v_min_f64 v[2:3], v[0:1], v[4:5]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v8, vcc
+; GFX8-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[4:5]
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v8, v3, vcc
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX9-LABEL: test_vector_reduce_fminimum_v3double:
@@ -2072,44 +2072,44 @@ define double @test_vector_reduce_fminimum_v3double(<3 x double> %v) {
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    v_min_f64 v[6:7], v[0:1], v[2:3]
 ; GFX9-NEXT:    v_mov_b32_e32 v8, 0x7ff80000
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX9-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[2:3]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v7, v8, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v6, 0, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v8, v7, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v6, vcc
 ; GFX9-NEXT:    v_min_f64 v[2:3], v[0:1], v[4:5]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX9-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[4:5]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v8, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v8, v3, vcc
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: test_vector_reduce_fminimum_v3double:
 ; GFX10:       ; %bb.0: ; %entry
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    v_min_f64 v[6:7], v[0:1], v[2:3]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[2:3]
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v7, 0x7ff80000, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v6, 0, vcc_lo
+; GFX10-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[2:3]
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v7, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0, v6, vcc_lo
 ; GFX10-NEXT:    v_min_f64 v[2:3], v[0:1], v[4:5]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[4:5]
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
+; GFX10-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[4:5]
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: test_vector_reduce_fminimum_v3double:
 ; GFX11:       ; %bb.0: ; %entry
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NEXT:    v_min_f64 v[6:7], v[0:1], v[2:3]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[2:3]
+; GFX11-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[2:3]
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v7, 0x7ff80000, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v6, 0, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v7, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0, v6, vcc_lo
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
 ; GFX11-NEXT:    v_min_f64 v[2:3], v[0:1], v[4:5]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[4:5]
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GFX11-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[4:5]
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc_lo
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3)
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: test_vector_reduce_fminimum_v3double:
@@ -2133,36 +2133,36 @@ define double @test_vector_reduce_fminimum_v4double(<4 x double> %v) {
 ; GFX7:       ; %bb.0: ; %entry
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX7-NEXT:    v_min_f64 v[8:9], v[0:1], v[2:3]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX7-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[2:3]
 ; GFX7-NEXT:    v_mov_b32_e32 v10, 0x7ff80000
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v9, v10, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v8, 0, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v10, v9, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, 0, v8, vcc
 ; GFX7-NEXT:    v_min_f64 v[2:3], v[0:1], v[4:5]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v3, v10, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX7-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[4:5]
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v10, v3, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX7-NEXT:    v_min_f64 v[2:3], v[0:1], v[6:7]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v3, v10, vcc
+; GFX7-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[6:7]
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v10, v3, vcc
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: test_vector_reduce_fminimum_v4double:
 ; GFX8:       ; %bb.0: ; %entry
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_min_f64 v[8:9], v[0:1], v[2:3]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX8-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[2:3]
 ; GFX8-NEXT:    v_mov_b32_e32 v10, 0x7ff80000
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v9, v10, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v8, 0, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v10, v9, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, 0, v8, vcc
 ; GFX8-NEXT:    v_min_f64 v[2:3], v[0:1], v[4:5]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v10, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX8-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[4:5]
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v10, v3, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX8-NEXT:    v_min_f64 v[2:3], v[0:1], v[6:7]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v10, vcc
+; GFX8-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[6:7]
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v10, v3, vcc
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX9-LABEL: test_vector_reduce_fminimum_v4double:
@@ -2170,58 +2170,58 @@ define double @test_vector_reduce_fminimum_v4double(<4 x double> %v) {
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    v_min_f64 v[8:9], v[0:1], v[2:3]
 ; GFX9-NEXT:    v_mov_b32_e32 v10, 0x7ff80000
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX9-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[2:3]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v9, v10, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v8, 0, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v10, v9, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v8, vcc
 ; GFX9-NEXT:    v_min_f64 v[2:3], v[0:1], v[4:5]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX9-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[4:5]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v10, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v10, v3, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX9-NEXT:    v_min_f64 v[2:3], v[0:1], v[6:7]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
+; GFX9-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[6:7]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v10, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v10, v3, vcc
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: test_vector_reduce_fminimum_v4double:
 ; GFX10:       ; %bb.0: ; %entry
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    v_min_f64 v[8:9], v[0:1], v[2:3]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[2:3]
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v9, 0x7ff80000, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v8, 0, vcc_lo
+; GFX10-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[2:3]
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v9, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0, v8, vcc_lo
 ; GFX10-NEXT:    v_min_f64 v[2:3], v[0:1], v[4:5]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[4:5]
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GFX10-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[4:5]
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc_lo
 ; GFX10-NEXT:    v_min_f64 v[2:3], v[0:1], v[6:7]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[6:7]
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
+; GFX10-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[6:7]
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: test_vector_reduce_fminimum_v4double:
 ; GFX11:       ; %bb.0: ; %entry
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NEXT:    v_min_f64 v[8:9], v[0:1], v[2:3]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[2:3]
+; GFX11-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[2:3]
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v9, 0x7ff80000, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v8, 0, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v9, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0, v8, vcc_lo
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
 ; GFX11-NEXT:    v_min_f64 v[2:3], v[0:1], v[4:5]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[4:5]
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
+; GFX11-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[4:5]
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc_lo
 ; GFX11-NEXT:    v_min_f64 v[2:3], v[0:1], v[6:7]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[6:7]
+; GFX11-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[6:7]
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-SDAG-LABEL: test_vector_reduce_fminimum_v4double:
@@ -2259,68 +2259,68 @@ define double @test_vector_reduce_fminimum_v8double(<8 x double> %v) {
 ; GFX7:       ; %bb.0: ; %entry
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX7-NEXT:    v_min_f64 v[16:17], v[0:1], v[2:3]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX7-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[2:3]
 ; GFX7-NEXT:    v_mov_b32_e32 v18, 0x7ff80000
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v17, v18, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v16, 0, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v18, v17, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, 0, v16, vcc
 ; GFX7-NEXT:    v_min_f64 v[2:3], v[0:1], v[4:5]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v3, v18, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX7-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[4:5]
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v18, v3, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX7-NEXT:    v_min_f64 v[2:3], v[0:1], v[6:7]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v3, v18, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX7-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[6:7]
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v18, v3, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX7-NEXT:    v_min_f64 v[2:3], v[0:1], v[8:9]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v3, v18, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX7-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[8:9]
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v18, v3, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX7-NEXT:    v_min_f64 v[2:3], v[0:1], v[10:11]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[10:11]
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v3, v18, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX7-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[10:11]
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v18, v3, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX7-NEXT:    v_min_f64 v[2:3], v[0:1], v[12:13]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[12:13]
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v3, v18, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX7-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[12:13]
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v18, v3, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX7-NEXT:    v_min_f64 v[2:3], v[0:1], v[14:15]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[14:15]
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v3, v18, vcc
+; GFX7-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[14:15]
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v18, v3, vcc
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: test_vector_reduce_fminimum_v8double:
 ; GFX8:       ; %bb.0: ; %entry
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_min_f64 v[16:17], v[0:1], v[2:3]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX8-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[2:3]
 ; GFX8-NEXT:    v_mov_b32_e32 v18, 0x7ff80000
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v17, v18, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v16, 0, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v18, v17, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, 0, v16, vcc
 ; GFX8-NEXT:    v_min_f64 v[2:3], v[0:1], v[4:5]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v18, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX8-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[4:5]
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v18, v3, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX8-NEXT:    v_min_f64 v[2:3], v[0:1], v[6:7]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v18, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX8-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[6:7]
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v18, v3, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX8-NEXT:    v_min_f64 v[2:3], v[0:1], v[8:9]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v18, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX8-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[8:9]
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v18, v3, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX8-NEXT:    v_min_f64 v[2:3], v[0:1], v[10:11]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[10:11]
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v18, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX8-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[10:11]
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v18, v3, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX8-NEXT:    v_min_f64 v[2:3], v[0:1], v[12:13]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[12:13]
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v18, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX8-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[12:13]
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v18, v3, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX8-NEXT:    v_min_f64 v[2:3], v[0:1], v[14:15]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[14:15]
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v18, vcc
+; GFX8-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[14:15]
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v18, v3, vcc
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX9-LABEL: test_vector_reduce_fminimum_v8double:
@@ -2328,116 +2328,116 @@ define double @test_vector_reduce_fminimum_v8double(<8 x double> %v) {
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    v_min_f64 v[16:17], v[0:1], v[2:3]
 ; GFX9-NEXT:    v_mov_b32_e32 v18, 0x7ff80000
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX9-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[2:3]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v17, v18, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v16, 0, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v18, v17, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v16, vcc
 ; GFX9-NEXT:    v_min_f64 v[2:3], v[0:1], v[4:5]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX9-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[4:5]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v18, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v18, v3, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX9-NEXT:    v_min_f64 v[2:3], v[0:1], v[6:7]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
+; GFX9-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[6:7]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v18, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v18, v3, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX9-NEXT:    v_min_f64 v[2:3], v[0:1], v[8:9]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
+; GFX9-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[8:9]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v18, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v18, v3, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX9-NEXT:    v_min_f64 v[2:3], v[0:1], v[10:11]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[10:11]
+; GFX9-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[10:11]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v18, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v18, v3, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX9-NEXT:    v_min_f64 v[2:3], v[0:1], v[12:13]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[12:13]
+; GFX9-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[12:13]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v18, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v18, v3, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX9-NEXT:    v_min_f64 v[2:3], v[0:1], v[14:15]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[14:15]
+; GFX9-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[14:15]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v18, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v18, v3, vcc
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: test_vector_reduce_fminimum_v8double:
 ; GFX10:       ; %bb.0: ; %entry
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    v_min_f64 v[16:17], v[0:1], v[2:3]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[2:3]
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v17, 0x7ff80000, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v16, 0, vcc_lo
+; GFX10-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[2:3]
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v17, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0, v16, vcc_lo
 ; GFX10-NEXT:    v_min_f64 v[2:3], v[0:1], v[4:5]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[4:5]
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GFX10-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[4:5]
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc_lo
 ; GFX10-NEXT:    v_min_f64 v[2:3], v[0:1], v[6:7]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[6:7]
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GFX10-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[6:7]
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc_lo
 ; GFX10-NEXT:    v_min_f64 v[2:3], v[0:1], v[8:9]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[8:9]
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GFX10-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[8:9]
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc_lo
 ; GFX10-NEXT:    v_min_f64 v[2:3], v[0:1], v[10:11]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[10:11]
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GFX10-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[10:11]
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc_lo
 ; GFX10-NEXT:    v_min_f64 v[2:3], v[0:1], v[12:13]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[12:13]
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GFX10-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[12:13]
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc_lo
 ; GFX10-NEXT:    v_min_f64 v[2:3], v[0:1], v[14:15]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[14:15]
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
+; GFX10-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[14:15]
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: test_vector_reduce_fminimum_v8double:
 ; GFX11:       ; %bb.0: ; %entry
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NEXT:    v_min_f64 v[16:17], v[0:1], v[2:3]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[2:3]
+; GFX11-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[2:3]
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v17, 0x7ff80000, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v16, 0, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v17, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0, v16, vcc_lo
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
 ; GFX11-NEXT:    v_min_f64 v[2:3], v[0:1], v[4:5]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[4:5]
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
+; GFX11-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[4:5]
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc_lo
 ; GFX11-NEXT:    v_min_f64 v[2:3], v[0:1], v[6:7]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[6:7]
+; GFX11-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[6:7]
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc_lo
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
 ; GFX11-NEXT:    v_min_f64 v[2:3], v[0:1], v[8:9]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[8:9]
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
+; GFX11-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[8:9]
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc_lo
 ; GFX11-NEXT:    v_min_f64 v[2:3], v[0:1], v[10:11]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[10:11]
+; GFX11-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[10:11]
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc_lo
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
 ; GFX11-NEXT:    v_min_f64 v[2:3], v[0:1], v[12:13]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[12:13]
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
+; GFX11-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[12:13]
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc_lo
 ; GFX11-NEXT:    v_min_f64 v[2:3], v[0:1], v[14:15]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[14:15]
+; GFX11-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[14:15]
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-SDAG-LABEL: test_vector_reduce_fminimum_v8double:
@@ -2486,136 +2486,136 @@ define double @test_vector_reduce_fminimum_v16double(<16 x double> %v) {
 ; GFX7:       ; %bb.0: ; %entry
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX7-NEXT:    v_min_f64 v[31:32], v[0:1], v[2:3]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX7-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[2:3]
 ; GFX7-NEXT:    v_mov_b32_e32 v33, 0x7ff80000
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v32, v33, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v31, 0, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v33, v32, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, 0, v31, vcc
 ; GFX7-NEXT:    v_min_f64 v[2:3], v[0:1], v[4:5]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX7-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[4:5]
 ; GFX7-NEXT:    buffer_load_dword v31, off, s[0:3], s32
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v33, v3, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX7-NEXT:    v_min_f64 v[2:3], v[0:1], v[6:7]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX7-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[6:7]
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v33, v3, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX7-NEXT:    v_min_f64 v[2:3], v[0:1], v[8:9]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX7-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[8:9]
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v33, v3, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX7-NEXT:    v_min_f64 v[2:3], v[0:1], v[10:11]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[10:11]
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX7-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[10:11]
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v33, v3, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX7-NEXT:    v_min_f64 v[2:3], v[0:1], v[12:13]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[12:13]
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX7-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[12:13]
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v33, v3, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX7-NEXT:    v_min_f64 v[2:3], v[0:1], v[14:15]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[14:15]
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX7-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[14:15]
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v33, v3, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX7-NEXT:    v_min_f64 v[2:3], v[0:1], v[16:17]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[16:17]
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX7-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[16:17]
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v33, v3, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX7-NEXT:    v_min_f64 v[2:3], v[0:1], v[18:19]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[18:19]
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX7-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[18:19]
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v33, v3, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX7-NEXT:    v_min_f64 v[2:3], v[0:1], v[20:21]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[20:21]
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX7-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[20:21]
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v33, v3, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX7-NEXT:    v_min_f64 v[2:3], v[0:1], v[22:23]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[22:23]
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX7-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[22:23]
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v33, v3, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX7-NEXT:    v_min_f64 v[2:3], v[0:1], v[24:25]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[24:25]
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX7-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[24:25]
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v33, v3, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX7-NEXT:    v_min_f64 v[2:3], v[0:1], v[26:27]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[26:27]
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX7-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[26:27]
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v33, v3, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX7-NEXT:    v_min_f64 v[2:3], v[0:1], v[28:29]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[28:29]
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX7-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[28:29]
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v33, v3, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX7-NEXT:    s_waitcnt vmcnt(0)
 ; GFX7-NEXT:    v_min_f64 v[2:3], v[0:1], v[30:31]
-; GFX7-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[30:31]
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v3, v33, vcc
+; GFX7-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[30:31]
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v33, v3, vcc
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: test_vector_reduce_fminimum_v16double:
 ; GFX8:       ; %bb.0: ; %entry
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_min_f64 v[31:32], v[0:1], v[2:3]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX8-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[2:3]
 ; GFX8-NEXT:    v_mov_b32_e32 v33, 0x7ff80000
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v32, v33, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v31, 0, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v33, v32, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, 0, v31, vcc
 ; GFX8-NEXT:    v_min_f64 v[2:3], v[0:1], v[4:5]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX8-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[4:5]
 ; GFX8-NEXT:    buffer_load_dword v31, off, s[0:3], s32
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v33, v3, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX8-NEXT:    v_min_f64 v[2:3], v[0:1], v[6:7]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX8-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[6:7]
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v33, v3, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX8-NEXT:    v_min_f64 v[2:3], v[0:1], v[8:9]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX8-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[8:9]
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v33, v3, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX8-NEXT:    v_min_f64 v[2:3], v[0:1], v[10:11]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[10:11]
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX8-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[10:11]
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v33, v3, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX8-NEXT:    v_min_f64 v[2:3], v[0:1], v[12:13]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[12:13]
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX8-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[12:13]
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v33, v3, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX8-NEXT:    v_min_f64 v[2:3], v[0:1], v[14:15]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[14:15]
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX8-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[14:15]
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v33, v3, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX8-NEXT:    v_min_f64 v[2:3], v[0:1], v[16:17]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[16:17]
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX8-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[16:17]
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v33, v3, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX8-NEXT:    v_min_f64 v[2:3], v[0:1], v[18:19]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[18:19]
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX8-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[18:19]
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v33, v3, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX8-NEXT:    v_min_f64 v[2:3], v[0:1], v[20:21]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[20:21]
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX8-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[20:21]
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v33, v3, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX8-NEXT:    v_min_f64 v[2:3], v[0:1], v[22:23]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[22:23]
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX8-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[22:23]
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v33, v3, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX8-NEXT:    v_min_f64 v[2:3], v[0:1], v[24:25]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[24:25]
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX8-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[24:25]
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v33, v3, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX8-NEXT:    v_min_f64 v[2:3], v[0:1], v[26:27]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[26:27]
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX8-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[26:27]
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v33, v3, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX8-NEXT:    v_min_f64 v[2:3], v[0:1], v[28:29]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[28:29]
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX8-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[28:29]
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v33, v3, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX8-NEXT:    s_waitcnt vmcnt(0)
 ; GFX8-NEXT:    v_min_f64 v[2:3], v[0:1], v[30:31]
-; GFX8-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[30:31]
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v33, vcc
+; GFX8-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[30:31]
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v33, v3, vcc
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX9-LABEL: test_vector_reduce_fminimum_v16double:
@@ -2624,237 +2624,237 @@ define double @test_vector_reduce_fminimum_v16double(<16 x double> %v) {
 ; GFX9-NEXT:    scratch_load_dword v31, off, s32
 ; GFX9-NEXT:    v_min_f64 v[32:33], v[0:1], v[2:3]
 ; GFX9-NEXT:    v_mov_b32_e32 v34, 0x7ff80000
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX9-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[2:3]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v33, v34, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v32, 0, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v34, v33, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v32, vcc
 ; GFX9-NEXT:    v_min_f64 v[2:3], v[0:1], v[4:5]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX9-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[4:5]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v34, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v34, v3, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX9-NEXT:    v_min_f64 v[2:3], v[0:1], v[6:7]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
+; GFX9-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[6:7]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v34, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v34, v3, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX9-NEXT:    v_min_f64 v[2:3], v[0:1], v[8:9]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
+; GFX9-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[8:9]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v34, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v34, v3, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX9-NEXT:    v_min_f64 v[2:3], v[0:1], v[10:11]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[10:11]
+; GFX9-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[10:11]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v34, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v34, v3, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX9-NEXT:    v_min_f64 v[2:3], v[0:1], v[12:13]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[12:13]
+; GFX9-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[12:13]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v34, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v34, v3, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX9-NEXT:    v_min_f64 v[2:3], v[0:1], v[14:15]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[14:15]
+; GFX9-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[14:15]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v34, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v34, v3, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX9-NEXT:    v_min_f64 v[2:3], v[0:1], v[16:17]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[16:17]
+; GFX9-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[16:17]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v34, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v34, v3, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX9-NEXT:    v_min_f64 v[2:3], v[0:1], v[18:19]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[18:19]
+; GFX9-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[18:19]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v34, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v34, v3, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX9-NEXT:    v_min_f64 v[2:3], v[0:1], v[20:21]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[20:21]
+; GFX9-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[20:21]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v34, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v34, v3, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX9-NEXT:    v_min_f64 v[2:3], v[0:1], v[22:23]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[22:23]
+; GFX9-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[22:23]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v34, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v34, v3, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX9-NEXT:    v_min_f64 v[2:3], v[0:1], v[24:25]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[24:25]
+; GFX9-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[24:25]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v34, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v34, v3, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX9-NEXT:    v_min_f64 v[2:3], v[0:1], v[26:27]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[26:27]
+; GFX9-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[26:27]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v34, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v34, v3, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX9-NEXT:    v_min_f64 v[2:3], v[0:1], v[28:29]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[28:29]
+; GFX9-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[28:29]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v34, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v34, v3, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-NEXT:    v_min_f64 v[2:3], v[0:1], v[30:31]
-; GFX9-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[30:31]
+; GFX9-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[30:31]
 ; GFX9-NEXT:    s_nop 1
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v34, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v34, v3, vcc
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: test_vector_reduce_fminimum_v16double:
 ; GFX10:       ; %bb.0: ; %entry
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    v_min_f64 v[31:32], v[0:1], v[2:3]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[2:3]
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v32, 0x7ff80000, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v31, 0, vcc_lo
+; GFX10-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[2:3]
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v32, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0, v31, vcc_lo
 ; GFX10-NEXT:    buffer_load_dword v31, off, s[0:3], s32
 ; GFX10-NEXT:    v_min_f64 v[2:3], v[0:1], v[4:5]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[4:5]
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GFX10-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[4:5]
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc_lo
 ; GFX10-NEXT:    v_min_f64 v[2:3], v[0:1], v[6:7]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[6:7]
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GFX10-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[6:7]
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc_lo
 ; GFX10-NEXT:    v_min_f64 v[2:3], v[0:1], v[8:9]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[8:9]
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GFX10-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[8:9]
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc_lo
 ; GFX10-NEXT:    v_min_f64 v[2:3], v[0:1], v[10:11]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[10:11]
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GFX10-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[10:11]
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc_lo
 ; GFX10-NEXT:    v_min_f64 v[2:3], v[0:1], v[12:13]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[12:13]
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GFX10-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[12:13]
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc_lo
 ; GFX10-NEXT:    v_min_f64 v[2:3], v[0:1], v[14:15]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[14:15]
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GFX10-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[14:15]
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc_lo
 ; GFX10-NEXT:    v_min_f64 v[2:3], v[0:1], v[16:17]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[16:17]
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GFX10-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[16:17]
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc_lo
 ; GFX10-NEXT:    v_min_f64 v[2:3], v[0:1], v[18:19]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[18:19]
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GFX10-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[18:19]
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc_lo
 ; GFX10-NEXT:    v_min_f64 v[2:3], v[0:1], v[20:21]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[20:21]
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GFX10-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[20:21]
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc_lo
 ; GFX10-NEXT:    v_min_f64 v[2:3], v[0:1], v[22:23]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[22:23]
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GFX10-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[22:23]
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc_lo
 ; GFX10-NEXT:    v_min_f64 v[2:3], v[0:1], v[24:25]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[24:25]
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GFX10-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[24:25]
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc_lo
 ; GFX10-NEXT:    v_min_f64 v[2:3], v[0:1], v[26:27]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[26:27]
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GFX10-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[26:27]
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc_lo
 ; GFX10-NEXT:    v_min_f64 v[2:3], v[0:1], v[28:29]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[28:29]
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GFX10-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[28:29]
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc_lo
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    v_min_f64 v[2:3], v[0:1], v[30:31]
-; GFX10-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[30:31]
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
+; GFX10-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[30:31]
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: test_vector_reduce_fminimum_v16double:
 ; GFX11:       ; %bb.0: ; %entry
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NEXT:    v_min_f64 v[31:32], v[0:1], v[2:3]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[2:3]
+; GFX11-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[2:3]
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v32, 0x7ff80000, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v31, 0, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v32, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0, v31, vcc_lo
 ; GFX11-NEXT:    scratch_load_b32 v31, off, s32
 ; GFX11-NEXT:    v_min_f64 v[2:3], v[0:1], v[4:5]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[4:5]
+; GFX11-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[4:5]
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc_lo
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
 ; GFX11-NEXT:    v_min_f64 v[2:3], v[0:1], v[6:7]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[6:7]
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
+; GFX11-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[6:7]
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc_lo
 ; GFX11-NEXT:    v_min_f64 v[2:3], v[0:1], v[8:9]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[8:9]
+; GFX11-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[8:9]
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc_lo
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
 ; GFX11-NEXT:    v_min_f64 v[2:3], v[0:1], v[10:11]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[10:11]
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
+; GFX11-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[10:11]
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc_lo
 ; GFX11-NEXT:    v_min_f64 v[2:3], v[0:1], v[12:13]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[12:13]
+; GFX11-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[12:13]
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc_lo
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
 ; GFX11-NEXT:    v_min_f64 v[2:3], v[0:1], v[14:15]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[14:15]
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
+; GFX11-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[14:15]
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc_lo
 ; GFX11-NEXT:    v_min_f64 v[2:3], v[0:1], v[16:17]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[16:17]
+; GFX11-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[16:17]
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc_lo
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
 ; GFX11-NEXT:    v_min_f64 v[2:3], v[0:1], v[18:19]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[18:19]
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
+; GFX11-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[18:19]
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc_lo
 ; GFX11-NEXT:    v_min_f64 v[2:3], v[0:1], v[20:21]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[20:21]
+; GFX11-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[20:21]
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc_lo
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
 ; GFX11-NEXT:    v_min_f64 v[2:3], v[0:1], v[22:23]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[22:23]
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
+; GFX11-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[22:23]
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc_lo
 ; GFX11-NEXT:    v_min_f64 v[2:3], v[0:1], v[24:25]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[24:25]
+; GFX11-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[24:25]
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc_lo
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
 ; GFX11-NEXT:    v_min_f64 v[2:3], v[0:1], v[26:27]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[26:27]
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
+; GFX11-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[26:27]
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc_lo
 ; GFX11-NEXT:    v_min_f64 v[2:3], v[0:1], v[28:29]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[28:29]
+; GFX11-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[28:29]
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc_lo
 ; GFX11-NEXT:    s_waitcnt vmcnt(0)
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
 ; GFX11-NEXT:    v_min_f64 v[2:3], v[0:1], v[30:31]
-; GFX11-NEXT:    v_cmp_u_f64_e32 vcc_lo, v[0:1], v[30:31]
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GFX11-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[30:31]
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc_lo
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3)
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-SDAG-LABEL: test_vector_reduce_fminimum_v16double:

>From 2843150d4a55ac496423ae2ea79e83f83e2d010c Mon Sep 17 00:00:00 2001
From: Ana Mihajlovic <Ana.Mihajlovic at amd.com>
Date: Mon, 2 Jun 2025 12:14:41 +0200
Subject: [PATCH 3/6] variables

---
 llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 1a5d2232213ec..e4747ccd1a4cd 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -4749,9 +4749,12 @@ SDValue AMDGPUTargetLowering::performSelectCombine(SDNode *N,
     if (isFnegOrFabs(Op1) || isFnegOrFabs(Op2))
       continue;
 
-    if (!Op1->isDivergent() && Op2->isDivergent())
+    bool IsOp1Divergent = Op1->isDivergent();
+    bool IsOp2Divergent = Op2->isDivergent();
+
+    if (!IsOp1Divergent && IsOp2Divergent)
       ShouldSwap++;
-    else if (Op1->isDivergent() && !Op2->isDivergent())
+    else if (IsOp1Divergent && !IsOp2Divergent)
       ShouldSwap--;
   }
 

>From c2c9822a66b7320707fe50643d667504d04a1678 Mon Sep 17 00:00:00 2001
From: Ana Mihajlovic <Ana.Mihajlovic at amd.com>
Date: Thu, 5 Jun 2025 11:31:59 +0200
Subject: [PATCH 4/6] updated tests after rebase

---
 llvm/test/CodeGen/AMDGPU/llvm.maximum.f64.ll | 40 ++++++++++----------
 llvm/test/CodeGen/AMDGPU/llvm.minimum.f64.ll | 40 ++++++++++----------
 2 files changed, 40 insertions(+), 40 deletions(-)

diff --git a/llvm/test/CodeGen/AMDGPU/llvm.maximum.f64.ll b/llvm/test/CodeGen/AMDGPU/llvm.maximum.f64.ll
index 6add49178abbc..5c7cfc9099b85 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.maximum.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.maximum.f64.ll
@@ -2551,18 +2551,18 @@ define <16 x double> @v_maximum_v16f64(<16 x double> %src0, <16 x double> %src1)
 ; GFX10-NEXT:    s_waitcnt vmcnt(23)
 ; GFX10-NEXT:    v_max_f64 v[82:83], v[0:1], v[31:32]
 ; GFX10-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[31:32]
-; GFX10-NEXT:    s_waitcnt vmcnt(22)
+; GFX10-NEXT:    s_waitcnt vmcnt(21)
 ; GFX10-NEXT:    v_max_f64 v[84:85], v[2:3], v[33:34]
 ; GFX10-NEXT:    v_cmp_o_f64_e64 s4, v[2:3], v[33:34]
-; GFX10-NEXT:    s_clause 0x3
-; GFX10-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:120
-; GFX10-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:116
-; GFX10-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:112
-; GFX10-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:108
-; GFX10-NEXT:    s_waitcnt vmcnt(24)
+; GFX10-NEXT:    s_waitcnt vmcnt(19)
 ; GFX10-NEXT:    v_max_f64 v[32:33], v[4:5], v[35:36]
 ; GFX10-NEXT:    v_cmp_o_f64_e64 s5, v[4:5], v[35:36]
-; GFX10-NEXT:    s_clause 0x2
+; GFX10-NEXT:    s_clause 0x7
+; GFX10-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:112
+; GFX10-NEXT:    buffer_load_dword v67, off, s[0:3], s32 offset:104
+; GFX10-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:108
+; GFX10-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:120
+; GFX10-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:116
 ; GFX10-NEXT:    buffer_load_dword v31, off, s[0:3], s32
 ; GFX10-NEXT:    buffer_load_dword v5, off, s[0:3], s32 offset:128
 ; GFX10-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:124
@@ -2592,9 +2592,6 @@ define <16 x double> @v_maximum_v16f64(<16 x double> %src0, <16 x double> %src1)
 ; GFX10-NEXT:    v_max_f64 v[50:51], v[18:19], v[80:81]
 ; GFX10-NEXT:    v_max_f64 v[70:71], v[22:23], v[68:69]
 ; GFX10-NEXT:    v_cmp_o_f64_e64 s14, v[22:23], v[68:69]
-; GFX10-NEXT:    s_waitcnt vmcnt(7)
-; GFX10-NEXT:    v_max_f64 v[68:69], v[24:25], v[66:67]
-; GFX10-NEXT:    v_cmp_o_f64_e64 s15, v[24:25], v[66:67]
 ; GFX10-NEXT:    v_cndmask_b32_e64 v6, 0, v34, s6
 ; GFX10-NEXT:    v_cndmask_b32_e64 v7, 0x7ff80000, v35, s6
 ; GFX10-NEXT:    v_cndmask_b32_e64 v8, 0, v48, s7
@@ -2613,27 +2610,30 @@ define <16 x double> @v_maximum_v16f64(<16 x double> %src0, <16 x double> %src1)
 ; GFX10-NEXT:    v_cndmask_b32_e64 v21, 0x7ff80000, v65, s13
 ; GFX10-NEXT:    v_cndmask_b32_e64 v22, 0, v70, s14
 ; GFX10-NEXT:    v_cndmask_b32_e64 v23, 0x7ff80000, v71, s14
-; GFX10-NEXT:    v_cndmask_b32_e64 v24, 0, v68, s15
-; GFX10-NEXT:    v_cndmask_b32_e64 v25, 0x7ff80000, v69, s15
+; GFX10-NEXT:    s_waitcnt vmcnt(6)
+; GFX10-NEXT:    v_max_f64 v[68:69], v[24:25], v[66:67]
+; GFX10-NEXT:    v_cmp_o_f64_e64 s15, v[24:25], v[66:67]
 ; GFX10-NEXT:    s_waitcnt vmcnt(5)
-; GFX10-NEXT:    v_max_f64 v[80:81], v[28:29], v[0:1]
-; GFX10-NEXT:    v_cmp_o_f64_e64 s17, v[28:29], v[0:1]
+; GFX10-NEXT:    v_max_f64 v[66:67], v[26:27], v[0:1]
+; GFX10-NEXT:    v_cmp_o_f64_e64 s16, v[26:27], v[0:1]
 ; GFX10-NEXT:    s_waitcnt vmcnt(3)
-; GFX10-NEXT:    v_max_f64 v[66:67], v[26:27], v[2:3]
-; GFX10-NEXT:    v_cmp_o_f64_e64 s16, v[26:27], v[2:3]
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0, v82, vcc_lo
+; GFX10-NEXT:    v_max_f64 v[80:81], v[28:29], v[2:3]
+; GFX10-NEXT:    v_cmp_o_f64_e64 s17, v[28:29], v[2:3]
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    v_max_f64 v[86:87], v[30:31], v[4:5]
 ; GFX10-NEXT:    v_cmp_o_f64_e64 s18, v[30:31], v[4:5]
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0, v82, vcc_lo
 ; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v83, vcc_lo
 ; GFX10-NEXT:    v_cndmask_b32_e64 v2, 0, v84, s4
 ; GFX10-NEXT:    v_cndmask_b32_e64 v3, 0x7ff80000, v85, s4
 ; GFX10-NEXT:    v_cndmask_b32_e64 v4, 0, v32, s5
 ; GFX10-NEXT:    v_cndmask_b32_e64 v5, 0x7ff80000, v33, s5
-; GFX10-NEXT:    v_cndmask_b32_e64 v28, 0, v80, s17
-; GFX10-NEXT:    v_cndmask_b32_e64 v29, 0x7ff80000, v81, s17
+; GFX10-NEXT:    v_cndmask_b32_e64 v24, 0, v68, s15
+; GFX10-NEXT:    v_cndmask_b32_e64 v25, 0x7ff80000, v69, s15
 ; GFX10-NEXT:    v_cndmask_b32_e64 v26, 0, v66, s16
 ; GFX10-NEXT:    v_cndmask_b32_e64 v27, 0x7ff80000, v67, s16
+; GFX10-NEXT:    v_cndmask_b32_e64 v28, 0, v80, s17
+; GFX10-NEXT:    v_cndmask_b32_e64 v29, 0x7ff80000, v81, s17
 ; GFX10-NEXT:    v_cndmask_b32_e64 v30, 0, v86, s18
 ; GFX10-NEXT:    v_cndmask_b32_e64 v31, 0x7ff80000, v87, s18
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.minimum.f64.ll b/llvm/test/CodeGen/AMDGPU/llvm.minimum.f64.ll
index 6bddc2e5a7aad..c6b30620dd335 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.minimum.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.minimum.f64.ll
@@ -2551,18 +2551,18 @@ define <16 x double> @v_minimum_v16f64(<16 x double> %src0, <16 x double> %src1)
 ; GFX10-NEXT:    s_waitcnt vmcnt(23)
 ; GFX10-NEXT:    v_min_f64 v[82:83], v[0:1], v[31:32]
 ; GFX10-NEXT:    v_cmp_o_f64_e32 vcc_lo, v[0:1], v[31:32]
-; GFX10-NEXT:    s_waitcnt vmcnt(22)
+; GFX10-NEXT:    s_waitcnt vmcnt(21)
 ; GFX10-NEXT:    v_min_f64 v[84:85], v[2:3], v[33:34]
 ; GFX10-NEXT:    v_cmp_o_f64_e64 s4, v[2:3], v[33:34]
-; GFX10-NEXT:    s_clause 0x3
-; GFX10-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:120
-; GFX10-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:116
-; GFX10-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:112
-; GFX10-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:108
-; GFX10-NEXT:    s_waitcnt vmcnt(24)
+; GFX10-NEXT:    s_waitcnt vmcnt(19)
 ; GFX10-NEXT:    v_min_f64 v[32:33], v[4:5], v[35:36]
 ; GFX10-NEXT:    v_cmp_o_f64_e64 s5, v[4:5], v[35:36]
-; GFX10-NEXT:    s_clause 0x2
+; GFX10-NEXT:    s_clause 0x7
+; GFX10-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:112
+; GFX10-NEXT:    buffer_load_dword v67, off, s[0:3], s32 offset:104
+; GFX10-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:108
+; GFX10-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:120
+; GFX10-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:116
 ; GFX10-NEXT:    buffer_load_dword v31, off, s[0:3], s32
 ; GFX10-NEXT:    buffer_load_dword v5, off, s[0:3], s32 offset:128
 ; GFX10-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:124
@@ -2592,9 +2592,6 @@ define <16 x double> @v_minimum_v16f64(<16 x double> %src0, <16 x double> %src1)
 ; GFX10-NEXT:    v_min_f64 v[50:51], v[18:19], v[80:81]
 ; GFX10-NEXT:    v_min_f64 v[70:71], v[22:23], v[68:69]
 ; GFX10-NEXT:    v_cmp_o_f64_e64 s14, v[22:23], v[68:69]
-; GFX10-NEXT:    s_waitcnt vmcnt(7)
-; GFX10-NEXT:    v_min_f64 v[68:69], v[24:25], v[66:67]
-; GFX10-NEXT:    v_cmp_o_f64_e64 s15, v[24:25], v[66:67]
 ; GFX10-NEXT:    v_cndmask_b32_e64 v6, 0, v34, s6
 ; GFX10-NEXT:    v_cndmask_b32_e64 v7, 0x7ff80000, v35, s6
 ; GFX10-NEXT:    v_cndmask_b32_e64 v8, 0, v48, s7
@@ -2613,27 +2610,30 @@ define <16 x double> @v_minimum_v16f64(<16 x double> %src0, <16 x double> %src1)
 ; GFX10-NEXT:    v_cndmask_b32_e64 v21, 0x7ff80000, v65, s13
 ; GFX10-NEXT:    v_cndmask_b32_e64 v22, 0, v70, s14
 ; GFX10-NEXT:    v_cndmask_b32_e64 v23, 0x7ff80000, v71, s14
-; GFX10-NEXT:    v_cndmask_b32_e64 v24, 0, v68, s15
-; GFX10-NEXT:    v_cndmask_b32_e64 v25, 0x7ff80000, v69, s15
+; GFX10-NEXT:    s_waitcnt vmcnt(6)
+; GFX10-NEXT:    v_min_f64 v[68:69], v[24:25], v[66:67]
+; GFX10-NEXT:    v_cmp_o_f64_e64 s15, v[24:25], v[66:67]
 ; GFX10-NEXT:    s_waitcnt vmcnt(5)
-; GFX10-NEXT:    v_min_f64 v[80:81], v[28:29], v[0:1]
-; GFX10-NEXT:    v_cmp_o_f64_e64 s17, v[28:29], v[0:1]
+; GFX10-NEXT:    v_min_f64 v[66:67], v[26:27], v[0:1]
+; GFX10-NEXT:    v_cmp_o_f64_e64 s16, v[26:27], v[0:1]
 ; GFX10-NEXT:    s_waitcnt vmcnt(3)
-; GFX10-NEXT:    v_min_f64 v[66:67], v[26:27], v[2:3]
-; GFX10-NEXT:    v_cmp_o_f64_e64 s16, v[26:27], v[2:3]
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0, v82, vcc_lo
+; GFX10-NEXT:    v_min_f64 v[80:81], v[28:29], v[2:3]
+; GFX10-NEXT:    v_cmp_o_f64_e64 s17, v[28:29], v[2:3]
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    v_min_f64 v[86:87], v[30:31], v[4:5]
 ; GFX10-NEXT:    v_cmp_o_f64_e64 s18, v[30:31], v[4:5]
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0, v82, vcc_lo
 ; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7ff80000, v83, vcc_lo
 ; GFX10-NEXT:    v_cndmask_b32_e64 v2, 0, v84, s4
 ; GFX10-NEXT:    v_cndmask_b32_e64 v3, 0x7ff80000, v85, s4
 ; GFX10-NEXT:    v_cndmask_b32_e64 v4, 0, v32, s5
 ; GFX10-NEXT:    v_cndmask_b32_e64 v5, 0x7ff80000, v33, s5
-; GFX10-NEXT:    v_cndmask_b32_e64 v28, 0, v80, s17
-; GFX10-NEXT:    v_cndmask_b32_e64 v29, 0x7ff80000, v81, s17
+; GFX10-NEXT:    v_cndmask_b32_e64 v24, 0, v68, s15
+; GFX10-NEXT:    v_cndmask_b32_e64 v25, 0x7ff80000, v69, s15
 ; GFX10-NEXT:    v_cndmask_b32_e64 v26, 0, v66, s16
 ; GFX10-NEXT:    v_cndmask_b32_e64 v27, 0x7ff80000, v67, s16
+; GFX10-NEXT:    v_cndmask_b32_e64 v28, 0, v80, s17
+; GFX10-NEXT:    v_cndmask_b32_e64 v29, 0x7ff80000, v81, s17
 ; GFX10-NEXT:    v_cndmask_b32_e64 v30, 0, v86, s18
 ; GFX10-NEXT:    v_cndmask_b32_e64 v31, 0x7ff80000, v87, s18
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]

>From d57c98f398d76dd42bd4fc858dfb517cb79947e7 Mon Sep 17 00:00:00 2001
From: Ana Mihajlovic <Ana.Mihajlovic at amd.com>
Date: Mon, 9 Jun 2025 17:20:36 +0200
Subject: [PATCH 5/6] fix conditions

---
 llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp |  46 +-
 .../atomic_optimizations_local_pointer.ll     | 482 ++++++-------
 .../branch-folding-implicit-def-subreg.ll     |   8 +-
 llvm/test/CodeGen/AMDGPU/dagcombine-select.ll |  22 +-
 .../CodeGen/AMDGPU/extract-subvector-16bit.ll | 323 ++++-----
 llvm/test/CodeGen/AMDGPU/extract-subvector.ll | 100 +--
 .../CodeGen/AMDGPU/extract_vector_dynelt.ll   | 641 +++++++++--------
 llvm/test/CodeGen/AMDGPU/fcopysign.f16.ll     |  52 +-
 llvm/test/CodeGen/AMDGPU/flat_atomics_i64.ll  |  90 +--
 llvm/test/CodeGen/AMDGPU/fmax_legacy.ll       |   1 -
 llvm/test/CodeGen/AMDGPU/fmaximum3.ll         |  32 +-
 llvm/test/CodeGen/AMDGPU/fmin_legacy.ll       |   2 -
 llvm/test/CodeGen/AMDGPU/fminimum3.ll         |  32 +-
 llvm/test/CodeGen/AMDGPU/fptrunc.ll           |  16 +-
 .../CodeGen/AMDGPU/indirect-addressing-si.ll  | 364 +++++-----
 .../CodeGen/AMDGPU/insert_vector_dynelt.ll    | 662 +++++++++---------
 llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll | 316 ++++-----
 .../AMDGPU/insert_vector_elt.v2bf16.ll        | 384 +++++-----
 .../CodeGen/AMDGPU/insert_vector_elt.v2i16.ll | 554 +++++++--------
 .../AMDGPU/llvm.amdgcn.iglp.opt.exp.simple.ll |  64 +-
 .../AMDGPU/llvm.amdgcn.sched.group.barrier.ll |  80 +--
 llvm/test/CodeGen/AMDGPU/llvm.exp.ll          | 240 +++----
 llvm/test/CodeGen/AMDGPU/llvm.exp10.ll        | 240 +++----
 llvm/test/CodeGen/AMDGPU/llvm.maximum.f16.ll  |  86 +--
 llvm/test/CodeGen/AMDGPU/llvm.maximum.f32.ll  |  64 +-
 llvm/test/CodeGen/AMDGPU/llvm.minimum.f16.ll  |  74 +-
 llvm/test/CodeGen/AMDGPU/llvm.minimum.f32.ll  |  64 +-
 llvm/test/CodeGen/AMDGPU/llvm.round.f64.ll    |   3 +-
 .../AMDGPU/select-fabs-fneg-extract.f16.ll    |  40 +-
 .../AMDGPU/select-fabs-fneg-extract.ll        |  50 +-
 llvm/test/CodeGen/AMDGPU/select.f16.ll        |  58 +-
 .../AMDGPU/uniform-vgpr-to-sgpr-return.ll     |  34 +-
 llvm/test/CodeGen/AMDGPU/v_cndmask.ll         | 132 ++--
 .../CodeGen/AMDGPU/vector-alloca-bitcast.ll   |  16 +-
 .../CodeGen/AMDGPU/vector-extract-insert.ll   |  16 +-
 llvm/test/CodeGen/AMDGPU/wave32.ll            |  12 +-
 36 files changed, 2686 insertions(+), 2714 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index e4747ccd1a4cd..a3c80a52447de 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -4733,8 +4733,7 @@ SDValue AMDGPUTargetLowering::performSelectCombine(SDNode *N,
   SDValue False = N->getOperand(2);
 
   int ShouldSwap = 0;
-  for (auto it = Cond->use_begin(); it != Cond->use_end(); it++) {
-    auto User = it->getUser();
+  for (auto *User : Cond->users()) {
 
     if (User->getOpcode() != ISD::SELECT) {
       ShouldSwap = 0;
@@ -4749,38 +4748,29 @@ SDValue AMDGPUTargetLowering::performSelectCombine(SDNode *N,
     if (isFnegOrFabs(Op1) || isFnegOrFabs(Op2))
       continue;
 
-    bool IsOp1Divergent = Op1->isDivergent();
-    bool IsOp2Divergent = Op2->isDivergent();
-
-    if (!IsOp1Divergent && IsOp2Divergent)
-      ShouldSwap++;
-    else if (IsOp1Divergent && !IsOp2Divergent)
-      ShouldSwap--;
+    ShouldSwap += Op2->isDivergent() - Op1->isDivergent();
   }
 
-  if (Cond->hasOneUse() || ShouldSwap > 0) {
+  if (ShouldSwap > 0) {
     SelectionDAG &DAG = DCI.DAG;
-    if (DAG.isConstantValueOfAnyType(True) &&
-        !DAG.isConstantValueOfAnyType(False)) {
-      // Swap cmp + select pair to move constant to false input.
-      // This will allow using VOPC cndmasks more often.
-      // select (setcc x, y), k, x -> select (setccinv x, y), x, k
+    // Swap cmp + select pair to move constant to false input.
+    // This will allow using VOPC cndmasks more often.
+    // select (setcc x, y), k, x -> select (setccinv x, y), x, k
 
-      SDLoc SL(N);
-      ISD::CondCode NewCC =
-          getSetCCInverse(cast<CondCodeSDNode>(CC)->get(), LHS.getValueType());
+    SDLoc SL(N);
+    ISD::CondCode NewCC =
+        getSetCCInverse(cast<CondCodeSDNode>(CC)->get(), LHS.getValueType());
 
-      SDValue NewCond = DAG.getSetCC(SL, Cond.getValueType(), LHS, RHS, NewCC);
-      return DAG.getNode(ISD::SELECT, SL, VT, NewCond, False, True);
-    }
+    SDValue NewCond = DAG.getSetCC(SL, Cond.getValueType(), LHS, RHS, NewCC);
+    return DAG.getNode(ISD::SELECT, SL, VT, NewCond, False, True);
+  }
 
-    if (VT == MVT::f32 && Subtarget->hasFminFmaxLegacy()) {
-      SDValue MinMax
-        = combineFMinMaxLegacy(SDLoc(N), VT, LHS, RHS, True, False, CC, DCI);
-      // Revisit this node so we can catch min3/max3/med3 patterns.
-      //DCI.AddToWorklist(MinMax.getNode());
-      return MinMax;
-    }
+  if (Cond->hasOneUse() && (VT == MVT::f32 && Subtarget->hasFminFmaxLegacy())) {
+    SDValue MinMax =
+        combineFMinMaxLegacy(SDLoc(N), VT, LHS, RHS, True, False, CC, DCI);
+    // Revisit this node so we can catch min3/max3/med3 patterns.
+    // DCI.AddToWorklist(MinMax.getNode());
+    return MinMax;
   }
 
   // There's no reason to not do this if the condition has other uses.
diff --git a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll
index 8e0b3cb9aa1d5..27a0b5e3a48bd 100644
--- a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll
+++ b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll
@@ -10964,10 +10964,10 @@ define amdgpu_kernel void @max_i64_constant(ptr addrspace(1) %out) {
 ; GFX7LESS-NEXT:    v_cndmask_b32_e64 v0, 5, 0, vcc
 ; GFX7LESS-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; GFX7LESS-NEXT:    v_mov_b32_e32 v2, s5
-; GFX7LESS-NEXT:    v_cmp_gt_i64_e32 vcc, s[4:5], v[0:1]
-; GFX7LESS-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
+; GFX7LESS-NEXT:    v_cmp_le_i64_e32 vcc, s[4:5], v[0:1]
+; GFX7LESS-NEXT:    v_cndmask_b32_e32 v1, v2, v1, vcc
 ; GFX7LESS-NEXT:    v_mov_b32_e32 v2, s4
-; GFX7LESS-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX7LESS-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
 ; GFX7LESS-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX7LESS-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
 ; GFX7LESS-NEXT:    s_endpgm
@@ -10995,13 +10995,13 @@ define amdgpu_kernel void @max_i64_constant(ptr addrspace(1) %out) {
 ; GFX8-NEXT:    v_readfirstlane_b32 s5, v1
 ; GFX8-NEXT:    v_cndmask_b32_e32 v1, 0, v0, vcc
 ; GFX8-NEXT:    v_cndmask_b32_e64 v0, 5, 0, vcc
-; GFX8-NEXT:    v_cmp_gt_i64_e32 vcc, s[4:5], v[0:1]
+; GFX8-NEXT:    v_cmp_le_i64_e32 vcc, s[4:5], v[0:1]
 ; GFX8-NEXT:    v_mov_b32_e32 v2, s5
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v2, v1, vcc
 ; GFX8-NEXT:    v_mov_b32_e32 v2, s4
 ; GFX8-NEXT:    s_mov_b32 s3, 0xf000
 ; GFX8-NEXT:    s_mov_b32 s2, -1
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
 ; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX8-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
 ; GFX8-NEXT:    s_endpgm
@@ -11028,13 +11028,13 @@ define amdgpu_kernel void @max_i64_constant(ptr addrspace(1) %out) {
 ; GFX9-NEXT:    v_readfirstlane_b32 s5, v1
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, 0, v0, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e64 v0, 5, 0, vcc
-; GFX9-NEXT:    v_cmp_gt_i64_e32 vcc, s[4:5], v[0:1]
+; GFX9-NEXT:    v_cmp_le_i64_e32 vcc, s[4:5], v[0:1]
 ; GFX9-NEXT:    v_mov_b32_e32 v2, s5
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v2, v1, vcc
 ; GFX9-NEXT:    v_mov_b32_e32 v2, s4
 ; GFX9-NEXT:    s_mov_b32 s3, 0xf000
 ; GFX9-NEXT:    s_mov_b32 s2, -1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
 ; GFX9-NEXT:    s_endpgm
@@ -11062,9 +11062,9 @@ define amdgpu_kernel void @max_i64_constant(ptr addrspace(1) %out) {
 ; GFX1064-NEXT:    v_readfirstlane_b32 s2, v0
 ; GFX1064-NEXT:    v_cndmask_b32_e64 v1, 0, 0x80000000, vcc
 ; GFX1064-NEXT:    v_cndmask_b32_e64 v0, 5, 0, vcc
-; GFX1064-NEXT:    v_cmp_gt_i64_e32 vcc, s[2:3], v[0:1]
-; GFX1064-NEXT:    v_cndmask_b32_e64 v1, v1, s3, vcc
-; GFX1064-NEXT:    v_cndmask_b32_e64 v0, v0, s2, vcc
+; GFX1064-NEXT:    v_cmp_le_i64_e32 vcc, s[2:3], v[0:1]
+; GFX1064-NEXT:    v_cndmask_b32_e32 v1, s3, v1, vcc
+; GFX1064-NEXT:    v_cndmask_b32_e32 v0, s2, v0, vcc
 ; GFX1064-NEXT:    s_mov_b32 s3, 0x31016000
 ; GFX1064-NEXT:    s_mov_b32 s2, -1
 ; GFX1064-NEXT:    s_waitcnt lgkmcnt(0)
@@ -11093,9 +11093,9 @@ define amdgpu_kernel void @max_i64_constant(ptr addrspace(1) %out) {
 ; GFX1032-NEXT:    v_readfirstlane_b32 s2, v0
 ; GFX1032-NEXT:    v_cndmask_b32_e64 v1, 0, 0x80000000, vcc_lo
 ; GFX1032-NEXT:    v_cndmask_b32_e64 v0, 5, 0, vcc_lo
-; GFX1032-NEXT:    v_cmp_gt_i64_e32 vcc_lo, s[2:3], v[0:1]
-; GFX1032-NEXT:    v_cndmask_b32_e64 v1, v1, s3, vcc_lo
-; GFX1032-NEXT:    v_cndmask_b32_e64 v0, v0, s2, vcc_lo
+; GFX1032-NEXT:    v_cmp_le_i64_e32 vcc_lo, s[2:3], v[0:1]
+; GFX1032-NEXT:    v_cndmask_b32_e32 v1, s3, v1, vcc_lo
+; GFX1032-NEXT:    v_cndmask_b32_e32 v0, s2, v0, vcc_lo
 ; GFX1032-NEXT:    s_mov_b32 s3, 0x31016000
 ; GFX1032-NEXT:    s_mov_b32 s2, -1
 ; GFX1032-NEXT:    s_waitcnt lgkmcnt(0)
@@ -11126,9 +11126,9 @@ define amdgpu_kernel void @max_i64_constant(ptr addrspace(1) %out) {
 ; GFX1164-NEXT:    v_cndmask_b32_e64 v1, 0, 0x80000000, vcc
 ; GFX1164-NEXT:    v_cndmask_b32_e64 v0, 5, 0, vcc
 ; GFX1164-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1164-NEXT:    v_cmp_gt_i64_e32 vcc, s[2:3], v[0:1]
-; GFX1164-NEXT:    v_cndmask_b32_e64 v1, v1, s3, vcc
-; GFX1164-NEXT:    v_cndmask_b32_e64 v0, v0, s2, vcc
+; GFX1164-NEXT:    v_cmp_le_i64_e32 vcc, s[2:3], v[0:1]
+; GFX1164-NEXT:    v_cndmask_b32_e32 v1, s3, v1, vcc
+; GFX1164-NEXT:    v_cndmask_b32_e32 v0, s2, v0, vcc
 ; GFX1164-NEXT:    s_mov_b32 s3, 0x31016000
 ; GFX1164-NEXT:    s_mov_b32 s2, -1
 ; GFX1164-NEXT:    s_waitcnt lgkmcnt(0)
@@ -11157,9 +11157,9 @@ define amdgpu_kernel void @max_i64_constant(ptr addrspace(1) %out) {
 ; GFX1132-NEXT:    v_cndmask_b32_e64 v1, 0, 0x80000000, vcc_lo
 ; GFX1132-NEXT:    v_cndmask_b32_e64 v0, 5, 0, vcc_lo
 ; GFX1132-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1132-NEXT:    v_cmp_gt_i64_e32 vcc_lo, s[2:3], v[0:1]
-; GFX1132-NEXT:    v_cndmask_b32_e64 v1, v1, s3, vcc_lo
-; GFX1132-NEXT:    v_cndmask_b32_e64 v0, v0, s2, vcc_lo
+; GFX1132-NEXT:    v_cmp_le_i64_e32 vcc_lo, s[2:3], v[0:1]
+; GFX1132-NEXT:    v_cndmask_b32_e32 v1, s3, v1, vcc_lo
+; GFX1132-NEXT:    v_cndmask_b32_e32 v0, s2, v0, vcc_lo
 ; GFX1132-NEXT:    s_mov_b32 s3, 0x31016000
 ; GFX1132-NEXT:    s_mov_b32 s2, -1
 ; GFX1132-NEXT:    s_waitcnt lgkmcnt(0)
@@ -11221,10 +11221,10 @@ define amdgpu_kernel void @max_i64_varying(ptr addrspace(1) %out) {
 ; GFX7LESS_ITERATIVE-NEXT:    v_readfirstlane_b32 s5, v4
 ; GFX7LESS_ITERATIVE-NEXT:    v_readfirstlane_b32 s4, v3
 ; GFX7LESS_ITERATIVE-NEXT:    v_mov_b32_e32 v0, s5
-; GFX7LESS_ITERATIVE-NEXT:    v_cmp_gt_i64_e32 vcc, s[4:5], v[1:2]
-; GFX7LESS_ITERATIVE-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX7LESS_ITERATIVE-NEXT:    v_cmp_le_i64_e32 vcc, s[4:5], v[1:2]
+; GFX7LESS_ITERATIVE-NEXT:    v_cndmask_b32_e32 v2, v0, v2, vcc
 ; GFX7LESS_ITERATIVE-NEXT:    v_mov_b32_e32 v0, s4
-; GFX7LESS_ITERATIVE-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX7LESS_ITERATIVE-NEXT:    v_cndmask_b32_e32 v1, v0, v1, vcc
 ; GFX7LESS_ITERATIVE-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX7LESS_ITERATIVE-NEXT:    buffer_store_dwordx2 v[1:2], off, s[0:3], 0
 ; GFX7LESS_ITERATIVE-NEXT:    s_endpgm
@@ -11274,13 +11274,13 @@ define amdgpu_kernel void @max_i64_varying(ptr addrspace(1) %out) {
 ; GFX8_ITERATIVE-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; GFX8_ITERATIVE-NEXT:    v_readfirstlane_b32 s5, v4
 ; GFX8_ITERATIVE-NEXT:    v_readfirstlane_b32 s4, v3
-; GFX8_ITERATIVE-NEXT:    v_cmp_gt_i64_e32 vcc, s[4:5], v[1:2]
+; GFX8_ITERATIVE-NEXT:    v_cmp_le_i64_e32 vcc, s[4:5], v[1:2]
 ; GFX8_ITERATIVE-NEXT:    v_mov_b32_e32 v0, s5
-; GFX8_ITERATIVE-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX8_ITERATIVE-NEXT:    v_cndmask_b32_e32 v2, v0, v2, vcc
 ; GFX8_ITERATIVE-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX8_ITERATIVE-NEXT:    s_mov_b32 s3, 0xf000
 ; GFX8_ITERATIVE-NEXT:    s_mov_b32 s2, -1
-; GFX8_ITERATIVE-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX8_ITERATIVE-NEXT:    v_cndmask_b32_e32 v1, v0, v1, vcc
 ; GFX8_ITERATIVE-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX8_ITERATIVE-NEXT:    buffer_store_dwordx2 v[1:2], off, s[0:3], 0
 ; GFX8_ITERATIVE-NEXT:    s_endpgm
@@ -11329,13 +11329,13 @@ define amdgpu_kernel void @max_i64_varying(ptr addrspace(1) %out) {
 ; GFX9_ITERATIVE-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; GFX9_ITERATIVE-NEXT:    v_readfirstlane_b32 s5, v4
 ; GFX9_ITERATIVE-NEXT:    v_readfirstlane_b32 s4, v3
-; GFX9_ITERATIVE-NEXT:    v_cmp_gt_i64_e32 vcc, s[4:5], v[1:2]
+; GFX9_ITERATIVE-NEXT:    v_cmp_le_i64_e32 vcc, s[4:5], v[1:2]
 ; GFX9_ITERATIVE-NEXT:    v_mov_b32_e32 v0, s5
-; GFX9_ITERATIVE-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX9_ITERATIVE-NEXT:    v_cndmask_b32_e32 v2, v0, v2, vcc
 ; GFX9_ITERATIVE-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX9_ITERATIVE-NEXT:    s_mov_b32 s3, 0xf000
 ; GFX9_ITERATIVE-NEXT:    s_mov_b32 s2, -1
-; GFX9_ITERATIVE-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX9_ITERATIVE-NEXT:    v_cndmask_b32_e32 v1, v0, v1, vcc
 ; GFX9_ITERATIVE-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9_ITERATIVE-NEXT:    buffer_store_dwordx2 v[1:2], off, s[0:3], 0
 ; GFX9_ITERATIVE-NEXT:    s_endpgm
@@ -11383,9 +11383,9 @@ define amdgpu_kernel void @max_i64_varying(ptr addrspace(1) %out) {
 ; GFX1064_ITERATIVE-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; GFX1064_ITERATIVE-NEXT:    v_readfirstlane_b32 s3, v4
 ; GFX1064_ITERATIVE-NEXT:    v_readfirstlane_b32 s2, v3
-; GFX1064_ITERATIVE-NEXT:    v_cmp_gt_i64_e32 vcc, s[2:3], v[1:2]
-; GFX1064_ITERATIVE-NEXT:    v_cndmask_b32_e64 v2, v2, s3, vcc
-; GFX1064_ITERATIVE-NEXT:    v_cndmask_b32_e64 v1, v1, s2, vcc
+; GFX1064_ITERATIVE-NEXT:    v_cmp_le_i64_e32 vcc, s[2:3], v[1:2]
+; GFX1064_ITERATIVE-NEXT:    v_cndmask_b32_e32 v2, s3, v2, vcc
+; GFX1064_ITERATIVE-NEXT:    v_cndmask_b32_e32 v1, s2, v1, vcc
 ; GFX1064_ITERATIVE-NEXT:    s_mov_b32 s3, 0x31016000
 ; GFX1064_ITERATIVE-NEXT:    s_mov_b32 s2, -1
 ; GFX1064_ITERATIVE-NEXT:    s_waitcnt lgkmcnt(0)
@@ -11434,9 +11434,9 @@ define amdgpu_kernel void @max_i64_varying(ptr addrspace(1) %out) {
 ; GFX1032_ITERATIVE-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; GFX1032_ITERATIVE-NEXT:    v_readfirstlane_b32 s3, v4
 ; GFX1032_ITERATIVE-NEXT:    v_readfirstlane_b32 s2, v3
-; GFX1032_ITERATIVE-NEXT:    v_cmp_gt_i64_e32 vcc_lo, s[2:3], v[1:2]
-; GFX1032_ITERATIVE-NEXT:    v_cndmask_b32_e64 v2, v2, s3, vcc_lo
-; GFX1032_ITERATIVE-NEXT:    v_cndmask_b32_e64 v1, v1, s2, vcc_lo
+; GFX1032_ITERATIVE-NEXT:    v_cmp_le_i64_e32 vcc_lo, s[2:3], v[1:2]
+; GFX1032_ITERATIVE-NEXT:    v_cndmask_b32_e32 v2, s3, v2, vcc_lo
+; GFX1032_ITERATIVE-NEXT:    v_cndmask_b32_e32 v1, s2, v1, vcc_lo
 ; GFX1032_ITERATIVE-NEXT:    s_mov_b32 s3, 0x31016000
 ; GFX1032_ITERATIVE-NEXT:    s_mov_b32 s2, -1
 ; GFX1032_ITERATIVE-NEXT:    s_waitcnt lgkmcnt(0)
@@ -11492,9 +11492,9 @@ define amdgpu_kernel void @max_i64_varying(ptr addrspace(1) %out) {
 ; GFX1164_ITERATIVE-NEXT:    v_readfirstlane_b32 s3, v3
 ; GFX1164_ITERATIVE-NEXT:    v_readfirstlane_b32 s2, v2
 ; GFX1164_ITERATIVE-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1164_ITERATIVE-NEXT:    v_cmp_gt_i64_e32 vcc, s[2:3], v[0:1]
-; GFX1164_ITERATIVE-NEXT:    v_cndmask_b32_e64 v1, v1, s3, vcc
-; GFX1164_ITERATIVE-NEXT:    v_cndmask_b32_e64 v0, v0, s2, vcc
+; GFX1164_ITERATIVE-NEXT:    v_cmp_le_i64_e32 vcc, s[2:3], v[0:1]
+; GFX1164_ITERATIVE-NEXT:    v_cndmask_b32_e32 v1, s3, v1, vcc
+; GFX1164_ITERATIVE-NEXT:    v_cndmask_b32_e32 v0, s2, v0, vcc
 ; GFX1164_ITERATIVE-NEXT:    s_mov_b32 s3, 0x31016000
 ; GFX1164_ITERATIVE-NEXT:    s_mov_b32 s2, -1
 ; GFX1164_ITERATIVE-NEXT:    s_waitcnt lgkmcnt(0)
@@ -11546,9 +11546,9 @@ define amdgpu_kernel void @max_i64_varying(ptr addrspace(1) %out) {
 ; GFX1132_ITERATIVE-NEXT:    v_readfirstlane_b32 s3, v3
 ; GFX1132_ITERATIVE-NEXT:    v_readfirstlane_b32 s2, v2
 ; GFX1132_ITERATIVE-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1132_ITERATIVE-NEXT:    v_cmp_gt_i64_e32 vcc_lo, s[2:3], v[0:1]
-; GFX1132_ITERATIVE-NEXT:    v_cndmask_b32_e64 v1, v1, s3, vcc_lo
-; GFX1132_ITERATIVE-NEXT:    v_cndmask_b32_e64 v0, v0, s2, vcc_lo
+; GFX1132_ITERATIVE-NEXT:    v_cmp_le_i64_e32 vcc_lo, s[2:3], v[0:1]
+; GFX1132_ITERATIVE-NEXT:    v_cndmask_b32_e32 v1, s3, v1, vcc_lo
+; GFX1132_ITERATIVE-NEXT:    v_cndmask_b32_e32 v0, s2, v0, vcc_lo
 ; GFX1132_ITERATIVE-NEXT:    s_mov_b32 s3, 0x31016000
 ; GFX1132_ITERATIVE-NEXT:    s_mov_b32 s2, -1
 ; GFX1132_ITERATIVE-NEXT:    s_waitcnt lgkmcnt(0)
@@ -11647,13 +11647,13 @@ define amdgpu_kernel void @max_i64_varying(ptr addrspace(1) %out) {
 ; GFX8_DPP-NEXT:    v_readfirstlane_b32 s4, v7
 ; GFX8_DPP-NEXT:    v_mov_b32_e32 v7, v1
 ; GFX8_DPP-NEXT:    v_mov_b32_e32 v8, v2
-; GFX8_DPP-NEXT:    v_cmp_gt_i64_e32 vcc, s[4:5], v[7:8]
+; GFX8_DPP-NEXT:    v_cmp_le_i64_e32 vcc, s[4:5], v[7:8]
 ; GFX8_DPP-NEXT:    v_mov_b32_e32 v0, s5
-; GFX8_DPP-NEXT:    v_cndmask_b32_e32 v8, v8, v0, vcc
+; GFX8_DPP-NEXT:    v_cndmask_b32_e32 v8, v0, v8, vcc
 ; GFX8_DPP-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX8_DPP-NEXT:    s_mov_b32 s3, 0xf000
 ; GFX8_DPP-NEXT:    s_mov_b32 s2, -1
-; GFX8_DPP-NEXT:    v_cndmask_b32_e32 v7, v7, v0, vcc
+; GFX8_DPP-NEXT:    v_cndmask_b32_e32 v7, v0, v7, vcc
 ; GFX8_DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX8_DPP-NEXT:    buffer_store_dwordx2 v[7:8], off, s[0:3], 0
 ; GFX8_DPP-NEXT:    s_endpgm
@@ -11736,13 +11736,13 @@ define amdgpu_kernel void @max_i64_varying(ptr addrspace(1) %out) {
 ; GFX9_DPP-NEXT:    v_readfirstlane_b32 s4, v7
 ; GFX9_DPP-NEXT:    v_mov_b32_e32 v7, v1
 ; GFX9_DPP-NEXT:    v_mov_b32_e32 v8, v2
-; GFX9_DPP-NEXT:    v_cmp_gt_i64_e32 vcc, s[4:5], v[7:8]
+; GFX9_DPP-NEXT:    v_cmp_le_i64_e32 vcc, s[4:5], v[7:8]
 ; GFX9_DPP-NEXT:    v_mov_b32_e32 v0, s5
-; GFX9_DPP-NEXT:    v_cndmask_b32_e32 v8, v8, v0, vcc
+; GFX9_DPP-NEXT:    v_cndmask_b32_e32 v8, v0, v8, vcc
 ; GFX9_DPP-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX9_DPP-NEXT:    s_mov_b32 s3, 0xf000
 ; GFX9_DPP-NEXT:    s_mov_b32 s2, -1
-; GFX9_DPP-NEXT:    v_cndmask_b32_e32 v7, v7, v0, vcc
+; GFX9_DPP-NEXT:    v_cndmask_b32_e32 v7, v0, v7, vcc
 ; GFX9_DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9_DPP-NEXT:    buffer_store_dwordx2 v[7:8], off, s[0:3], 0
 ; GFX9_DPP-NEXT:    s_endpgm
@@ -11848,9 +11848,9 @@ define amdgpu_kernel void @max_i64_varying(ptr addrspace(1) %out) {
 ; GFX1064_DPP-NEXT:    v_mov_b32_e32 v7, v4
 ; GFX1064_DPP-NEXT:    v_mov_b32_e32 v8, v5
 ; GFX1064_DPP-NEXT:    s_mov_b32 s3, 0x31016000
-; GFX1064_DPP-NEXT:    v_cmp_gt_i64_e32 vcc, s[4:5], v[7:8]
-; GFX1064_DPP-NEXT:    v_cndmask_b32_e64 v8, v8, s5, vcc
-; GFX1064_DPP-NEXT:    v_cndmask_b32_e64 v7, v7, s4, vcc
+; GFX1064_DPP-NEXT:    v_cmp_le_i64_e32 vcc, s[4:5], v[7:8]
+; GFX1064_DPP-NEXT:    v_cndmask_b32_e32 v8, s5, v8, vcc
+; GFX1064_DPP-NEXT:    v_cndmask_b32_e32 v7, s4, v7, vcc
 ; GFX1064_DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064_DPP-NEXT:    buffer_store_dwordx2 v[7:8], off, s[0:3], 0
 ; GFX1064_DPP-NEXT:    s_endpgm
@@ -11933,9 +11933,9 @@ define amdgpu_kernel void @max_i64_varying(ptr addrspace(1) %out) {
 ; GFX1032_DPP-NEXT:    v_mov_b32_e32 v7, v4
 ; GFX1032_DPP-NEXT:    v_mov_b32_e32 v8, v5
 ; GFX1032_DPP-NEXT:    s_mov_b32 s3, 0x31016000
-; GFX1032_DPP-NEXT:    v_cmp_gt_i64_e32 vcc_lo, s[4:5], v[7:8]
-; GFX1032_DPP-NEXT:    v_cndmask_b32_e64 v8, v8, s5, vcc_lo
-; GFX1032_DPP-NEXT:    v_cndmask_b32_e64 v7, v7, s4, vcc_lo
+; GFX1032_DPP-NEXT:    v_cmp_le_i64_e32 vcc_lo, s[4:5], v[7:8]
+; GFX1032_DPP-NEXT:    v_cndmask_b32_e32 v8, s5, v8, vcc_lo
+; GFX1032_DPP-NEXT:    v_cndmask_b32_e32 v7, s4, v7, vcc_lo
 ; GFX1032_DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1032_DPP-NEXT:    buffer_store_dwordx2 v[7:8], off, s[0:3], 0
 ; GFX1032_DPP-NEXT:    s_endpgm
@@ -12055,9 +12055,9 @@ define amdgpu_kernel void @max_i64_varying(ptr addrspace(1) %out) {
 ; GFX1164_DPP-NEXT:    v_mov_b32_e32 v8, v5
 ; GFX1164_DPP-NEXT:    s_mov_b32 s3, 0x31016000
 ; GFX1164_DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1164_DPP-NEXT:    v_cmp_gt_i64_e32 vcc, s[4:5], v[7:8]
-; GFX1164_DPP-NEXT:    v_cndmask_b32_e64 v8, v8, s5, vcc
-; GFX1164_DPP-NEXT:    v_cndmask_b32_e64 v7, v7, s4, vcc
+; GFX1164_DPP-NEXT:    v_cmp_le_i64_e32 vcc, s[4:5], v[7:8]
+; GFX1164_DPP-NEXT:    v_cndmask_b32_e32 v8, s5, v8, vcc
+; GFX1164_DPP-NEXT:    v_cndmask_b32_e32 v7, s4, v7, vcc
 ; GFX1164_DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164_DPP-NEXT:    buffer_store_b64 v[7:8], off, s[0:3], 0
 ; GFX1164_DPP-NEXT:    s_endpgm
@@ -12145,9 +12145,9 @@ define amdgpu_kernel void @max_i64_varying(ptr addrspace(1) %out) {
 ; GFX1132_DPP-NEXT:    v_mov_b32_e32 v8, v5
 ; GFX1132_DPP-NEXT:    s_mov_b32 s3, 0x31016000
 ; GFX1132_DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1132_DPP-NEXT:    v_cmp_gt_i64_e32 vcc_lo, s[4:5], v[7:8]
-; GFX1132_DPP-NEXT:    v_cndmask_b32_e64 v8, v8, s5, vcc_lo
-; GFX1132_DPP-NEXT:    v_cndmask_b32_e64 v7, v7, s4, vcc_lo
+; GFX1132_DPP-NEXT:    v_cmp_le_i64_e32 vcc_lo, s[4:5], v[7:8]
+; GFX1132_DPP-NEXT:    v_cndmask_b32_e32 v8, s5, v8, vcc_lo
+; GFX1132_DPP-NEXT:    v_cndmask_b32_e32 v7, s4, v7, vcc_lo
 ; GFX1132_DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1132_DPP-NEXT:    buffer_store_b64 v[7:8], off, s[0:3], 0
 ; GFX1132_DPP-NEXT:    s_endpgm
@@ -12790,10 +12790,10 @@ define amdgpu_kernel void @min_i64_constant(ptr addrspace(1) %out) {
 ; GFX7LESS-NEXT:    v_cndmask_b32_e64 v0, 5, -1, vcc
 ; GFX7LESS-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; GFX7LESS-NEXT:    v_mov_b32_e32 v2, s5
-; GFX7LESS-NEXT:    v_cmp_lt_i64_e32 vcc, s[4:5], v[0:1]
-; GFX7LESS-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
+; GFX7LESS-NEXT:    v_cmp_ge_i64_e32 vcc, s[4:5], v[0:1]
+; GFX7LESS-NEXT:    v_cndmask_b32_e32 v1, v2, v1, vcc
 ; GFX7LESS-NEXT:    v_mov_b32_e32 v2, s4
-; GFX7LESS-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX7LESS-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
 ; GFX7LESS-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX7LESS-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
 ; GFX7LESS-NEXT:    s_endpgm
@@ -12821,13 +12821,13 @@ define amdgpu_kernel void @min_i64_constant(ptr addrspace(1) %out) {
 ; GFX8-NEXT:    v_readfirstlane_b32 s5, v1
 ; GFX8-NEXT:    v_cndmask_b32_e32 v1, 0, v0, vcc
 ; GFX8-NEXT:    v_cndmask_b32_e64 v0, 5, -1, vcc
-; GFX8-NEXT:    v_cmp_lt_i64_e32 vcc, s[4:5], v[0:1]
+; GFX8-NEXT:    v_cmp_ge_i64_e32 vcc, s[4:5], v[0:1]
 ; GFX8-NEXT:    v_mov_b32_e32 v2, s5
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v2, v1, vcc
 ; GFX8-NEXT:    v_mov_b32_e32 v2, s4
 ; GFX8-NEXT:    s_mov_b32 s3, 0xf000
 ; GFX8-NEXT:    s_mov_b32 s2, -1
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
 ; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX8-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
 ; GFX8-NEXT:    s_endpgm
@@ -12854,13 +12854,13 @@ define amdgpu_kernel void @min_i64_constant(ptr addrspace(1) %out) {
 ; GFX9-NEXT:    v_readfirstlane_b32 s5, v1
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, 0, v0, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e64 v0, 5, -1, vcc
-; GFX9-NEXT:    v_cmp_lt_i64_e32 vcc, s[4:5], v[0:1]
+; GFX9-NEXT:    v_cmp_ge_i64_e32 vcc, s[4:5], v[0:1]
 ; GFX9-NEXT:    v_mov_b32_e32 v2, s5
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v2, v1, vcc
 ; GFX9-NEXT:    v_mov_b32_e32 v2, s4
 ; GFX9-NEXT:    s_mov_b32 s3, 0xf000
 ; GFX9-NEXT:    s_mov_b32 s2, -1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
 ; GFX9-NEXT:    s_endpgm
@@ -12888,9 +12888,9 @@ define amdgpu_kernel void @min_i64_constant(ptr addrspace(1) %out) {
 ; GFX1064-NEXT:    v_readfirstlane_b32 s2, v0
 ; GFX1064-NEXT:    v_cndmask_b32_e64 v1, 0, 0x7fffffff, vcc
 ; GFX1064-NEXT:    v_cndmask_b32_e64 v0, 5, -1, vcc
-; GFX1064-NEXT:    v_cmp_lt_i64_e32 vcc, s[2:3], v[0:1]
-; GFX1064-NEXT:    v_cndmask_b32_e64 v1, v1, s3, vcc
-; GFX1064-NEXT:    v_cndmask_b32_e64 v0, v0, s2, vcc
+; GFX1064-NEXT:    v_cmp_ge_i64_e32 vcc, s[2:3], v[0:1]
+; GFX1064-NEXT:    v_cndmask_b32_e32 v1, s3, v1, vcc
+; GFX1064-NEXT:    v_cndmask_b32_e32 v0, s2, v0, vcc
 ; GFX1064-NEXT:    s_mov_b32 s3, 0x31016000
 ; GFX1064-NEXT:    s_mov_b32 s2, -1
 ; GFX1064-NEXT:    s_waitcnt lgkmcnt(0)
@@ -12919,9 +12919,9 @@ define amdgpu_kernel void @min_i64_constant(ptr addrspace(1) %out) {
 ; GFX1032-NEXT:    v_readfirstlane_b32 s2, v0
 ; GFX1032-NEXT:    v_cndmask_b32_e64 v1, 0, 0x7fffffff, vcc_lo
 ; GFX1032-NEXT:    v_cndmask_b32_e64 v0, 5, -1, vcc_lo
-; GFX1032-NEXT:    v_cmp_lt_i64_e32 vcc_lo, s[2:3], v[0:1]
-; GFX1032-NEXT:    v_cndmask_b32_e64 v1, v1, s3, vcc_lo
-; GFX1032-NEXT:    v_cndmask_b32_e64 v0, v0, s2, vcc_lo
+; GFX1032-NEXT:    v_cmp_ge_i64_e32 vcc_lo, s[2:3], v[0:1]
+; GFX1032-NEXT:    v_cndmask_b32_e32 v1, s3, v1, vcc_lo
+; GFX1032-NEXT:    v_cndmask_b32_e32 v0, s2, v0, vcc_lo
 ; GFX1032-NEXT:    s_mov_b32 s3, 0x31016000
 ; GFX1032-NEXT:    s_mov_b32 s2, -1
 ; GFX1032-NEXT:    s_waitcnt lgkmcnt(0)
@@ -12952,9 +12952,9 @@ define amdgpu_kernel void @min_i64_constant(ptr addrspace(1) %out) {
 ; GFX1164-NEXT:    v_cndmask_b32_e64 v1, 0, 0x7fffffff, vcc
 ; GFX1164-NEXT:    v_cndmask_b32_e64 v0, 5, -1, vcc
 ; GFX1164-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1164-NEXT:    v_cmp_lt_i64_e32 vcc, s[2:3], v[0:1]
-; GFX1164-NEXT:    v_cndmask_b32_e64 v1, v1, s3, vcc
-; GFX1164-NEXT:    v_cndmask_b32_e64 v0, v0, s2, vcc
+; GFX1164-NEXT:    v_cmp_ge_i64_e32 vcc, s[2:3], v[0:1]
+; GFX1164-NEXT:    v_cndmask_b32_e32 v1, s3, v1, vcc
+; GFX1164-NEXT:    v_cndmask_b32_e32 v0, s2, v0, vcc
 ; GFX1164-NEXT:    s_mov_b32 s3, 0x31016000
 ; GFX1164-NEXT:    s_mov_b32 s2, -1
 ; GFX1164-NEXT:    s_waitcnt lgkmcnt(0)
@@ -12983,9 +12983,9 @@ define amdgpu_kernel void @min_i64_constant(ptr addrspace(1) %out) {
 ; GFX1132-NEXT:    v_cndmask_b32_e64 v1, 0, 0x7fffffff, vcc_lo
 ; GFX1132-NEXT:    v_cndmask_b32_e64 v0, 5, -1, vcc_lo
 ; GFX1132-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1132-NEXT:    v_cmp_lt_i64_e32 vcc_lo, s[2:3], v[0:1]
-; GFX1132-NEXT:    v_cndmask_b32_e64 v1, v1, s3, vcc_lo
-; GFX1132-NEXT:    v_cndmask_b32_e64 v0, v0, s2, vcc_lo
+; GFX1132-NEXT:    v_cmp_ge_i64_e32 vcc_lo, s[2:3], v[0:1]
+; GFX1132-NEXT:    v_cndmask_b32_e32 v1, s3, v1, vcc_lo
+; GFX1132-NEXT:    v_cndmask_b32_e32 v0, s2, v0, vcc_lo
 ; GFX1132-NEXT:    s_mov_b32 s3, 0x31016000
 ; GFX1132-NEXT:    s_mov_b32 s2, -1
 ; GFX1132-NEXT:    s_waitcnt lgkmcnt(0)
@@ -13047,10 +13047,10 @@ define amdgpu_kernel void @min_i64_varying(ptr addrspace(1) %out) {
 ; GFX7LESS_ITERATIVE-NEXT:    v_readfirstlane_b32 s5, v4
 ; GFX7LESS_ITERATIVE-NEXT:    v_readfirstlane_b32 s4, v3
 ; GFX7LESS_ITERATIVE-NEXT:    v_mov_b32_e32 v0, s5
-; GFX7LESS_ITERATIVE-NEXT:    v_cmp_lt_i64_e32 vcc, s[4:5], v[1:2]
-; GFX7LESS_ITERATIVE-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX7LESS_ITERATIVE-NEXT:    v_cmp_ge_i64_e32 vcc, s[4:5], v[1:2]
+; GFX7LESS_ITERATIVE-NEXT:    v_cndmask_b32_e32 v2, v0, v2, vcc
 ; GFX7LESS_ITERATIVE-NEXT:    v_mov_b32_e32 v0, s4
-; GFX7LESS_ITERATIVE-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX7LESS_ITERATIVE-NEXT:    v_cndmask_b32_e32 v1, v0, v1, vcc
 ; GFX7LESS_ITERATIVE-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX7LESS_ITERATIVE-NEXT:    buffer_store_dwordx2 v[1:2], off, s[0:3], 0
 ; GFX7LESS_ITERATIVE-NEXT:    s_endpgm
@@ -13100,13 +13100,13 @@ define amdgpu_kernel void @min_i64_varying(ptr addrspace(1) %out) {
 ; GFX8_ITERATIVE-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; GFX8_ITERATIVE-NEXT:    v_readfirstlane_b32 s5, v4
 ; GFX8_ITERATIVE-NEXT:    v_readfirstlane_b32 s4, v3
-; GFX8_ITERATIVE-NEXT:    v_cmp_lt_i64_e32 vcc, s[4:5], v[1:2]
+; GFX8_ITERATIVE-NEXT:    v_cmp_ge_i64_e32 vcc, s[4:5], v[1:2]
 ; GFX8_ITERATIVE-NEXT:    v_mov_b32_e32 v0, s5
-; GFX8_ITERATIVE-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX8_ITERATIVE-NEXT:    v_cndmask_b32_e32 v2, v0, v2, vcc
 ; GFX8_ITERATIVE-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX8_ITERATIVE-NEXT:    s_mov_b32 s3, 0xf000
 ; GFX8_ITERATIVE-NEXT:    s_mov_b32 s2, -1
-; GFX8_ITERATIVE-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX8_ITERATIVE-NEXT:    v_cndmask_b32_e32 v1, v0, v1, vcc
 ; GFX8_ITERATIVE-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX8_ITERATIVE-NEXT:    buffer_store_dwordx2 v[1:2], off, s[0:3], 0
 ; GFX8_ITERATIVE-NEXT:    s_endpgm
@@ -13155,13 +13155,13 @@ define amdgpu_kernel void @min_i64_varying(ptr addrspace(1) %out) {
 ; GFX9_ITERATIVE-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; GFX9_ITERATIVE-NEXT:    v_readfirstlane_b32 s5, v4
 ; GFX9_ITERATIVE-NEXT:    v_readfirstlane_b32 s4, v3
-; GFX9_ITERATIVE-NEXT:    v_cmp_lt_i64_e32 vcc, s[4:5], v[1:2]
+; GFX9_ITERATIVE-NEXT:    v_cmp_ge_i64_e32 vcc, s[4:5], v[1:2]
 ; GFX9_ITERATIVE-NEXT:    v_mov_b32_e32 v0, s5
-; GFX9_ITERATIVE-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX9_ITERATIVE-NEXT:    v_cndmask_b32_e32 v2, v0, v2, vcc
 ; GFX9_ITERATIVE-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX9_ITERATIVE-NEXT:    s_mov_b32 s3, 0xf000
 ; GFX9_ITERATIVE-NEXT:    s_mov_b32 s2, -1
-; GFX9_ITERATIVE-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX9_ITERATIVE-NEXT:    v_cndmask_b32_e32 v1, v0, v1, vcc
 ; GFX9_ITERATIVE-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9_ITERATIVE-NEXT:    buffer_store_dwordx2 v[1:2], off, s[0:3], 0
 ; GFX9_ITERATIVE-NEXT:    s_endpgm
@@ -13209,9 +13209,9 @@ define amdgpu_kernel void @min_i64_varying(ptr addrspace(1) %out) {
 ; GFX1064_ITERATIVE-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; GFX1064_ITERATIVE-NEXT:    v_readfirstlane_b32 s3, v4
 ; GFX1064_ITERATIVE-NEXT:    v_readfirstlane_b32 s2, v3
-; GFX1064_ITERATIVE-NEXT:    v_cmp_lt_i64_e32 vcc, s[2:3], v[1:2]
-; GFX1064_ITERATIVE-NEXT:    v_cndmask_b32_e64 v2, v2, s3, vcc
-; GFX1064_ITERATIVE-NEXT:    v_cndmask_b32_e64 v1, v1, s2, vcc
+; GFX1064_ITERATIVE-NEXT:    v_cmp_ge_i64_e32 vcc, s[2:3], v[1:2]
+; GFX1064_ITERATIVE-NEXT:    v_cndmask_b32_e32 v2, s3, v2, vcc
+; GFX1064_ITERATIVE-NEXT:    v_cndmask_b32_e32 v1, s2, v1, vcc
 ; GFX1064_ITERATIVE-NEXT:    s_mov_b32 s3, 0x31016000
 ; GFX1064_ITERATIVE-NEXT:    s_mov_b32 s2, -1
 ; GFX1064_ITERATIVE-NEXT:    s_waitcnt lgkmcnt(0)
@@ -13260,9 +13260,9 @@ define amdgpu_kernel void @min_i64_varying(ptr addrspace(1) %out) {
 ; GFX1032_ITERATIVE-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; GFX1032_ITERATIVE-NEXT:    v_readfirstlane_b32 s3, v4
 ; GFX1032_ITERATIVE-NEXT:    v_readfirstlane_b32 s2, v3
-; GFX1032_ITERATIVE-NEXT:    v_cmp_lt_i64_e32 vcc_lo, s[2:3], v[1:2]
-; GFX1032_ITERATIVE-NEXT:    v_cndmask_b32_e64 v2, v2, s3, vcc_lo
-; GFX1032_ITERATIVE-NEXT:    v_cndmask_b32_e64 v1, v1, s2, vcc_lo
+; GFX1032_ITERATIVE-NEXT:    v_cmp_ge_i64_e32 vcc_lo, s[2:3], v[1:2]
+; GFX1032_ITERATIVE-NEXT:    v_cndmask_b32_e32 v2, s3, v2, vcc_lo
+; GFX1032_ITERATIVE-NEXT:    v_cndmask_b32_e32 v1, s2, v1, vcc_lo
 ; GFX1032_ITERATIVE-NEXT:    s_mov_b32 s3, 0x31016000
 ; GFX1032_ITERATIVE-NEXT:    s_mov_b32 s2, -1
 ; GFX1032_ITERATIVE-NEXT:    s_waitcnt lgkmcnt(0)
@@ -13318,9 +13318,9 @@ define amdgpu_kernel void @min_i64_varying(ptr addrspace(1) %out) {
 ; GFX1164_ITERATIVE-NEXT:    v_readfirstlane_b32 s3, v3
 ; GFX1164_ITERATIVE-NEXT:    v_readfirstlane_b32 s2, v2
 ; GFX1164_ITERATIVE-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1164_ITERATIVE-NEXT:    v_cmp_lt_i64_e32 vcc, s[2:3], v[0:1]
-; GFX1164_ITERATIVE-NEXT:    v_cndmask_b32_e64 v1, v1, s3, vcc
-; GFX1164_ITERATIVE-NEXT:    v_cndmask_b32_e64 v0, v0, s2, vcc
+; GFX1164_ITERATIVE-NEXT:    v_cmp_ge_i64_e32 vcc, s[2:3], v[0:1]
+; GFX1164_ITERATIVE-NEXT:    v_cndmask_b32_e32 v1, s3, v1, vcc
+; GFX1164_ITERATIVE-NEXT:    v_cndmask_b32_e32 v0, s2, v0, vcc
 ; GFX1164_ITERATIVE-NEXT:    s_mov_b32 s3, 0x31016000
 ; GFX1164_ITERATIVE-NEXT:    s_mov_b32 s2, -1
 ; GFX1164_ITERATIVE-NEXT:    s_waitcnt lgkmcnt(0)
@@ -13372,9 +13372,9 @@ define amdgpu_kernel void @min_i64_varying(ptr addrspace(1) %out) {
 ; GFX1132_ITERATIVE-NEXT:    v_readfirstlane_b32 s3, v3
 ; GFX1132_ITERATIVE-NEXT:    v_readfirstlane_b32 s2, v2
 ; GFX1132_ITERATIVE-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1132_ITERATIVE-NEXT:    v_cmp_lt_i64_e32 vcc_lo, s[2:3], v[0:1]
-; GFX1132_ITERATIVE-NEXT:    v_cndmask_b32_e64 v1, v1, s3, vcc_lo
-; GFX1132_ITERATIVE-NEXT:    v_cndmask_b32_e64 v0, v0, s2, vcc_lo
+; GFX1132_ITERATIVE-NEXT:    v_cmp_ge_i64_e32 vcc_lo, s[2:3], v[0:1]
+; GFX1132_ITERATIVE-NEXT:    v_cndmask_b32_e32 v1, s3, v1, vcc_lo
+; GFX1132_ITERATIVE-NEXT:    v_cndmask_b32_e32 v0, s2, v0, vcc_lo
 ; GFX1132_ITERATIVE-NEXT:    s_mov_b32 s3, 0x31016000
 ; GFX1132_ITERATIVE-NEXT:    s_mov_b32 s2, -1
 ; GFX1132_ITERATIVE-NEXT:    s_waitcnt lgkmcnt(0)
@@ -13473,13 +13473,13 @@ define amdgpu_kernel void @min_i64_varying(ptr addrspace(1) %out) {
 ; GFX8_DPP-NEXT:    v_readfirstlane_b32 s4, v7
 ; GFX8_DPP-NEXT:    v_mov_b32_e32 v7, v1
 ; GFX8_DPP-NEXT:    v_mov_b32_e32 v8, v2
-; GFX8_DPP-NEXT:    v_cmp_lt_i64_e32 vcc, s[4:5], v[7:8]
+; GFX8_DPP-NEXT:    v_cmp_ge_i64_e32 vcc, s[4:5], v[7:8]
 ; GFX8_DPP-NEXT:    v_mov_b32_e32 v0, s5
-; GFX8_DPP-NEXT:    v_cndmask_b32_e32 v8, v8, v0, vcc
+; GFX8_DPP-NEXT:    v_cndmask_b32_e32 v8, v0, v8, vcc
 ; GFX8_DPP-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX8_DPP-NEXT:    s_mov_b32 s3, 0xf000
 ; GFX8_DPP-NEXT:    s_mov_b32 s2, -1
-; GFX8_DPP-NEXT:    v_cndmask_b32_e32 v7, v7, v0, vcc
+; GFX8_DPP-NEXT:    v_cndmask_b32_e32 v7, v0, v7, vcc
 ; GFX8_DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX8_DPP-NEXT:    buffer_store_dwordx2 v[7:8], off, s[0:3], 0
 ; GFX8_DPP-NEXT:    s_endpgm
@@ -13562,13 +13562,13 @@ define amdgpu_kernel void @min_i64_varying(ptr addrspace(1) %out) {
 ; GFX9_DPP-NEXT:    v_readfirstlane_b32 s4, v7
 ; GFX9_DPP-NEXT:    v_mov_b32_e32 v7, v1
 ; GFX9_DPP-NEXT:    v_mov_b32_e32 v8, v2
-; GFX9_DPP-NEXT:    v_cmp_lt_i64_e32 vcc, s[4:5], v[7:8]
+; GFX9_DPP-NEXT:    v_cmp_ge_i64_e32 vcc, s[4:5], v[7:8]
 ; GFX9_DPP-NEXT:    v_mov_b32_e32 v0, s5
-; GFX9_DPP-NEXT:    v_cndmask_b32_e32 v8, v8, v0, vcc
+; GFX9_DPP-NEXT:    v_cndmask_b32_e32 v8, v0, v8, vcc
 ; GFX9_DPP-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX9_DPP-NEXT:    s_mov_b32 s3, 0xf000
 ; GFX9_DPP-NEXT:    s_mov_b32 s2, -1
-; GFX9_DPP-NEXT:    v_cndmask_b32_e32 v7, v7, v0, vcc
+; GFX9_DPP-NEXT:    v_cndmask_b32_e32 v7, v0, v7, vcc
 ; GFX9_DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9_DPP-NEXT:    buffer_store_dwordx2 v[7:8], off, s[0:3], 0
 ; GFX9_DPP-NEXT:    s_endpgm
@@ -13674,9 +13674,9 @@ define amdgpu_kernel void @min_i64_varying(ptr addrspace(1) %out) {
 ; GFX1064_DPP-NEXT:    v_mov_b32_e32 v7, v4
 ; GFX1064_DPP-NEXT:    v_mov_b32_e32 v8, v5
 ; GFX1064_DPP-NEXT:    s_mov_b32 s3, 0x31016000
-; GFX1064_DPP-NEXT:    v_cmp_lt_i64_e32 vcc, s[4:5], v[7:8]
-; GFX1064_DPP-NEXT:    v_cndmask_b32_e64 v8, v8, s5, vcc
-; GFX1064_DPP-NEXT:    v_cndmask_b32_e64 v7, v7, s4, vcc
+; GFX1064_DPP-NEXT:    v_cmp_ge_i64_e32 vcc, s[4:5], v[7:8]
+; GFX1064_DPP-NEXT:    v_cndmask_b32_e32 v8, s5, v8, vcc
+; GFX1064_DPP-NEXT:    v_cndmask_b32_e32 v7, s4, v7, vcc
 ; GFX1064_DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064_DPP-NEXT:    buffer_store_dwordx2 v[7:8], off, s[0:3], 0
 ; GFX1064_DPP-NEXT:    s_endpgm
@@ -13759,9 +13759,9 @@ define amdgpu_kernel void @min_i64_varying(ptr addrspace(1) %out) {
 ; GFX1032_DPP-NEXT:    v_mov_b32_e32 v7, v4
 ; GFX1032_DPP-NEXT:    v_mov_b32_e32 v8, v5
 ; GFX1032_DPP-NEXT:    s_mov_b32 s3, 0x31016000
-; GFX1032_DPP-NEXT:    v_cmp_lt_i64_e32 vcc_lo, s[4:5], v[7:8]
-; GFX1032_DPP-NEXT:    v_cndmask_b32_e64 v8, v8, s5, vcc_lo
-; GFX1032_DPP-NEXT:    v_cndmask_b32_e64 v7, v7, s4, vcc_lo
+; GFX1032_DPP-NEXT:    v_cmp_ge_i64_e32 vcc_lo, s[4:5], v[7:8]
+; GFX1032_DPP-NEXT:    v_cndmask_b32_e32 v8, s5, v8, vcc_lo
+; GFX1032_DPP-NEXT:    v_cndmask_b32_e32 v7, s4, v7, vcc_lo
 ; GFX1032_DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1032_DPP-NEXT:    buffer_store_dwordx2 v[7:8], off, s[0:3], 0
 ; GFX1032_DPP-NEXT:    s_endpgm
@@ -13881,9 +13881,9 @@ define amdgpu_kernel void @min_i64_varying(ptr addrspace(1) %out) {
 ; GFX1164_DPP-NEXT:    v_mov_b32_e32 v8, v5
 ; GFX1164_DPP-NEXT:    s_mov_b32 s3, 0x31016000
 ; GFX1164_DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1164_DPP-NEXT:    v_cmp_lt_i64_e32 vcc, s[4:5], v[7:8]
-; GFX1164_DPP-NEXT:    v_cndmask_b32_e64 v8, v8, s5, vcc
-; GFX1164_DPP-NEXT:    v_cndmask_b32_e64 v7, v7, s4, vcc
+; GFX1164_DPP-NEXT:    v_cmp_ge_i64_e32 vcc, s[4:5], v[7:8]
+; GFX1164_DPP-NEXT:    v_cndmask_b32_e32 v8, s5, v8, vcc
+; GFX1164_DPP-NEXT:    v_cndmask_b32_e32 v7, s4, v7, vcc
 ; GFX1164_DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164_DPP-NEXT:    buffer_store_b64 v[7:8], off, s[0:3], 0
 ; GFX1164_DPP-NEXT:    s_endpgm
@@ -13971,9 +13971,9 @@ define amdgpu_kernel void @min_i64_varying(ptr addrspace(1) %out) {
 ; GFX1132_DPP-NEXT:    v_mov_b32_e32 v8, v5
 ; GFX1132_DPP-NEXT:    s_mov_b32 s3, 0x31016000
 ; GFX1132_DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1132_DPP-NEXT:    v_cmp_lt_i64_e32 vcc_lo, s[4:5], v[7:8]
-; GFX1132_DPP-NEXT:    v_cndmask_b32_e64 v8, v8, s5, vcc_lo
-; GFX1132_DPP-NEXT:    v_cndmask_b32_e64 v7, v7, s4, vcc_lo
+; GFX1132_DPP-NEXT:    v_cmp_ge_i64_e32 vcc_lo, s[4:5], v[7:8]
+; GFX1132_DPP-NEXT:    v_cndmask_b32_e32 v8, s5, v8, vcc_lo
+; GFX1132_DPP-NEXT:    v_cndmask_b32_e32 v7, s4, v7, vcc_lo
 ; GFX1132_DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1132_DPP-NEXT:    buffer_store_b64 v[7:8], off, s[0:3], 0
 ; GFX1132_DPP-NEXT:    s_endpgm
@@ -14614,10 +14614,10 @@ define amdgpu_kernel void @umax_i64_constant(ptr addrspace(1) %out) {
 ; GFX7LESS-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX7LESS-NEXT:    v_cndmask_b32_e64 v0, 5, 0, vcc
 ; GFX7LESS-NEXT:    v_mov_b32_e32 v2, s4
-; GFX7LESS-NEXT:    v_cmp_gt_u64_e32 vcc, s[4:5], v[0:1]
-; GFX7LESS-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX7LESS-NEXT:    v_cmp_le_u64_e32 vcc, s[4:5], v[0:1]
+; GFX7LESS-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
 ; GFX7LESS-NEXT:    v_mov_b32_e32 v1, s5
-; GFX7LESS-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX7LESS-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
 ; GFX7LESS-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX7LESS-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
 ; GFX7LESS-NEXT:    s_endpgm
@@ -14644,13 +14644,13 @@ define amdgpu_kernel void @umax_i64_constant(ptr addrspace(1) %out) {
 ; GFX8-NEXT:    v_readfirstlane_b32 s4, v0
 ; GFX8-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX8-NEXT:    v_cndmask_b32_e64 v0, 5, 0, vcc
-; GFX8-NEXT:    v_cmp_gt_u64_e32 vcc, s[4:5], v[0:1]
+; GFX8-NEXT:    v_cmp_le_u64_e32 vcc, s[4:5], v[0:1]
 ; GFX8-NEXT:    v_mov_b32_e32 v2, s4
 ; GFX8-NEXT:    v_mov_b32_e32 v1, s5
 ; GFX8-NEXT:    s_mov_b32 s3, 0xf000
 ; GFX8-NEXT:    s_mov_b32 s2, -1
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
+; GFX8-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
 ; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX8-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
 ; GFX8-NEXT:    s_endpgm
@@ -14676,13 +14676,13 @@ define amdgpu_kernel void @umax_i64_constant(ptr addrspace(1) %out) {
 ; GFX9-NEXT:    v_readfirstlane_b32 s4, v0
 ; GFX9-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX9-NEXT:    v_cndmask_b32_e64 v0, 5, 0, vcc
-; GFX9-NEXT:    v_cmp_gt_u64_e32 vcc, s[4:5], v[0:1]
+; GFX9-NEXT:    v_cmp_le_u64_e32 vcc, s[4:5], v[0:1]
 ; GFX9-NEXT:    v_mov_b32_e32 v2, s4
 ; GFX9-NEXT:    v_mov_b32_e32 v1, s5
 ; GFX9-NEXT:    s_mov_b32 s3, 0xf000
 ; GFX9-NEXT:    s_mov_b32 s2, -1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
+; GFX9-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
 ; GFX9-NEXT:    s_endpgm
@@ -14710,9 +14710,9 @@ define amdgpu_kernel void @umax_i64_constant(ptr addrspace(1) %out) {
 ; GFX1064-NEXT:    v_readfirstlane_b32 s2, v0
 ; GFX1064-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX1064-NEXT:    v_cndmask_b32_e64 v0, 5, 0, vcc
-; GFX1064-NEXT:    v_cmp_gt_u64_e32 vcc, s[2:3], v[0:1]
-; GFX1064-NEXT:    v_cndmask_b32_e64 v0, v0, s2, vcc
-; GFX1064-NEXT:    v_cndmask_b32_e64 v1, 0, s3, vcc
+; GFX1064-NEXT:    v_cmp_le_u64_e32 vcc, s[2:3], v[0:1]
+; GFX1064-NEXT:    v_cndmask_b32_e32 v0, s2, v0, vcc
+; GFX1064-NEXT:    v_cndmask_b32_e64 v1, s3, 0, vcc
 ; GFX1064-NEXT:    s_mov_b32 s3, 0x31016000
 ; GFX1064-NEXT:    s_mov_b32 s2, -1
 ; GFX1064-NEXT:    s_waitcnt lgkmcnt(0)
@@ -14741,9 +14741,9 @@ define amdgpu_kernel void @umax_i64_constant(ptr addrspace(1) %out) {
 ; GFX1032-NEXT:    v_readfirstlane_b32 s2, v0
 ; GFX1032-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX1032-NEXT:    v_cndmask_b32_e64 v0, 5, 0, vcc_lo
-; GFX1032-NEXT:    v_cmp_gt_u64_e32 vcc_lo, s[2:3], v[0:1]
-; GFX1032-NEXT:    v_cndmask_b32_e64 v0, v0, s2, vcc_lo
-; GFX1032-NEXT:    v_cndmask_b32_e64 v1, 0, s3, vcc_lo
+; GFX1032-NEXT:    v_cmp_le_u64_e32 vcc_lo, s[2:3], v[0:1]
+; GFX1032-NEXT:    v_cndmask_b32_e32 v0, s2, v0, vcc_lo
+; GFX1032-NEXT:    v_cndmask_b32_e64 v1, s3, 0, vcc_lo
 ; GFX1032-NEXT:    s_mov_b32 s3, 0x31016000
 ; GFX1032-NEXT:    s_mov_b32 s2, -1
 ; GFX1032-NEXT:    s_waitcnt lgkmcnt(0)
@@ -14774,9 +14774,9 @@ define amdgpu_kernel void @umax_i64_constant(ptr addrspace(1) %out) {
 ; GFX1164-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX1164-NEXT:    v_cndmask_b32_e64 v0, 5, 0, vcc
 ; GFX1164-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1164-NEXT:    v_cmp_gt_u64_e32 vcc, s[2:3], v[0:1]
-; GFX1164-NEXT:    v_cndmask_b32_e64 v0, v0, s2, vcc
-; GFX1164-NEXT:    v_cndmask_b32_e64 v1, 0, s3, vcc
+; GFX1164-NEXT:    v_cmp_le_u64_e32 vcc, s[2:3], v[0:1]
+; GFX1164-NEXT:    v_cndmask_b32_e32 v0, s2, v0, vcc
+; GFX1164-NEXT:    v_cndmask_b32_e64 v1, s3, 0, vcc
 ; GFX1164-NEXT:    s_mov_b32 s3, 0x31016000
 ; GFX1164-NEXT:    s_mov_b32 s2, -1
 ; GFX1164-NEXT:    s_waitcnt lgkmcnt(0)
@@ -14802,12 +14802,12 @@ define amdgpu_kernel void @umax_i64_constant(ptr addrspace(1) %out) {
 ; GFX1132-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
 ; GFX1132-NEXT:    v_readfirstlane_b32 s3, v1
 ; GFX1132-NEXT:    v_readfirstlane_b32 s2, v0
-; GFX1132-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX1132-NEXT:    v_cndmask_b32_e64 v0, 5, 0, vcc_lo
+; GFX1132-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX1132-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1132-NEXT:    v_cmp_gt_u64_e32 vcc_lo, s[2:3], v[0:1]
-; GFX1132-NEXT:    v_cndmask_b32_e64 v0, v0, s2, vcc_lo
-; GFX1132-NEXT:    v_cndmask_b32_e64 v1, 0, s3, vcc_lo
+; GFX1132-NEXT:    v_cmp_le_u64_e32 vcc_lo, s[2:3], v[0:1]
+; GFX1132-NEXT:    v_cndmask_b32_e32 v0, s2, v0, vcc_lo
+; GFX1132-NEXT:    v_cndmask_b32_e64 v1, s3, 0, vcc_lo
 ; GFX1132-NEXT:    s_mov_b32 s3, 0x31016000
 ; GFX1132-NEXT:    s_mov_b32 s2, -1
 ; GFX1132-NEXT:    s_waitcnt lgkmcnt(0)
@@ -14868,10 +14868,10 @@ define amdgpu_kernel void @umax_i64_varying(ptr addrspace(1) %out) {
 ; GFX7LESS_ITERATIVE-NEXT:    v_readfirstlane_b32 s5, v4
 ; GFX7LESS_ITERATIVE-NEXT:    v_readfirstlane_b32 s4, v3
 ; GFX7LESS_ITERATIVE-NEXT:    v_mov_b32_e32 v0, s5
-; GFX7LESS_ITERATIVE-NEXT:    v_cmp_gt_u64_e32 vcc, s[4:5], v[1:2]
-; GFX7LESS_ITERATIVE-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX7LESS_ITERATIVE-NEXT:    v_cmp_le_u64_e32 vcc, s[4:5], v[1:2]
+; GFX7LESS_ITERATIVE-NEXT:    v_cndmask_b32_e32 v2, v0, v2, vcc
 ; GFX7LESS_ITERATIVE-NEXT:    v_mov_b32_e32 v0, s4
-; GFX7LESS_ITERATIVE-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX7LESS_ITERATIVE-NEXT:    v_cndmask_b32_e32 v1, v0, v1, vcc
 ; GFX7LESS_ITERATIVE-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX7LESS_ITERATIVE-NEXT:    buffer_store_dwordx2 v[1:2], off, s[0:3], 0
 ; GFX7LESS_ITERATIVE-NEXT:    s_endpgm
@@ -14920,13 +14920,13 @@ define amdgpu_kernel void @umax_i64_varying(ptr addrspace(1) %out) {
 ; GFX8_ITERATIVE-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; GFX8_ITERATIVE-NEXT:    v_readfirstlane_b32 s5, v4
 ; GFX8_ITERATIVE-NEXT:    v_readfirstlane_b32 s4, v3
-; GFX8_ITERATIVE-NEXT:    v_cmp_gt_u64_e32 vcc, s[4:5], v[1:2]
+; GFX8_ITERATIVE-NEXT:    v_cmp_le_u64_e32 vcc, s[4:5], v[1:2]
 ; GFX8_ITERATIVE-NEXT:    v_mov_b32_e32 v0, s5
-; GFX8_ITERATIVE-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX8_ITERATIVE-NEXT:    v_cndmask_b32_e32 v2, v0, v2, vcc
 ; GFX8_ITERATIVE-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX8_ITERATIVE-NEXT:    s_mov_b32 s3, 0xf000
 ; GFX8_ITERATIVE-NEXT:    s_mov_b32 s2, -1
-; GFX8_ITERATIVE-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX8_ITERATIVE-NEXT:    v_cndmask_b32_e32 v1, v0, v1, vcc
 ; GFX8_ITERATIVE-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX8_ITERATIVE-NEXT:    buffer_store_dwordx2 v[1:2], off, s[0:3], 0
 ; GFX8_ITERATIVE-NEXT:    s_endpgm
@@ -14974,13 +14974,13 @@ define amdgpu_kernel void @umax_i64_varying(ptr addrspace(1) %out) {
 ; GFX9_ITERATIVE-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; GFX9_ITERATIVE-NEXT:    v_readfirstlane_b32 s5, v4
 ; GFX9_ITERATIVE-NEXT:    v_readfirstlane_b32 s4, v3
-; GFX9_ITERATIVE-NEXT:    v_cmp_gt_u64_e32 vcc, s[4:5], v[1:2]
+; GFX9_ITERATIVE-NEXT:    v_cmp_le_u64_e32 vcc, s[4:5], v[1:2]
 ; GFX9_ITERATIVE-NEXT:    v_mov_b32_e32 v0, s5
-; GFX9_ITERATIVE-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX9_ITERATIVE-NEXT:    v_cndmask_b32_e32 v2, v0, v2, vcc
 ; GFX9_ITERATIVE-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX9_ITERATIVE-NEXT:    s_mov_b32 s3, 0xf000
 ; GFX9_ITERATIVE-NEXT:    s_mov_b32 s2, -1
-; GFX9_ITERATIVE-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX9_ITERATIVE-NEXT:    v_cndmask_b32_e32 v1, v0, v1, vcc
 ; GFX9_ITERATIVE-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9_ITERATIVE-NEXT:    buffer_store_dwordx2 v[1:2], off, s[0:3], 0
 ; GFX9_ITERATIVE-NEXT:    s_endpgm
@@ -15027,9 +15027,9 @@ define amdgpu_kernel void @umax_i64_varying(ptr addrspace(1) %out) {
 ; GFX1064_ITERATIVE-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; GFX1064_ITERATIVE-NEXT:    v_readfirstlane_b32 s3, v4
 ; GFX1064_ITERATIVE-NEXT:    v_readfirstlane_b32 s2, v3
-; GFX1064_ITERATIVE-NEXT:    v_cmp_gt_u64_e32 vcc, s[2:3], v[1:2]
-; GFX1064_ITERATIVE-NEXT:    v_cndmask_b32_e64 v2, v2, s3, vcc
-; GFX1064_ITERATIVE-NEXT:    v_cndmask_b32_e64 v1, v1, s2, vcc
+; GFX1064_ITERATIVE-NEXT:    v_cmp_le_u64_e32 vcc, s[2:3], v[1:2]
+; GFX1064_ITERATIVE-NEXT:    v_cndmask_b32_e32 v2, s3, v2, vcc
+; GFX1064_ITERATIVE-NEXT:    v_cndmask_b32_e32 v1, s2, v1, vcc
 ; GFX1064_ITERATIVE-NEXT:    s_mov_b32 s3, 0x31016000
 ; GFX1064_ITERATIVE-NEXT:    s_mov_b32 s2, -1
 ; GFX1064_ITERATIVE-NEXT:    s_waitcnt lgkmcnt(0)
@@ -15077,9 +15077,9 @@ define amdgpu_kernel void @umax_i64_varying(ptr addrspace(1) %out) {
 ; GFX1032_ITERATIVE-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; GFX1032_ITERATIVE-NEXT:    v_readfirstlane_b32 s3, v4
 ; GFX1032_ITERATIVE-NEXT:    v_readfirstlane_b32 s2, v3
-; GFX1032_ITERATIVE-NEXT:    v_cmp_gt_u64_e32 vcc_lo, s[2:3], v[1:2]
-; GFX1032_ITERATIVE-NEXT:    v_cndmask_b32_e64 v2, v2, s3, vcc_lo
-; GFX1032_ITERATIVE-NEXT:    v_cndmask_b32_e64 v1, v1, s2, vcc_lo
+; GFX1032_ITERATIVE-NEXT:    v_cmp_le_u64_e32 vcc_lo, s[2:3], v[1:2]
+; GFX1032_ITERATIVE-NEXT:    v_cndmask_b32_e32 v2, s3, v2, vcc_lo
+; GFX1032_ITERATIVE-NEXT:    v_cndmask_b32_e32 v1, s2, v1, vcc_lo
 ; GFX1032_ITERATIVE-NEXT:    s_mov_b32 s3, 0x31016000
 ; GFX1032_ITERATIVE-NEXT:    s_mov_b32 s2, -1
 ; GFX1032_ITERATIVE-NEXT:    s_waitcnt lgkmcnt(0)
@@ -15135,9 +15135,9 @@ define amdgpu_kernel void @umax_i64_varying(ptr addrspace(1) %out) {
 ; GFX1164_ITERATIVE-NEXT:    v_readfirstlane_b32 s3, v3
 ; GFX1164_ITERATIVE-NEXT:    v_readfirstlane_b32 s2, v2
 ; GFX1164_ITERATIVE-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1164_ITERATIVE-NEXT:    v_cmp_gt_u64_e32 vcc, s[2:3], v[0:1]
-; GFX1164_ITERATIVE-NEXT:    v_cndmask_b32_e64 v1, v1, s3, vcc
-; GFX1164_ITERATIVE-NEXT:    v_cndmask_b32_e64 v0, v0, s2, vcc
+; GFX1164_ITERATIVE-NEXT:    v_cmp_le_u64_e32 vcc, s[2:3], v[0:1]
+; GFX1164_ITERATIVE-NEXT:    v_cndmask_b32_e32 v1, s3, v1, vcc
+; GFX1164_ITERATIVE-NEXT:    v_cndmask_b32_e32 v0, s2, v0, vcc
 ; GFX1164_ITERATIVE-NEXT:    s_mov_b32 s3, 0x31016000
 ; GFX1164_ITERATIVE-NEXT:    s_mov_b32 s2, -1
 ; GFX1164_ITERATIVE-NEXT:    s_waitcnt lgkmcnt(0)
@@ -15189,9 +15189,9 @@ define amdgpu_kernel void @umax_i64_varying(ptr addrspace(1) %out) {
 ; GFX1132_ITERATIVE-NEXT:    v_readfirstlane_b32 s3, v3
 ; GFX1132_ITERATIVE-NEXT:    v_readfirstlane_b32 s2, v2
 ; GFX1132_ITERATIVE-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1132_ITERATIVE-NEXT:    v_cmp_gt_u64_e32 vcc_lo, s[2:3], v[0:1]
-; GFX1132_ITERATIVE-NEXT:    v_cndmask_b32_e64 v1, v1, s3, vcc_lo
-; GFX1132_ITERATIVE-NEXT:    v_cndmask_b32_e64 v0, v0, s2, vcc_lo
+; GFX1132_ITERATIVE-NEXT:    v_cmp_le_u64_e32 vcc_lo, s[2:3], v[0:1]
+; GFX1132_ITERATIVE-NEXT:    v_cndmask_b32_e32 v1, s3, v1, vcc_lo
+; GFX1132_ITERATIVE-NEXT:    v_cndmask_b32_e32 v0, s2, v0, vcc_lo
 ; GFX1132_ITERATIVE-NEXT:    s_mov_b32 s3, 0x31016000
 ; GFX1132_ITERATIVE-NEXT:    s_mov_b32 s2, -1
 ; GFX1132_ITERATIVE-NEXT:    s_waitcnt lgkmcnt(0)
@@ -15291,13 +15291,13 @@ define amdgpu_kernel void @umax_i64_varying(ptr addrspace(1) %out) {
 ; GFX8_DPP-NEXT:    v_readfirstlane_b32 s4, v5
 ; GFX8_DPP-NEXT:    v_mov_b32_e32 v5, v3
 ; GFX8_DPP-NEXT:    v_mov_b32_e32 v6, v4
-; GFX8_DPP-NEXT:    v_cmp_gt_u64_e32 vcc, s[4:5], v[5:6]
+; GFX8_DPP-NEXT:    v_cmp_le_u64_e32 vcc, s[4:5], v[5:6]
 ; GFX8_DPP-NEXT:    v_mov_b32_e32 v0, s5
-; GFX8_DPP-NEXT:    v_cndmask_b32_e32 v6, v6, v0, vcc
+; GFX8_DPP-NEXT:    v_cndmask_b32_e32 v6, v0, v6, vcc
 ; GFX8_DPP-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX8_DPP-NEXT:    s_mov_b32 s3, 0xf000
 ; GFX8_DPP-NEXT:    s_mov_b32 s2, -1
-; GFX8_DPP-NEXT:    v_cndmask_b32_e32 v5, v5, v0, vcc
+; GFX8_DPP-NEXT:    v_cndmask_b32_e32 v5, v0, v5, vcc
 ; GFX8_DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX8_DPP-NEXT:    buffer_store_dwordx2 v[5:6], off, s[0:3], 0
 ; GFX8_DPP-NEXT:    s_endpgm
@@ -15381,13 +15381,13 @@ define amdgpu_kernel void @umax_i64_varying(ptr addrspace(1) %out) {
 ; GFX9_DPP-NEXT:    v_readfirstlane_b32 s4, v5
 ; GFX9_DPP-NEXT:    v_mov_b32_e32 v5, v3
 ; GFX9_DPP-NEXT:    v_mov_b32_e32 v6, v4
-; GFX9_DPP-NEXT:    v_cmp_gt_u64_e32 vcc, s[4:5], v[5:6]
+; GFX9_DPP-NEXT:    v_cmp_le_u64_e32 vcc, s[4:5], v[5:6]
 ; GFX9_DPP-NEXT:    v_mov_b32_e32 v0, s5
-; GFX9_DPP-NEXT:    v_cndmask_b32_e32 v6, v6, v0, vcc
+; GFX9_DPP-NEXT:    v_cndmask_b32_e32 v6, v0, v6, vcc
 ; GFX9_DPP-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX9_DPP-NEXT:    s_mov_b32 s3, 0xf000
 ; GFX9_DPP-NEXT:    s_mov_b32 s2, -1
-; GFX9_DPP-NEXT:    v_cndmask_b32_e32 v5, v5, v0, vcc
+; GFX9_DPP-NEXT:    v_cndmask_b32_e32 v5, v0, v5, vcc
 ; GFX9_DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9_DPP-NEXT:    buffer_store_dwordx2 v[5:6], off, s[0:3], 0
 ; GFX9_DPP-NEXT:    s_endpgm
@@ -15493,9 +15493,9 @@ define amdgpu_kernel void @umax_i64_varying(ptr addrspace(1) %out) {
 ; GFX1064_DPP-NEXT:    v_mov_b32_e32 v7, v4
 ; GFX1064_DPP-NEXT:    v_mov_b32_e32 v8, v5
 ; GFX1064_DPP-NEXT:    s_mov_b32 s3, 0x31016000
-; GFX1064_DPP-NEXT:    v_cmp_gt_u64_e32 vcc, s[4:5], v[7:8]
-; GFX1064_DPP-NEXT:    v_cndmask_b32_e64 v8, v8, s5, vcc
-; GFX1064_DPP-NEXT:    v_cndmask_b32_e64 v7, v7, s4, vcc
+; GFX1064_DPP-NEXT:    v_cmp_le_u64_e32 vcc, s[4:5], v[7:8]
+; GFX1064_DPP-NEXT:    v_cndmask_b32_e32 v8, s5, v8, vcc
+; GFX1064_DPP-NEXT:    v_cndmask_b32_e32 v7, s4, v7, vcc
 ; GFX1064_DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064_DPP-NEXT:    buffer_store_dwordx2 v[7:8], off, s[0:3], 0
 ; GFX1064_DPP-NEXT:    s_endpgm
@@ -15578,9 +15578,9 @@ define amdgpu_kernel void @umax_i64_varying(ptr addrspace(1) %out) {
 ; GFX1032_DPP-NEXT:    v_mov_b32_e32 v7, v4
 ; GFX1032_DPP-NEXT:    v_mov_b32_e32 v8, v5
 ; GFX1032_DPP-NEXT:    s_mov_b32 s3, 0x31016000
-; GFX1032_DPP-NEXT:    v_cmp_gt_u64_e32 vcc_lo, s[4:5], v[7:8]
-; GFX1032_DPP-NEXT:    v_cndmask_b32_e64 v8, v8, s5, vcc_lo
-; GFX1032_DPP-NEXT:    v_cndmask_b32_e64 v7, v7, s4, vcc_lo
+; GFX1032_DPP-NEXT:    v_cmp_le_u64_e32 vcc_lo, s[4:5], v[7:8]
+; GFX1032_DPP-NEXT:    v_cndmask_b32_e32 v8, s5, v8, vcc_lo
+; GFX1032_DPP-NEXT:    v_cndmask_b32_e32 v7, s4, v7, vcc_lo
 ; GFX1032_DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1032_DPP-NEXT:    buffer_store_dwordx2 v[7:8], off, s[0:3], 0
 ; GFX1032_DPP-NEXT:    s_endpgm
@@ -15700,9 +15700,9 @@ define amdgpu_kernel void @umax_i64_varying(ptr addrspace(1) %out) {
 ; GFX1164_DPP-NEXT:    v_mov_b32_e32 v8, v5
 ; GFX1164_DPP-NEXT:    s_mov_b32 s3, 0x31016000
 ; GFX1164_DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1164_DPP-NEXT:    v_cmp_gt_u64_e32 vcc, s[4:5], v[7:8]
-; GFX1164_DPP-NEXT:    v_cndmask_b32_e64 v8, v8, s5, vcc
-; GFX1164_DPP-NEXT:    v_cndmask_b32_e64 v7, v7, s4, vcc
+; GFX1164_DPP-NEXT:    v_cmp_le_u64_e32 vcc, s[4:5], v[7:8]
+; GFX1164_DPP-NEXT:    v_cndmask_b32_e32 v8, s5, v8, vcc
+; GFX1164_DPP-NEXT:    v_cndmask_b32_e32 v7, s4, v7, vcc
 ; GFX1164_DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164_DPP-NEXT:    buffer_store_b64 v[7:8], off, s[0:3], 0
 ; GFX1164_DPP-NEXT:    s_endpgm
@@ -15784,9 +15784,9 @@ define amdgpu_kernel void @umax_i64_varying(ptr addrspace(1) %out) {
 ; GFX1132_DPP-NEXT:    v_mov_b32_e32 v8, v5
 ; GFX1132_DPP-NEXT:    s_mov_b32 s3, 0x31016000
 ; GFX1132_DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1132_DPP-NEXT:    v_cmp_gt_u64_e32 vcc_lo, s[4:5], v[7:8]
-; GFX1132_DPP-NEXT:    v_cndmask_b32_e64 v8, v8, s5, vcc_lo
-; GFX1132_DPP-NEXT:    v_cndmask_b32_e64 v7, v7, s4, vcc_lo
+; GFX1132_DPP-NEXT:    v_cmp_le_u64_e32 vcc_lo, s[4:5], v[7:8]
+; GFX1132_DPP-NEXT:    v_cndmask_b32_e32 v8, s5, v8, vcc_lo
+; GFX1132_DPP-NEXT:    v_cndmask_b32_e32 v7, s4, v7, vcc_lo
 ; GFX1132_DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1132_DPP-NEXT:    buffer_store_b64 v[7:8], off, s[0:3], 0
 ; GFX1132_DPP-NEXT:    s_endpgm
@@ -16428,10 +16428,10 @@ define amdgpu_kernel void @umin_i64_constant(ptr addrspace(1) %out) {
 ; GFX7LESS-NEXT:    v_cndmask_b32_e64 v1, 0, -1, vcc
 ; GFX7LESS-NEXT:    v_cndmask_b32_e64 v0, 5, -1, vcc
 ; GFX7LESS-NEXT:    v_mov_b32_e32 v2, s5
-; GFX7LESS-NEXT:    v_cmp_lt_u64_e32 vcc, s[4:5], v[0:1]
-; GFX7LESS-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
+; GFX7LESS-NEXT:    v_cmp_ge_u64_e32 vcc, s[4:5], v[0:1]
+; GFX7LESS-NEXT:    v_cndmask_b32_e32 v1, v2, v1, vcc
 ; GFX7LESS-NEXT:    v_mov_b32_e32 v2, s4
-; GFX7LESS-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX7LESS-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
 ; GFX7LESS-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX7LESS-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
 ; GFX7LESS-NEXT:    s_endpgm
@@ -16458,13 +16458,13 @@ define amdgpu_kernel void @umin_i64_constant(ptr addrspace(1) %out) {
 ; GFX8-NEXT:    v_readfirstlane_b32 s4, v0
 ; GFX8-NEXT:    v_cndmask_b32_e64 v1, 0, -1, vcc
 ; GFX8-NEXT:    v_cndmask_b32_e64 v0, 5, -1, vcc
-; GFX8-NEXT:    v_cmp_lt_u64_e32 vcc, s[4:5], v[0:1]
+; GFX8-NEXT:    v_cmp_ge_u64_e32 vcc, s[4:5], v[0:1]
 ; GFX8-NEXT:    v_mov_b32_e32 v2, s5
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v2, v1, vcc
 ; GFX8-NEXT:    v_mov_b32_e32 v2, s4
 ; GFX8-NEXT:    s_mov_b32 s3, 0xf000
 ; GFX8-NEXT:    s_mov_b32 s2, -1
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
 ; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX8-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
 ; GFX8-NEXT:    s_endpgm
@@ -16490,13 +16490,13 @@ define amdgpu_kernel void @umin_i64_constant(ptr addrspace(1) %out) {
 ; GFX9-NEXT:    v_readfirstlane_b32 s4, v0
 ; GFX9-NEXT:    v_cndmask_b32_e64 v1, 0, -1, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e64 v0, 5, -1, vcc
-; GFX9-NEXT:    v_cmp_lt_u64_e32 vcc, s[4:5], v[0:1]
+; GFX9-NEXT:    v_cmp_ge_u64_e32 vcc, s[4:5], v[0:1]
 ; GFX9-NEXT:    v_mov_b32_e32 v2, s5
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v2, v1, vcc
 ; GFX9-NEXT:    v_mov_b32_e32 v2, s4
 ; GFX9-NEXT:    s_mov_b32 s3, 0xf000
 ; GFX9-NEXT:    s_mov_b32 s2, -1
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
 ; GFX9-NEXT:    s_endpgm
@@ -16524,9 +16524,9 @@ define amdgpu_kernel void @umin_i64_constant(ptr addrspace(1) %out) {
 ; GFX1064-NEXT:    v_readfirstlane_b32 s2, v0
 ; GFX1064-NEXT:    v_cndmask_b32_e64 v1, 0, -1, vcc
 ; GFX1064-NEXT:    v_cndmask_b32_e64 v0, 5, -1, vcc
-; GFX1064-NEXT:    v_cmp_lt_u64_e32 vcc, s[2:3], v[0:1]
-; GFX1064-NEXT:    v_cndmask_b32_e64 v1, v1, s3, vcc
-; GFX1064-NEXT:    v_cndmask_b32_e64 v0, v0, s2, vcc
+; GFX1064-NEXT:    v_cmp_ge_u64_e32 vcc, s[2:3], v[0:1]
+; GFX1064-NEXT:    v_cndmask_b32_e32 v1, s3, v1, vcc
+; GFX1064-NEXT:    v_cndmask_b32_e32 v0, s2, v0, vcc
 ; GFX1064-NEXT:    s_mov_b32 s3, 0x31016000
 ; GFX1064-NEXT:    s_mov_b32 s2, -1
 ; GFX1064-NEXT:    s_waitcnt lgkmcnt(0)
@@ -16555,9 +16555,9 @@ define amdgpu_kernel void @umin_i64_constant(ptr addrspace(1) %out) {
 ; GFX1032-NEXT:    v_readfirstlane_b32 s2, v0
 ; GFX1032-NEXT:    v_cndmask_b32_e64 v1, 0, -1, vcc_lo
 ; GFX1032-NEXT:    v_cndmask_b32_e64 v0, 5, -1, vcc_lo
-; GFX1032-NEXT:    v_cmp_lt_u64_e32 vcc_lo, s[2:3], v[0:1]
-; GFX1032-NEXT:    v_cndmask_b32_e64 v1, v1, s3, vcc_lo
-; GFX1032-NEXT:    v_cndmask_b32_e64 v0, v0, s2, vcc_lo
+; GFX1032-NEXT:    v_cmp_ge_u64_e32 vcc_lo, s[2:3], v[0:1]
+; GFX1032-NEXT:    v_cndmask_b32_e32 v1, s3, v1, vcc_lo
+; GFX1032-NEXT:    v_cndmask_b32_e32 v0, s2, v0, vcc_lo
 ; GFX1032-NEXT:    s_mov_b32 s3, 0x31016000
 ; GFX1032-NEXT:    s_mov_b32 s2, -1
 ; GFX1032-NEXT:    s_waitcnt lgkmcnt(0)
@@ -16588,9 +16588,9 @@ define amdgpu_kernel void @umin_i64_constant(ptr addrspace(1) %out) {
 ; GFX1164-NEXT:    v_cndmask_b32_e64 v1, 0, -1, vcc
 ; GFX1164-NEXT:    v_cndmask_b32_e64 v0, 5, -1, vcc
 ; GFX1164-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1164-NEXT:    v_cmp_lt_u64_e32 vcc, s[2:3], v[0:1]
-; GFX1164-NEXT:    v_cndmask_b32_e64 v1, v1, s3, vcc
-; GFX1164-NEXT:    v_cndmask_b32_e64 v0, v0, s2, vcc
+; GFX1164-NEXT:    v_cmp_ge_u64_e32 vcc, s[2:3], v[0:1]
+; GFX1164-NEXT:    v_cndmask_b32_e32 v1, s3, v1, vcc
+; GFX1164-NEXT:    v_cndmask_b32_e32 v0, s2, v0, vcc
 ; GFX1164-NEXT:    s_mov_b32 s3, 0x31016000
 ; GFX1164-NEXT:    s_mov_b32 s2, -1
 ; GFX1164-NEXT:    s_waitcnt lgkmcnt(0)
@@ -16619,9 +16619,9 @@ define amdgpu_kernel void @umin_i64_constant(ptr addrspace(1) %out) {
 ; GFX1132-NEXT:    v_cndmask_b32_e64 v1, 0, -1, vcc_lo
 ; GFX1132-NEXT:    v_cndmask_b32_e64 v0, 5, -1, vcc_lo
 ; GFX1132-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1132-NEXT:    v_cmp_lt_u64_e32 vcc_lo, s[2:3], v[0:1]
-; GFX1132-NEXT:    v_cndmask_b32_e64 v1, v1, s3, vcc_lo
-; GFX1132-NEXT:    v_cndmask_b32_e64 v0, v0, s2, vcc_lo
+; GFX1132-NEXT:    v_cmp_ge_u64_e32 vcc_lo, s[2:3], v[0:1]
+; GFX1132-NEXT:    v_cndmask_b32_e32 v1, s3, v1, vcc_lo
+; GFX1132-NEXT:    v_cndmask_b32_e32 v0, s2, v0, vcc_lo
 ; GFX1132-NEXT:    s_mov_b32 s3, 0x31016000
 ; GFX1132-NEXT:    s_mov_b32 s2, -1
 ; GFX1132-NEXT:    s_waitcnt lgkmcnt(0)
@@ -16682,10 +16682,10 @@ define amdgpu_kernel void @umin_i64_varying(ptr addrspace(1) %out) {
 ; GFX7LESS_ITERATIVE-NEXT:    v_readfirstlane_b32 s5, v4
 ; GFX7LESS_ITERATIVE-NEXT:    v_readfirstlane_b32 s4, v3
 ; GFX7LESS_ITERATIVE-NEXT:    v_mov_b32_e32 v0, s5
-; GFX7LESS_ITERATIVE-NEXT:    v_cmp_lt_u64_e32 vcc, s[4:5], v[1:2]
-; GFX7LESS_ITERATIVE-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX7LESS_ITERATIVE-NEXT:    v_cmp_ge_u64_e32 vcc, s[4:5], v[1:2]
+; GFX7LESS_ITERATIVE-NEXT:    v_cndmask_b32_e32 v2, v0, v2, vcc
 ; GFX7LESS_ITERATIVE-NEXT:    v_mov_b32_e32 v0, s4
-; GFX7LESS_ITERATIVE-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX7LESS_ITERATIVE-NEXT:    v_cndmask_b32_e32 v1, v0, v1, vcc
 ; GFX7LESS_ITERATIVE-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX7LESS_ITERATIVE-NEXT:    buffer_store_dwordx2 v[1:2], off, s[0:3], 0
 ; GFX7LESS_ITERATIVE-NEXT:    s_endpgm
@@ -16734,13 +16734,13 @@ define amdgpu_kernel void @umin_i64_varying(ptr addrspace(1) %out) {
 ; GFX8_ITERATIVE-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; GFX8_ITERATIVE-NEXT:    v_readfirstlane_b32 s5, v4
 ; GFX8_ITERATIVE-NEXT:    v_readfirstlane_b32 s4, v3
-; GFX8_ITERATIVE-NEXT:    v_cmp_lt_u64_e32 vcc, s[4:5], v[1:2]
+; GFX8_ITERATIVE-NEXT:    v_cmp_ge_u64_e32 vcc, s[4:5], v[1:2]
 ; GFX8_ITERATIVE-NEXT:    v_mov_b32_e32 v0, s5
-; GFX8_ITERATIVE-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX8_ITERATIVE-NEXT:    v_cndmask_b32_e32 v2, v0, v2, vcc
 ; GFX8_ITERATIVE-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX8_ITERATIVE-NEXT:    s_mov_b32 s3, 0xf000
 ; GFX8_ITERATIVE-NEXT:    s_mov_b32 s2, -1
-; GFX8_ITERATIVE-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX8_ITERATIVE-NEXT:    v_cndmask_b32_e32 v1, v0, v1, vcc
 ; GFX8_ITERATIVE-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX8_ITERATIVE-NEXT:    buffer_store_dwordx2 v[1:2], off, s[0:3], 0
 ; GFX8_ITERATIVE-NEXT:    s_endpgm
@@ -16788,13 +16788,13 @@ define amdgpu_kernel void @umin_i64_varying(ptr addrspace(1) %out) {
 ; GFX9_ITERATIVE-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; GFX9_ITERATIVE-NEXT:    v_readfirstlane_b32 s5, v4
 ; GFX9_ITERATIVE-NEXT:    v_readfirstlane_b32 s4, v3
-; GFX9_ITERATIVE-NEXT:    v_cmp_lt_u64_e32 vcc, s[4:5], v[1:2]
+; GFX9_ITERATIVE-NEXT:    v_cmp_ge_u64_e32 vcc, s[4:5], v[1:2]
 ; GFX9_ITERATIVE-NEXT:    v_mov_b32_e32 v0, s5
-; GFX9_ITERATIVE-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GFX9_ITERATIVE-NEXT:    v_cndmask_b32_e32 v2, v0, v2, vcc
 ; GFX9_ITERATIVE-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX9_ITERATIVE-NEXT:    s_mov_b32 s3, 0xf000
 ; GFX9_ITERATIVE-NEXT:    s_mov_b32 s2, -1
-; GFX9_ITERATIVE-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX9_ITERATIVE-NEXT:    v_cndmask_b32_e32 v1, v0, v1, vcc
 ; GFX9_ITERATIVE-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9_ITERATIVE-NEXT:    buffer_store_dwordx2 v[1:2], off, s[0:3], 0
 ; GFX9_ITERATIVE-NEXT:    s_endpgm
@@ -16841,9 +16841,9 @@ define amdgpu_kernel void @umin_i64_varying(ptr addrspace(1) %out) {
 ; GFX1064_ITERATIVE-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; GFX1064_ITERATIVE-NEXT:    v_readfirstlane_b32 s3, v4
 ; GFX1064_ITERATIVE-NEXT:    v_readfirstlane_b32 s2, v3
-; GFX1064_ITERATIVE-NEXT:    v_cmp_lt_u64_e32 vcc, s[2:3], v[1:2]
-; GFX1064_ITERATIVE-NEXT:    v_cndmask_b32_e64 v2, v2, s3, vcc
-; GFX1064_ITERATIVE-NEXT:    v_cndmask_b32_e64 v1, v1, s2, vcc
+; GFX1064_ITERATIVE-NEXT:    v_cmp_ge_u64_e32 vcc, s[2:3], v[1:2]
+; GFX1064_ITERATIVE-NEXT:    v_cndmask_b32_e32 v2, s3, v2, vcc
+; GFX1064_ITERATIVE-NEXT:    v_cndmask_b32_e32 v1, s2, v1, vcc
 ; GFX1064_ITERATIVE-NEXT:    s_mov_b32 s3, 0x31016000
 ; GFX1064_ITERATIVE-NEXT:    s_mov_b32 s2, -1
 ; GFX1064_ITERATIVE-NEXT:    s_waitcnt lgkmcnt(0)
@@ -16891,9 +16891,9 @@ define amdgpu_kernel void @umin_i64_varying(ptr addrspace(1) %out) {
 ; GFX1032_ITERATIVE-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; GFX1032_ITERATIVE-NEXT:    v_readfirstlane_b32 s3, v4
 ; GFX1032_ITERATIVE-NEXT:    v_readfirstlane_b32 s2, v3
-; GFX1032_ITERATIVE-NEXT:    v_cmp_lt_u64_e32 vcc_lo, s[2:3], v[1:2]
-; GFX1032_ITERATIVE-NEXT:    v_cndmask_b32_e64 v2, v2, s3, vcc_lo
-; GFX1032_ITERATIVE-NEXT:    v_cndmask_b32_e64 v1, v1, s2, vcc_lo
+; GFX1032_ITERATIVE-NEXT:    v_cmp_ge_u64_e32 vcc_lo, s[2:3], v[1:2]
+; GFX1032_ITERATIVE-NEXT:    v_cndmask_b32_e32 v2, s3, v2, vcc_lo
+; GFX1032_ITERATIVE-NEXT:    v_cndmask_b32_e32 v1, s2, v1, vcc_lo
 ; GFX1032_ITERATIVE-NEXT:    s_mov_b32 s3, 0x31016000
 ; GFX1032_ITERATIVE-NEXT:    s_mov_b32 s2, -1
 ; GFX1032_ITERATIVE-NEXT:    s_waitcnt lgkmcnt(0)
@@ -16949,9 +16949,9 @@ define amdgpu_kernel void @umin_i64_varying(ptr addrspace(1) %out) {
 ; GFX1164_ITERATIVE-NEXT:    v_readfirstlane_b32 s3, v3
 ; GFX1164_ITERATIVE-NEXT:    v_readfirstlane_b32 s2, v2
 ; GFX1164_ITERATIVE-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1164_ITERATIVE-NEXT:    v_cmp_lt_u64_e32 vcc, s[2:3], v[0:1]
-; GFX1164_ITERATIVE-NEXT:    v_cndmask_b32_e64 v1, v1, s3, vcc
-; GFX1164_ITERATIVE-NEXT:    v_cndmask_b32_e64 v0, v0, s2, vcc
+; GFX1164_ITERATIVE-NEXT:    v_cmp_ge_u64_e32 vcc, s[2:3], v[0:1]
+; GFX1164_ITERATIVE-NEXT:    v_cndmask_b32_e32 v1, s3, v1, vcc
+; GFX1164_ITERATIVE-NEXT:    v_cndmask_b32_e32 v0, s2, v0, vcc
 ; GFX1164_ITERATIVE-NEXT:    s_mov_b32 s3, 0x31016000
 ; GFX1164_ITERATIVE-NEXT:    s_mov_b32 s2, -1
 ; GFX1164_ITERATIVE-NEXT:    s_waitcnt lgkmcnt(0)
@@ -17003,9 +17003,9 @@ define amdgpu_kernel void @umin_i64_varying(ptr addrspace(1) %out) {
 ; GFX1132_ITERATIVE-NEXT:    v_readfirstlane_b32 s3, v3
 ; GFX1132_ITERATIVE-NEXT:    v_readfirstlane_b32 s2, v2
 ; GFX1132_ITERATIVE-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1132_ITERATIVE-NEXT:    v_cmp_lt_u64_e32 vcc_lo, s[2:3], v[0:1]
-; GFX1132_ITERATIVE-NEXT:    v_cndmask_b32_e64 v1, v1, s3, vcc_lo
-; GFX1132_ITERATIVE-NEXT:    v_cndmask_b32_e64 v0, v0, s2, vcc_lo
+; GFX1132_ITERATIVE-NEXT:    v_cmp_ge_u64_e32 vcc_lo, s[2:3], v[0:1]
+; GFX1132_ITERATIVE-NEXT:    v_cndmask_b32_e32 v1, s3, v1, vcc_lo
+; GFX1132_ITERATIVE-NEXT:    v_cndmask_b32_e32 v0, s2, v0, vcc_lo
 ; GFX1132_ITERATIVE-NEXT:    s_mov_b32 s3, 0x31016000
 ; GFX1132_ITERATIVE-NEXT:    s_mov_b32 s2, -1
 ; GFX1132_ITERATIVE-NEXT:    s_waitcnt lgkmcnt(0)
@@ -17104,13 +17104,13 @@ define amdgpu_kernel void @umin_i64_varying(ptr addrspace(1) %out) {
 ; GFX8_DPP-NEXT:    v_readfirstlane_b32 s4, v6
 ; GFX8_DPP-NEXT:    v_mov_b32_e32 v6, v1
 ; GFX8_DPP-NEXT:    v_mov_b32_e32 v7, v2
-; GFX8_DPP-NEXT:    v_cmp_lt_u64_e32 vcc, s[4:5], v[6:7]
+; GFX8_DPP-NEXT:    v_cmp_ge_u64_e32 vcc, s[4:5], v[6:7]
 ; GFX8_DPP-NEXT:    v_mov_b32_e32 v0, s5
-; GFX8_DPP-NEXT:    v_cndmask_b32_e32 v7, v7, v0, vcc
+; GFX8_DPP-NEXT:    v_cndmask_b32_e32 v7, v0, v7, vcc
 ; GFX8_DPP-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX8_DPP-NEXT:    s_mov_b32 s3, 0xf000
 ; GFX8_DPP-NEXT:    s_mov_b32 s2, -1
-; GFX8_DPP-NEXT:    v_cndmask_b32_e32 v6, v6, v0, vcc
+; GFX8_DPP-NEXT:    v_cndmask_b32_e32 v6, v0, v6, vcc
 ; GFX8_DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX8_DPP-NEXT:    buffer_store_dwordx2 v[6:7], off, s[0:3], 0
 ; GFX8_DPP-NEXT:    s_endpgm
@@ -17193,13 +17193,13 @@ define amdgpu_kernel void @umin_i64_varying(ptr addrspace(1) %out) {
 ; GFX9_DPP-NEXT:    v_readfirstlane_b32 s4, v6
 ; GFX9_DPP-NEXT:    v_mov_b32_e32 v6, v1
 ; GFX9_DPP-NEXT:    v_mov_b32_e32 v7, v2
-; GFX9_DPP-NEXT:    v_cmp_lt_u64_e32 vcc, s[4:5], v[6:7]
+; GFX9_DPP-NEXT:    v_cmp_ge_u64_e32 vcc, s[4:5], v[6:7]
 ; GFX9_DPP-NEXT:    v_mov_b32_e32 v0, s5
-; GFX9_DPP-NEXT:    v_cndmask_b32_e32 v7, v7, v0, vcc
+; GFX9_DPP-NEXT:    v_cndmask_b32_e32 v7, v0, v7, vcc
 ; GFX9_DPP-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX9_DPP-NEXT:    s_mov_b32 s3, 0xf000
 ; GFX9_DPP-NEXT:    s_mov_b32 s2, -1
-; GFX9_DPP-NEXT:    v_cndmask_b32_e32 v6, v6, v0, vcc
+; GFX9_DPP-NEXT:    v_cndmask_b32_e32 v6, v0, v6, vcc
 ; GFX9_DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9_DPP-NEXT:    buffer_store_dwordx2 v[6:7], off, s[0:3], 0
 ; GFX9_DPP-NEXT:    s_endpgm
@@ -17305,9 +17305,9 @@ define amdgpu_kernel void @umin_i64_varying(ptr addrspace(1) %out) {
 ; GFX1064_DPP-NEXT:    v_mov_b32_e32 v7, v4
 ; GFX1064_DPP-NEXT:    v_mov_b32_e32 v8, v5
 ; GFX1064_DPP-NEXT:    s_mov_b32 s3, 0x31016000
-; GFX1064_DPP-NEXT:    v_cmp_lt_u64_e32 vcc, s[4:5], v[7:8]
-; GFX1064_DPP-NEXT:    v_cndmask_b32_e64 v8, v8, s5, vcc
-; GFX1064_DPP-NEXT:    v_cndmask_b32_e64 v7, v7, s4, vcc
+; GFX1064_DPP-NEXT:    v_cmp_ge_u64_e32 vcc, s[4:5], v[7:8]
+; GFX1064_DPP-NEXT:    v_cndmask_b32_e32 v8, s5, v8, vcc
+; GFX1064_DPP-NEXT:    v_cndmask_b32_e32 v7, s4, v7, vcc
 ; GFX1064_DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064_DPP-NEXT:    buffer_store_dwordx2 v[7:8], off, s[0:3], 0
 ; GFX1064_DPP-NEXT:    s_endpgm
@@ -17390,9 +17390,9 @@ define amdgpu_kernel void @umin_i64_varying(ptr addrspace(1) %out) {
 ; GFX1032_DPP-NEXT:    v_mov_b32_e32 v7, v4
 ; GFX1032_DPP-NEXT:    v_mov_b32_e32 v8, v5
 ; GFX1032_DPP-NEXT:    s_mov_b32 s3, 0x31016000
-; GFX1032_DPP-NEXT:    v_cmp_lt_u64_e32 vcc_lo, s[4:5], v[7:8]
-; GFX1032_DPP-NEXT:    v_cndmask_b32_e64 v8, v8, s5, vcc_lo
-; GFX1032_DPP-NEXT:    v_cndmask_b32_e64 v7, v7, s4, vcc_lo
+; GFX1032_DPP-NEXT:    v_cmp_ge_u64_e32 vcc_lo, s[4:5], v[7:8]
+; GFX1032_DPP-NEXT:    v_cndmask_b32_e32 v8, s5, v8, vcc_lo
+; GFX1032_DPP-NEXT:    v_cndmask_b32_e32 v7, s4, v7, vcc_lo
 ; GFX1032_DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1032_DPP-NEXT:    buffer_store_dwordx2 v[7:8], off, s[0:3], 0
 ; GFX1032_DPP-NEXT:    s_endpgm
@@ -17512,9 +17512,9 @@ define amdgpu_kernel void @umin_i64_varying(ptr addrspace(1) %out) {
 ; GFX1164_DPP-NEXT:    v_mov_b32_e32 v8, v5
 ; GFX1164_DPP-NEXT:    s_mov_b32 s3, 0x31016000
 ; GFX1164_DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1164_DPP-NEXT:    v_cmp_lt_u64_e32 vcc, s[4:5], v[7:8]
-; GFX1164_DPP-NEXT:    v_cndmask_b32_e64 v8, v8, s5, vcc
-; GFX1164_DPP-NEXT:    v_cndmask_b32_e64 v7, v7, s4, vcc
+; GFX1164_DPP-NEXT:    v_cmp_ge_u64_e32 vcc, s[4:5], v[7:8]
+; GFX1164_DPP-NEXT:    v_cndmask_b32_e32 v8, s5, v8, vcc
+; GFX1164_DPP-NEXT:    v_cndmask_b32_e32 v7, s4, v7, vcc
 ; GFX1164_DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164_DPP-NEXT:    buffer_store_b64 v[7:8], off, s[0:3], 0
 ; GFX1164_DPP-NEXT:    s_endpgm
@@ -17596,9 +17596,9 @@ define amdgpu_kernel void @umin_i64_varying(ptr addrspace(1) %out) {
 ; GFX1132_DPP-NEXT:    v_mov_b32_e32 v8, v5
 ; GFX1132_DPP-NEXT:    s_mov_b32 s3, 0x31016000
 ; GFX1132_DPP-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1132_DPP-NEXT:    v_cmp_lt_u64_e32 vcc_lo, s[4:5], v[7:8]
-; GFX1132_DPP-NEXT:    v_cndmask_b32_e64 v8, v8, s5, vcc_lo
-; GFX1132_DPP-NEXT:    v_cndmask_b32_e64 v7, v7, s4, vcc_lo
+; GFX1132_DPP-NEXT:    v_cmp_ge_u64_e32 vcc_lo, s[4:5], v[7:8]
+; GFX1132_DPP-NEXT:    v_cndmask_b32_e32 v8, s5, v8, vcc_lo
+; GFX1132_DPP-NEXT:    v_cndmask_b32_e32 v7, s4, v7, vcc_lo
 ; GFX1132_DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1132_DPP-NEXT:    buffer_store_b64 v[7:8], off, s[0:3], 0
 ; GFX1132_DPP-NEXT:    s_endpgm
diff --git a/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll b/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll
index ae90cfb631e8d..0906ba0e7f891 100644
--- a/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll
+++ b/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll
@@ -914,12 +914,12 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
   ; GFX90A-NEXT:   renamable $vgpr3 = V_OR_B32_e32 killed $vgpr11, killed $vgpr19, implicit $exec
   ; GFX90A-NEXT:   renamable $vgpr2 = V_OR_B32_e32 killed $vgpr3, killed $vgpr2, implicit $exec
   ; GFX90A-NEXT:   renamable $vgpr3 = V_MOV_B32_e32 0, implicit $exec
-  ; GFX90A-NEXT:   renamable $vcc = V_CMP_EQ_U32_sdwa 0, killed $vgpr53, 0, $vgpr3, 0, 0, 6, implicit $exec
-  ; GFX90A-NEXT:   renamable $vgpr2 = V_CNDMASK_B32_e64 0, 0, 0, killed $vgpr2, killed $vcc, implicit $exec
+  ; GFX90A-NEXT:   renamable $sgpr20_sgpr21 = V_CMP_NE_U32_sdwa 0, killed $vgpr53, 0, $vgpr3, 0, 0, 6, implicit $exec
+  ; GFX90A-NEXT:   renamable $vgpr2 = V_CNDMASK_B32_e64 0, killed $vgpr2, 0, 0, killed $sgpr20_sgpr21, implicit $exec
   ; GFX90A-NEXT:   renamable $vgpr10 = V_OR_B32_e32 killed $vgpr52, killed $vgpr13, implicit $exec
   ; GFX90A-NEXT:   renamable $vgpr2 = V_OR_B32_e32 killed $vgpr10, killed $vgpr2, implicit $exec
-  ; GFX90A-NEXT:   renamable $vcc = V_CMP_EQ_U32_sdwa 0, killed $vgpr17, 0, $vgpr3, 0, 0, 6, implicit $exec
-  ; GFX90A-NEXT:   renamable $vgpr2 = V_CNDMASK_B32_e64 0, 0, 0, killed $vgpr2, killed $vcc, implicit $exec
+  ; GFX90A-NEXT:   renamable $sgpr20_sgpr21 = V_CMP_NE_U32_sdwa 0, killed $vgpr17, 0, $vgpr3, 0, 0, 6, implicit $exec
+  ; GFX90A-NEXT:   renamable $vgpr2 = V_CNDMASK_B32_e64 0, killed $vgpr2, 0, 0, killed $sgpr20_sgpr21, implicit $exec
   ; GFX90A-NEXT:   renamable $vgpr2 = V_OR_B32_e32 killed $vgpr2, killed $vgpr15, implicit $exec
   ; GFX90A-NEXT:   DS_WRITE2_B32_gfx9 killed renamable $vgpr3, killed renamable $vgpr2, renamable $vgpr3, 0, 1, 0, implicit $exec :: (store (s64) into `ptr addrspace(3) null`, align 4, addrspace 3)
   ; GFX90A-NEXT:   S_BRANCH %bb.65
diff --git a/llvm/test/CodeGen/AMDGPU/dagcombine-select.ll b/llvm/test/CodeGen/AMDGPU/dagcombine-select.ll
index 1f7bb761b55b6..9735b8f5ebcb8 100644
--- a/llvm/test/CodeGen/AMDGPU/dagcombine-select.ll
+++ b/llvm/test/CodeGen/AMDGPU/dagcombine-select.ll
@@ -7,8 +7,8 @@ define amdgpu_kernel void @select_and1(ptr addrspace(1) %p, i32 %x, i32 %y) {
 ; GCN-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
 ; GCN-NEXT:    v_mov_b32_e32 v0, 0
 ; GCN-NEXT:    s_waitcnt lgkmcnt(0)
-; GCN-NEXT:    s_cmp_gt_i32 s2, 10
-; GCN-NEXT:    s_cselect_b32 s2, s3, 0
+; GCN-NEXT:    s_cmp_lt_i32 s2, 11
+; GCN-NEXT:    s_cselect_b32 s2, 0, s3
 ; GCN-NEXT:    v_mov_b32_e32 v1, s2
 ; GCN-NEXT:    global_store_dword v0, v1, s[0:1]
 ; GCN-NEXT:    s_endpgm
@@ -25,8 +25,8 @@ define amdgpu_kernel void @select_and2(ptr addrspace(1) %p, i32 %x, i32 %y) {
 ; GCN-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
 ; GCN-NEXT:    v_mov_b32_e32 v0, 0
 ; GCN-NEXT:    s_waitcnt lgkmcnt(0)
-; GCN-NEXT:    s_cmp_gt_i32 s2, 10
-; GCN-NEXT:    s_cselect_b32 s2, s3, 0
+; GCN-NEXT:    s_cmp_lt_i32 s2, 11
+; GCN-NEXT:    s_cselect_b32 s2, 0, s3
 ; GCN-NEXT:    v_mov_b32_e32 v1, s2
 ; GCN-NEXT:    global_store_dword v0, v1, s[0:1]
 ; GCN-NEXT:    s_endpgm
@@ -63,11 +63,11 @@ define amdgpu_kernel void @select_and_v4(ptr addrspace(1) %p, i32 %x, <4 x i32>
 ; GCN-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x24
 ; GCN-NEXT:    v_mov_b32_e32 v4, 0
 ; GCN-NEXT:    s_waitcnt lgkmcnt(0)
-; GCN-NEXT:    s_cmp_gt_i32 s8, 10
-; GCN-NEXT:    s_cselect_b32 s3, s3, 0
-; GCN-NEXT:    s_cselect_b32 s2, s2, 0
-; GCN-NEXT:    s_cselect_b32 s1, s1, 0
-; GCN-NEXT:    s_cselect_b32 s0, s0, 0
+; GCN-NEXT:    s_cmp_lt_i32 s8, 11
+; GCN-NEXT:    s_cselect_b32 s3, 0, s3
+; GCN-NEXT:    s_cselect_b32 s2, 0, s2
+; GCN-NEXT:    s_cselect_b32 s1, 0, s1
+; GCN-NEXT:    s_cselect_b32 s0, 0, s0
 ; GCN-NEXT:    v_mov_b32_e32 v0, s0
 ; GCN-NEXT:    v_mov_b32_e32 v1, s1
 ; GCN-NEXT:    v_mov_b32_e32 v2, s2
@@ -123,8 +123,8 @@ define amdgpu_kernel void @select_or3(ptr addrspace(1) %p, i32 %x, i32 %y) {
 ; GCN-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
 ; GCN-NEXT:    v_mov_b32_e32 v0, 0
 ; GCN-NEXT:    s_waitcnt lgkmcnt(0)
-; GCN-NEXT:    s_cmp_gt_i32 s2, 10
-; GCN-NEXT:    s_cselect_b32 s2, s3, -1
+; GCN-NEXT:    s_cmp_lt_i32 s2, 11
+; GCN-NEXT:    s_cselect_b32 s2, -1, s3
 ; GCN-NEXT:    v_mov_b32_e32 v1, s2
 ; GCN-NEXT:    global_store_dword v0, v1, s[0:1]
 ; GCN-NEXT:    s_endpgm
diff --git a/llvm/test/CodeGen/AMDGPU/extract-subvector-16bit.ll b/llvm/test/CodeGen/AMDGPU/extract-subvector-16bit.ll
index dbbe43152e0df..29ffe810daa9e 100644
--- a/llvm/test/CodeGen/AMDGPU/extract-subvector-16bit.ll
+++ b/llvm/test/CodeGen/AMDGPU/extract-subvector-16bit.ll
@@ -490,19 +490,17 @@ define <4 x half> @vec_8xf16_extract_4xf16(ptr addrspace(1) %p0, ptr addrspace(1
 ; GFX9-NEXT:    global_load_dwordx4 v[2:5], v[0:1], off glc
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-NEXT:  .LBB2_3: ; %exit
-; GFX9-NEXT:    v_mov_b32_e32 v0, 0x3900
 ; GFX9-NEXT:    v_mov_b32_e32 v1, 0x3d00
-; GFX9-NEXT:    v_cmp_ge_f16_e32 vcc, 0.5, v2
-; GFX9-NEXT:    v_mov_b32_e32 v5, 0x3800
-; GFX9-NEXT:    v_cndmask_b32_e32 v4, v0, v1, vcc
-; GFX9-NEXT:    v_cmp_le_f16_sdwa vcc, v2, v5 src0_sel:WORD_1 src1_sel:DWORD
-; GFX9-NEXT:    v_cndmask_b32_e32 v2, v0, v1, vcc
+; GFX9-NEXT:    v_mov_b32_e32 v4, 0x3900
 ; GFX9-NEXT:    v_cmp_nge_f16_e32 vcc, 0.5, v3
-; GFX9-NEXT:    v_cndmask_b32_e32 v5, v1, v0, vcc
-; GFX9-NEXT:    v_cmp_ge_f16_e32 vcc, 0.5, v3
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
-; GFX9-NEXT:    v_pack_b32_f16 v1, v0, v5
-; GFX9-NEXT:    v_pack_b32_f16 v0, v4, v2
+; GFX9-NEXT:    v_mov_b32_e32 v0, 0x3800
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v1, v4, vcc
+; GFX9-NEXT:    v_cmp_nge_f16_e32 vcc, 0.5, v2
+; GFX9-NEXT:    v_cndmask_b32_e32 v5, v1, v4, vcc
+; GFX9-NEXT:    v_cmp_nle_f16_sdwa vcc, v2, v0 src0_sel:WORD_1 src1_sel:DWORD
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v1, v4, vcc
+; GFX9-NEXT:    v_pack_b32_f16 v0, v5, v0
+; GFX9-NEXT:    v_pack_b32_f16 v1, v3, v3
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ; GFX9-NEXT:  .LBB2_4:
 ; GFX9-NEXT:    ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
@@ -523,20 +521,18 @@ define <4 x half> @vec_8xf16_extract_4xf16(ptr addrspace(1) %p0, ptr addrspace(1
 ; GFX11-TRUE16-NEXT:    global_load_b128 v[2:5], v[0:1], off glc dlc
 ; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0)
 ; GFX11-TRUE16-NEXT:  .LBB2_3: ; %exit
-; GFX11-TRUE16-NEXT:    v_cmp_ge_f16_e32 vcc_lo, 0.5, v2.l
-; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v0.l, 0x3d00
-; GFX11-TRUE16-NEXT:    v_cmp_ge_f16_e64 s0, 0.5, v2.h
+; GFX11-TRUE16-NEXT:    v_cmp_nge_f16_e32 vcc_lo, 0.5, v2.l
+; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v0.l, 0x3900
+; GFX11-TRUE16-NEXT:    v_cmp_nge_f16_e64 s0, 0.5, v2.h
 ; GFX11-TRUE16-NEXT:    v_cmp_nge_f16_e64 s1, 0.5, v3.l
-; GFX11-TRUE16-NEXT:    v_cmp_ge_f16_e64 s2, 0.5, v3.l
-; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0x3900, v0.l, vcc_lo
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v1.l, 0x3900, v0.l, s0
-; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v1.h, v0.l, 0x3900, s1
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v2.l, 0x3900, v0.l, s2
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0x3d00, v0.l, vcc_lo
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v1.l, 0x3d00, v0.l, s0
 ; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v1.h, 0x3d00, v0.l, s1
 ; GFX11-TRUE16-NEXT:    v_pack_b32_f16 v0, v0.h, v1.l
-; GFX11-TRUE16-NEXT:    v_pack_b32_f16 v1, v2.l, v1.h
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX11-TRUE16-NEXT:    v_pack_b32_f16 v1, v1.h, v1.h
 ; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
 ; GFX11-TRUE16-NEXT:  .LBB2_4:
 ; GFX11-TRUE16-NEXT:    ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
@@ -557,20 +553,18 @@ define <4 x half> @vec_8xf16_extract_4xf16(ptr addrspace(1) %p0, ptr addrspace(1
 ; GFX11-FAKE16-NEXT:    global_load_b128 v[2:5], v[0:1], off glc dlc
 ; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0)
 ; GFX11-FAKE16-NEXT:  .LBB2_3: ; %exit
-; GFX11-FAKE16-NEXT:    v_mov_b32_e32 v0, 0x3d00
+; GFX11-FAKE16-NEXT:    v_mov_b32_e32 v0, 0x3900
 ; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v1, 16, v2
-; GFX11-FAKE16-NEXT:    v_cmp_ge_f16_e32 vcc_lo, 0.5, v2
+; GFX11-FAKE16-NEXT:    v_cmp_nge_f16_e32 vcc_lo, 0.5, v2
 ; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v2, 0x3900, v0, vcc_lo
-; GFX11-FAKE16-NEXT:    v_cmp_ge_f16_e32 vcc_lo, 0.5, v1
-; GFX11-FAKE16-NEXT:    v_dual_mov_b32 v4, 0x3900 :: v_dual_cndmask_b32 v1, 0x3900, v0
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v2, 0x3d00, v0, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_nge_f16_e32 vcc_lo, 0.5, v1
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0x3d00, v0, vcc_lo
 ; GFX11-FAKE16-NEXT:    v_cmp_nge_f16_e32 vcc_lo, 0.5, v3
-; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_2)
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v4, 0x3d00, v4, vcc_lo
-; GFX11-FAKE16-NEXT:    v_cmp_ge_f16_e32 vcc_lo, 0.5, v3
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v3, 0x3900, v0, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v3, 0x3d00, v0, vcc_lo
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
 ; GFX11-FAKE16-NEXT:    v_pack_b32_f16 v0, v2, v1
-; GFX11-FAKE16-NEXT:    v_pack_b32_f16 v1, v3, v4
+; GFX11-FAKE16-NEXT:    v_pack_b32_f16 v1, v3, v3
 ; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ; GFX11-FAKE16-NEXT:  .LBB2_4:
 ; GFX11-FAKE16-NEXT:    ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
@@ -1215,20 +1209,18 @@ define <4 x half> @vec_16xf16_extract_4xf16(ptr addrspace(1) %p0, ptr addrspace(
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-NEXT:    ; kill: killed $vgpr0 killed $vgpr1
 ; GFX9-NEXT:  .LBB5_3: ; %exit
-; GFX9-NEXT:    v_mov_b32_e32 v0, 0x3900
 ; GFX9-NEXT:    v_mov_b32_e32 v1, 0x3d00
+; GFX9-NEXT:    v_mov_b32_e32 v2, 0x3900
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    v_cmp_ge_f16_e32 vcc, 0.5, v4
-; GFX9-NEXT:    v_mov_b32_e32 v3, 0x3800
-; GFX9-NEXT:    v_cndmask_b32_e32 v2, v0, v1, vcc
-; GFX9-NEXT:    v_cmp_le_f16_sdwa vcc, v4, v3 src0_sel:WORD_1 src1_sel:DWORD
-; GFX9-NEXT:    v_cndmask_b32_e32 v3, v0, v1, vcc
 ; GFX9-NEXT:    v_cmp_nge_f16_e32 vcc, 0.5, v5
-; GFX9-NEXT:    v_cndmask_b32_e32 v4, v1, v0, vcc
-; GFX9-NEXT:    v_cmp_ge_f16_e32 vcc, 0.5, v5
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
-; GFX9-NEXT:    v_pack_b32_f16 v1, v0, v4
-; GFX9-NEXT:    v_pack_b32_f16 v0, v2, v3
+; GFX9-NEXT:    v_mov_b32_e32 v0, 0x3800
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v1, v2, vcc
+; GFX9-NEXT:    v_cmp_nge_f16_e32 vcc, 0.5, v4
+; GFX9-NEXT:    v_cndmask_b32_e32 v5, v1, v2, vcc
+; GFX9-NEXT:    v_cmp_nle_f16_sdwa vcc, v4, v0 src0_sel:WORD_1 src1_sel:DWORD
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v1, v2, vcc
+; GFX9-NEXT:    v_pack_b32_f16 v0, v5, v0
+; GFX9-NEXT:    v_pack_b32_f16 v1, v3, v3
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ; GFX9-NEXT:  .LBB5_4:
 ; GFX9-NEXT:    ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11
@@ -1253,20 +1245,18 @@ define <4 x half> @vec_16xf16_extract_4xf16(ptr addrspace(1) %p0, ptr addrspace(
 ; GFX11-TRUE16-NEXT:    global_load_b128 v[2:5], v[0:1], off glc dlc
 ; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0)
 ; GFX11-TRUE16-NEXT:  .LBB5_3: ; %exit
-; GFX11-TRUE16-NEXT:    v_cmp_ge_f16_e32 vcc_lo, 0.5, v2.l
-; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v0.l, 0x3d00
-; GFX11-TRUE16-NEXT:    v_cmp_ge_f16_e64 s0, 0.5, v2.h
+; GFX11-TRUE16-NEXT:    v_cmp_nge_f16_e32 vcc_lo, 0.5, v2.l
+; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v0.l, 0x3900
+; GFX11-TRUE16-NEXT:    v_cmp_nge_f16_e64 s0, 0.5, v2.h
 ; GFX11-TRUE16-NEXT:    v_cmp_nge_f16_e64 s1, 0.5, v3.l
-; GFX11-TRUE16-NEXT:    v_cmp_ge_f16_e64 s2, 0.5, v3.l
-; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0x3900, v0.l, vcc_lo
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v1.l, 0x3900, v0.l, s0
-; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v1.h, v0.l, 0x3900, s1
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v2.l, 0x3900, v0.l, s2
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0x3d00, v0.l, vcc_lo
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v1.l, 0x3d00, v0.l, s0
 ; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v1.h, 0x3d00, v0.l, s1
 ; GFX11-TRUE16-NEXT:    v_pack_b32_f16 v0, v0.h, v1.l
-; GFX11-TRUE16-NEXT:    v_pack_b32_f16 v1, v2.l, v1.h
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX11-TRUE16-NEXT:    v_pack_b32_f16 v1, v1.h, v1.h
 ; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
 ; GFX11-TRUE16-NEXT:  .LBB5_4:
 ; GFX11-TRUE16-NEXT:    ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
@@ -1291,20 +1281,18 @@ define <4 x half> @vec_16xf16_extract_4xf16(ptr addrspace(1) %p0, ptr addrspace(
 ; GFX11-FAKE16-NEXT:    global_load_b128 v[2:5], v[0:1], off glc dlc
 ; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0)
 ; GFX11-FAKE16-NEXT:  .LBB5_3: ; %exit
-; GFX11-FAKE16-NEXT:    v_mov_b32_e32 v0, 0x3d00
+; GFX11-FAKE16-NEXT:    v_mov_b32_e32 v0, 0x3900
 ; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v1, 16, v2
-; GFX11-FAKE16-NEXT:    v_cmp_ge_f16_e32 vcc_lo, 0.5, v2
+; GFX11-FAKE16-NEXT:    v_cmp_nge_f16_e32 vcc_lo, 0.5, v2
 ; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v2, 0x3900, v0, vcc_lo
-; GFX11-FAKE16-NEXT:    v_cmp_ge_f16_e32 vcc_lo, 0.5, v1
-; GFX11-FAKE16-NEXT:    v_dual_mov_b32 v4, 0x3900 :: v_dual_cndmask_b32 v1, 0x3900, v0
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v2, 0x3d00, v0, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_nge_f16_e32 vcc_lo, 0.5, v1
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0x3d00, v0, vcc_lo
 ; GFX11-FAKE16-NEXT:    v_cmp_nge_f16_e32 vcc_lo, 0.5, v3
-; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_2)
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v4, 0x3d00, v4, vcc_lo
-; GFX11-FAKE16-NEXT:    v_cmp_ge_f16_e32 vcc_lo, 0.5, v3
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v3, 0x3900, v0, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v3, 0x3d00, v0, vcc_lo
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
 ; GFX11-FAKE16-NEXT:    v_pack_b32_f16 v0, v2, v1
-; GFX11-FAKE16-NEXT:    v_pack_b32_f16 v1, v3, v4
+; GFX11-FAKE16-NEXT:    v_pack_b32_f16 v1, v3, v3
 ; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ; GFX11-FAKE16-NEXT:  .LBB5_4:
 ; GFX11-FAKE16-NEXT:    ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
@@ -1591,26 +1579,25 @@ define amdgpu_gfx <8 x i16> @vec_16xi16_extract_8xi16_0(i1 inreg %cond, ptr addr
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-NEXT:    ; kill: killed $vgpr0 killed $vgpr1
 ; GFX9-NEXT:  .LBB7_4: ; %exit
-; GFX9-NEXT:    s_movk_i32 s35, 0x3801
 ; GFX9-NEXT:    s_movk_i32 s34, 0x3800
-; GFX9-NEXT:    v_mov_b32_e32 v0, 0x3900
-; GFX9-NEXT:    v_mov_b32_e32 v1, 0x3d00
+; GFX9-NEXT:    v_mov_b32_e32 v0, 0x3d00
+; GFX9-NEXT:    v_mov_b32_e32 v1, 0x3900
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    v_cmp_gt_u16_e32 vcc, s35, v7
+; GFX9-NEXT:    v_cmp_lt_u16_e32 vcc, s34, v7
 ; GFX9-NEXT:    v_cndmask_b32_e32 v3, v0, v1, vcc
 ; GFX9-NEXT:    v_cmp_gt_u16_sdwa vcc, v7, s34 src0_sel:WORD_1 src1_sel:DWORD
-; GFX9-NEXT:    v_cndmask_b32_e32 v7, v1, v0, vcc
-; GFX9-NEXT:    v_cmp_gt_u16_e32 vcc, s35, v6
+; GFX9-NEXT:    v_cndmask_b32_e32 v7, v0, v1, vcc
+; GFX9-NEXT:    v_cmp_lt_u16_e32 vcc, s34, v6
 ; GFX9-NEXT:    v_cndmask_b32_e32 v2, v0, v1, vcc
-; GFX9-NEXT:    v_cmp_lt_u16_sdwa vcc, v6, s35 src0_sel:WORD_1 src1_sel:DWORD
+; GFX9-NEXT:    v_cmp_gt_u16_sdwa vcc, v6, s34 src0_sel:WORD_1 src1_sel:DWORD
 ; GFX9-NEXT:    v_cndmask_b32_e32 v6, v0, v1, vcc
-; GFX9-NEXT:    v_cmp_gt_u16_e32 vcc, s35, v5
+; GFX9-NEXT:    v_cmp_lt_u16_e32 vcc, s34, v5
 ; GFX9-NEXT:    v_cndmask_b32_e32 v8, v0, v1, vcc
-; GFX9-NEXT:    v_cmp_lt_u16_sdwa vcc, v5, s35 src0_sel:WORD_1 src1_sel:DWORD
+; GFX9-NEXT:    v_cmp_gt_u16_sdwa vcc, v5, s34 src0_sel:WORD_1 src1_sel:DWORD
 ; GFX9-NEXT:    v_cndmask_b32_e32 v5, v0, v1, vcc
-; GFX9-NEXT:    v_cmp_gt_u16_e32 vcc, s35, v4
+; GFX9-NEXT:    v_cmp_lt_u16_e32 vcc, s34, v4
 ; GFX9-NEXT:    v_cndmask_b32_e32 v9, v0, v1, vcc
-; GFX9-NEXT:    v_cmp_lt_u16_sdwa vcc, v4, s35 src0_sel:WORD_1 src1_sel:DWORD
+; GFX9-NEXT:    v_cmp_gt_u16_sdwa vcc, v4, s34 src0_sel:WORD_1 src1_sel:DWORD
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
 ; GFX9-NEXT:    s_mov_b32 s34, 0x5040100
 ; GFX9-NEXT:    v_perm_b32 v0, v0, v9, s34
@@ -1633,36 +1620,36 @@ define amdgpu_gfx <8 x i16> @vec_16xi16_extract_8xi16_0(i1 inreg %cond, ptr addr
 ; GFX11-TRUE16-NEXT:  ; %bb.1: ; %F
 ; GFX11-TRUE16-NEXT:    global_load_b128 v[4:7], v[2:3], off offset:16 glc dlc
 ; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-TRUE16-NEXT:    global_load_b128 v[4:7], v[2:3], off glc dlc
+; GFX11-TRUE16-NEXT:    global_load_b128 v[2:5], v[2:3], off glc dlc
 ; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0)
 ; GFX11-TRUE16-NEXT:    s_and_not1_b32 vcc_lo, exec_lo, s0
 ; GFX11-TRUE16-NEXT:    s_cbranch_vccz .LBB7_3
 ; GFX11-TRUE16-NEXT:    s_branch .LBB7_4
 ; GFX11-TRUE16-NEXT:  .LBB7_2:
-; GFX11-TRUE16-NEXT:    ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11
+; GFX11-TRUE16-NEXT:    ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
 ; GFX11-TRUE16-NEXT:  .LBB7_3: ; %T
 ; GFX11-TRUE16-NEXT:    global_load_b128 v[2:5], v[0:1], off offset:16 glc dlc
 ; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-TRUE16-NEXT:    global_load_b128 v[4:7], v[0:1], off glc dlc
+; GFX11-TRUE16-NEXT:    global_load_b128 v[2:5], v[0:1], off glc dlc
 ; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0)
 ; GFX11-TRUE16-NEXT:  .LBB7_4: ; %exit
-; GFX11-TRUE16-NEXT:    v_cmp_gt_u16_e32 vcc_lo, 0x3801, v7.l
-; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v3.h, 0x3900
-; GFX11-TRUE16-NEXT:    v_cmp_gt_u16_e64 s0, 0x3801, v6.l
-; GFX11-TRUE16-NEXT:    v_cmp_lt_u16_e64 s1, 0x3800, v7.h
-; GFX11-TRUE16-NEXT:    v_cmp_gt_u16_e64 s2, 0x3801, v4.h
-; GFX11-TRUE16-NEXT:    v_cmp_gt_u16_e64 s3, 0x3801, v5.h
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v3.l, v3.h, 0x3d00, vcc_lo
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v2.l, v3.h, 0x3d00, s0
-; GFX11-TRUE16-NEXT:    v_cmp_gt_u16_e32 vcc_lo, 0x3801, v5.l
-; GFX11-TRUE16-NEXT:    v_cmp_gt_u16_e64 s0, 0x3801, v4.l
-; GFX11-TRUE16-NEXT:    v_cmp_gt_u16_e64 s34, 0x3801, v6.h
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.h, v3.h, 0x3d00, s2
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v1.h, v3.h, 0x3d00, s3
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v1.l, v3.h, 0x3d00, vcc_lo
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, v3.h, 0x3d00, s0
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v2.h, v3.h, 0x3d00, s34
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v3.h, 0x3d00, v3.h, s1
+; GFX11-TRUE16-NEXT:    v_cmp_lt_u16_e32 vcc_lo, 0x3800, v2.h
+; GFX11-TRUE16-NEXT:    v_cmp_lt_u16_e64 s0, 0x3800, v2.l
+; GFX11-TRUE16-NEXT:    v_cmp_lt_u16_e64 s1, 0x3800, v3.h
+; GFX11-TRUE16-NEXT:    v_cmp_lt_u16_e64 s2, 0x3800, v5.l
+; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v3.h, 0x3d00
+; GFX11-TRUE16-NEXT:    v_cmp_lt_u16_e64 s3, 0x3800, v4.l
+; GFX11-TRUE16-NEXT:    v_cmp_lt_u16_e64 s34, 0x3800, v3.l
+; GFX11-TRUE16-NEXT:    v_cmp_lt_u16_e64 s35, 0x3800, v4.h
+; GFX11-TRUE16-NEXT:    v_cmp_lt_u16_e64 s36, 0x3800, v5.h
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v3.l, v3.h, 0x3900, s2
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v2.l, v3.h, 0x3900, s3
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v1.l, v3.h, 0x3900, s34
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, v3.h, 0x3900, s0
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.h, v3.h, 0x3900, vcc_lo
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v1.h, v3.h, 0x3900, s1
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v2.h, v3.h, 0x3900, s35
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v3.h, v3.h, 0x3900, s36
 ; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-FAKE16-LABEL: vec_16xi16_extract_8xi16_0:
@@ -1692,34 +1679,33 @@ define amdgpu_gfx <8 x i16> @vec_16xi16_extract_8xi16_0(i1 inreg %cond, ptr addr
 ; GFX11-FAKE16-NEXT:    global_load_b128 v[2:5], v[0:1], off glc dlc
 ; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0)
 ; GFX11-FAKE16-NEXT:  .LBB7_4: ; %exit
-; GFX11-FAKE16-NEXT:    v_cmp_gt_u16_e32 vcc_lo, 0x3801, v5
-; GFX11-FAKE16-NEXT:    v_mov_b32_e32 v9, 0x3900
-; GFX11-FAKE16-NEXT:    v_mov_b32_e32 v1, 0x3d00
+; GFX11-FAKE16-NEXT:    v_mov_b32_e32 v1, 0x3900
+; GFX11-FAKE16-NEXT:    v_cmp_lt_u16_e32 vcc_lo, 0x3800, v5
 ; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v7, 16, v4
 ; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v8, 16, v5
 ; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v0, 16, v2
 ; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v3
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v5, 0x3900, v1, vcc_lo
-; GFX11-FAKE16-NEXT:    v_cmp_gt_u16_e32 vcc_lo, 0x3801, v4
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v4, 0x3900, v1, vcc_lo
-; GFX11-FAKE16-NEXT:    v_cmp_gt_u16_e32 vcc_lo, 0x3801, v7
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v7, 0x3900, v1, vcc_lo
-; GFX11-FAKE16-NEXT:    v_cmp_gt_u16_e32 vcc_lo, 0x3801, v3
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v3, 0x3900, v1, vcc_lo
-; GFX11-FAKE16-NEXT:    v_cmp_gt_u16_e32 vcc_lo, 0x3801, v2
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v2, 0x3900, v1, vcc_lo
-; GFX11-FAKE16-NEXT:    v_cmp_gt_u16_e32 vcc_lo, 0x3801, v0
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x3900, v1, vcc_lo
-; GFX11-FAKE16-NEXT:    v_cmp_gt_u16_e32 vcc_lo, 0x3801, v6
-; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_3)
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v5, 0x3d00, v1, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_lt_u16_e32 vcc_lo, 0x3800, v4
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v4, 0x3d00, v1, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_lt_u16_e32 vcc_lo, 0x3800, v7
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v7, 0x3d00, v1, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_lt_u16_e32 vcc_lo, 0x3800, v3
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v3, 0x3d00, v1, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_lt_u16_e32 vcc_lo, 0x3800, v2
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v2, 0x3d00, v1, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_lt_u16_e32 vcc_lo, 0x3800, v0
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x3d00, v1, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_lt_u16_e32 vcc_lo, 0x3800, v6
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_4) | instid1(VALU_DEP_4)
 ; GFX11-FAKE16-NEXT:    v_perm_b32 v0, v0, v2, 0x5040100
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0x3900, v1, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v6, 0x3d00, v1, vcc_lo
 ; GFX11-FAKE16-NEXT:    v_cmp_lt_u16_e32 vcc_lo, 0x3800, v8
 ; GFX11-FAKE16-NEXT:    v_perm_b32 v2, v7, v4, 0x5040100
-; GFX11-FAKE16-NEXT:    v_perm_b32 v1, v1, v3, 0x5040100
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v6, 0x3d00, v9, vcc_lo
-; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-FAKE16-NEXT:    v_perm_b32 v3, v6, v5, 0x5040100
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v8, 0x3d00, v1, vcc_lo
+; GFX11-FAKE16-NEXT:    v_perm_b32 v1, v6, v3, 0x5040100
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX11-FAKE16-NEXT:    v_perm_b32 v3, v8, v5, 0x5040100
 ; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   br i1 %cond, label %T, label %F
 
@@ -1931,24 +1917,24 @@ define amdgpu_gfx <8 x half> @vec_16xf16_extract_8xf16_0(i1 inreg %cond, ptr add
 ; GFX9-NEXT:    ; kill: killed $vgpr0 killed $vgpr1
 ; GFX9-NEXT:  .LBB8_4: ; %exit
 ; GFX9-NEXT:    v_mov_b32_e32 v0, 0x3800
-; GFX9-NEXT:    v_mov_b32_e32 v1, 0x3900
-; GFX9-NEXT:    v_mov_b32_e32 v2, 0x3d00
+; GFX9-NEXT:    v_mov_b32_e32 v1, 0x3d00
+; GFX9-NEXT:    v_mov_b32_e32 v2, 0x3900
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    v_cmp_ge_f16_e32 vcc, 0.5, v7
+; GFX9-NEXT:    v_cmp_nge_f16_e32 vcc, 0.5, v7
 ; GFX9-NEXT:    v_cndmask_b32_e32 v3, v1, v2, vcc
 ; GFX9-NEXT:    v_cmp_nle_f16_sdwa vcc, v7, v0 src0_sel:WORD_1 src1_sel:DWORD
-; GFX9-NEXT:    v_cndmask_b32_e32 v7, v2, v1, vcc
-; GFX9-NEXT:    v_cmp_ge_f16_e32 vcc, 0.5, v6
+; GFX9-NEXT:    v_cndmask_b32_e32 v7, v1, v2, vcc
+; GFX9-NEXT:    v_cmp_nge_f16_e32 vcc, 0.5, v6
 ; GFX9-NEXT:    v_cndmask_b32_e32 v8, v1, v2, vcc
-; GFX9-NEXT:    v_cmp_le_f16_sdwa vcc, v6, v0 src0_sel:WORD_1 src1_sel:DWORD
+; GFX9-NEXT:    v_cmp_nle_f16_sdwa vcc, v6, v0 src0_sel:WORD_1 src1_sel:DWORD
 ; GFX9-NEXT:    v_cndmask_b32_e32 v6, v1, v2, vcc
-; GFX9-NEXT:    v_cmp_ge_f16_e32 vcc, 0.5, v5
+; GFX9-NEXT:    v_cmp_nge_f16_e32 vcc, 0.5, v5
 ; GFX9-NEXT:    v_cndmask_b32_e32 v9, v1, v2, vcc
-; GFX9-NEXT:    v_cmp_le_f16_sdwa vcc, v5, v0 src0_sel:WORD_1 src1_sel:DWORD
+; GFX9-NEXT:    v_cmp_nle_f16_sdwa vcc, v5, v0 src0_sel:WORD_1 src1_sel:DWORD
 ; GFX9-NEXT:    v_cndmask_b32_e32 v5, v1, v2, vcc
-; GFX9-NEXT:    v_cmp_ge_f16_e32 vcc, 0.5, v4
+; GFX9-NEXT:    v_cmp_nge_f16_e32 vcc, 0.5, v4
 ; GFX9-NEXT:    v_cndmask_b32_e32 v10, v1, v2, vcc
-; GFX9-NEXT:    v_cmp_le_f16_sdwa vcc, v4, v0 src0_sel:WORD_1 src1_sel:DWORD
+; GFX9-NEXT:    v_cmp_nle_f16_sdwa vcc, v4, v0 src0_sel:WORD_1 src1_sel:DWORD
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v1, v2, vcc
 ; GFX9-NEXT:    v_pack_b32_f16 v0, v10, v0
 ; GFX9-NEXT:    v_pack_b32_f16 v1, v9, v5
@@ -1983,29 +1969,29 @@ define amdgpu_gfx <8 x half> @vec_16xf16_extract_8xf16_0(i1 inreg %cond, ptr add
 ; GFX11-TRUE16-NEXT:    global_load_b128 v[2:5], v[0:1], off glc dlc
 ; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0)
 ; GFX11-TRUE16-NEXT:  .LBB8_4: ; %exit
-; GFX11-TRUE16-NEXT:    v_cmp_ge_f16_e32 vcc_lo, 0.5, v5.l
-; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v0.l, 0x3d00
-; GFX11-TRUE16-NEXT:    v_cmp_ge_f16_e64 s0, 0.5, v4.l
-; GFX11-TRUE16-NEXT:    v_cmp_nge_f16_e64 s1, 0.5, v5.h
-; GFX11-TRUE16-NEXT:    v_cmp_ge_f16_e64 s2, 0.5, v2.h
-; GFX11-TRUE16-NEXT:    v_cmp_ge_f16_e64 s3, 0.5, v3.h
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v4.l, 0x3900, v0.l, vcc_lo
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v5.l, 0x3900, v0.l, s0
-; GFX11-TRUE16-NEXT:    v_cmp_ge_f16_e32 vcc_lo, 0.5, v3.l
-; GFX11-TRUE16-NEXT:    v_cmp_ge_f16_e64 s0, 0.5, v2.l
-; GFX11-TRUE16-NEXT:    v_cmp_ge_f16_e64 s34, 0.5, v4.h
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v1.h, 0x3900, v0.l, s2
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v2.l, 0x3900, v0.l, s3
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v1.l, 0x3900, v0.l, vcc_lo
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0x3900, v0.l, s0
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v2.h, 0x3900, v0.l, s34
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v3.l, v0.l, 0x3900, s1
-; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_4)
+; GFX11-TRUE16-NEXT:    v_cmp_nge_f16_e32 vcc_lo, 0.5, v2.h
+; GFX11-TRUE16-NEXT:    v_cmp_nge_f16_e64 s0, 0.5, v2.l
+; GFX11-TRUE16-NEXT:    v_cmp_nge_f16_e64 s1, 0.5, v3.h
+; GFX11-TRUE16-NEXT:    v_cmp_nge_f16_e64 s2, 0.5, v5.l
+; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v0.l, 0x3900
+; GFX11-TRUE16-NEXT:    v_cmp_nge_f16_e64 s3, 0.5, v4.l
+; GFX11-TRUE16-NEXT:    v_cmp_nge_f16_e64 s34, 0.5, v3.l
+; GFX11-TRUE16-NEXT:    v_cmp_nge_f16_e64 s35, 0.5, v4.h
+; GFX11-TRUE16-NEXT:    v_cmp_nge_f16_e64 s36, 0.5, v5.h
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v3.l, 0x3d00, v0.l, s2
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v2.l, 0x3d00, v0.l, s3
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v1.l, 0x3d00, v0.l, s34
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0x3d00, v0.l, s0
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v1.h, 0x3d00, v0.l, vcc_lo
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v2.h, 0x3d00, v0.l, s1
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v3.h, 0x3d00, v0.l, s35
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v4.l, 0x3d00, v0.l, s36
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
 ; GFX11-TRUE16-NEXT:    v_pack_b32_f16 v0, v0.h, v1.h
-; GFX11-TRUE16-NEXT:    v_pack_b32_f16 v1, v1.l, v2.l
-; GFX11-TRUE16-NEXT:    v_pack_b32_f16 v2, v5.l, v2.h
-; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4)
-; GFX11-TRUE16-NEXT:    v_pack_b32_f16 v3, v4.l, v3.l
+; GFX11-TRUE16-NEXT:    v_pack_b32_f16 v1, v1.l, v2.h
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-TRUE16-NEXT:    v_pack_b32_f16 v2, v2.l, v3.h
+; GFX11-TRUE16-NEXT:    v_pack_b32_f16 v3, v3.l, v4.l
 ; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-FAKE16-LABEL: vec_16xf16_extract_8xf16_0:
@@ -2035,34 +2021,33 @@ define amdgpu_gfx <8 x half> @vec_16xf16_extract_8xf16_0(i1 inreg %cond, ptr add
 ; GFX11-FAKE16-NEXT:    global_load_b128 v[2:5], v[0:1], off glc dlc
 ; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0)
 ; GFX11-FAKE16-NEXT:  .LBB8_4: ; %exit
-; GFX11-FAKE16-NEXT:    v_cmp_ge_f16_e32 vcc_lo, 0.5, v5
-; GFX11-FAKE16-NEXT:    v_mov_b32_e32 v9, 0x3900
-; GFX11-FAKE16-NEXT:    v_mov_b32_e32 v1, 0x3d00
+; GFX11-FAKE16-NEXT:    v_mov_b32_e32 v1, 0x3900
+; GFX11-FAKE16-NEXT:    v_cmp_nge_f16_e32 vcc_lo, 0.5, v5
 ; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v7, 16, v4
 ; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v8, 16, v5
 ; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v0, 16, v2
 ; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v3
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v5, 0x3900, v1, vcc_lo
-; GFX11-FAKE16-NEXT:    v_cmp_ge_f16_e32 vcc_lo, 0.5, v4
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v4, 0x3900, v1, vcc_lo
-; GFX11-FAKE16-NEXT:    v_cmp_ge_f16_e32 vcc_lo, 0.5, v7
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v7, 0x3900, v1, vcc_lo
-; GFX11-FAKE16-NEXT:    v_cmp_ge_f16_e32 vcc_lo, 0.5, v3
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v3, 0x3900, v1, vcc_lo
-; GFX11-FAKE16-NEXT:    v_cmp_ge_f16_e32 vcc_lo, 0.5, v2
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v2, 0x3900, v1, vcc_lo
-; GFX11-FAKE16-NEXT:    v_cmp_ge_f16_e32 vcc_lo, 0.5, v0
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x3900, v1, vcc_lo
-; GFX11-FAKE16-NEXT:    v_cmp_ge_f16_e32 vcc_lo, 0.5, v6
-; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_3)
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v5, 0x3d00, v1, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_nge_f16_e32 vcc_lo, 0.5, v4
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v4, 0x3d00, v1, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_nge_f16_e32 vcc_lo, 0.5, v7
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v7, 0x3d00, v1, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_nge_f16_e32 vcc_lo, 0.5, v3
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v3, 0x3d00, v1, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_nge_f16_e32 vcc_lo, 0.5, v2
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v2, 0x3d00, v1, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_nge_f16_e32 vcc_lo, 0.5, v0
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x3d00, v1, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_nge_f16_e32 vcc_lo, 0.5, v6
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_4) | instid1(VALU_DEP_4)
 ; GFX11-FAKE16-NEXT:    v_pack_b32_f16 v0, v2, v0
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0x3900, v1, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v6, 0x3d00, v1, vcc_lo
 ; GFX11-FAKE16-NEXT:    v_cmp_nge_f16_e32 vcc_lo, 0.5, v8
 ; GFX11-FAKE16-NEXT:    v_pack_b32_f16 v2, v4, v7
-; GFX11-FAKE16-NEXT:    v_pack_b32_f16 v1, v3, v1
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v6, 0x3d00, v9, vcc_lo
-; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-FAKE16-NEXT:    v_pack_b32_f16 v3, v5, v6
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v8, 0x3d00, v1, vcc_lo
+; GFX11-FAKE16-NEXT:    v_pack_b32_f16 v1, v3, v6
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX11-FAKE16-NEXT:    v_pack_b32_f16 v3, v5, v8
 ; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   br i1 %cond, label %T, label %F
 
diff --git a/llvm/test/CodeGen/AMDGPU/extract-subvector.ll b/llvm/test/CodeGen/AMDGPU/extract-subvector.ll
index 41082821bafe3..5089cd50a8edd 100644
--- a/llvm/test/CodeGen/AMDGPU/extract-subvector.ll
+++ b/llvm/test/CodeGen/AMDGPU/extract-subvector.ll
@@ -210,14 +210,14 @@ define <4 x i64> @extract_4xi64(ptr addrspace(1) %p0, ptr addrspace(1) %p1, i1 %
 ; GCN-NEXT:    s_or_b64 exec, exec, s[4:5]
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-NEXT:    v_mov_b32_e32 v1, 0xffff8000
-; GCN-NEXT:    v_cmp_gt_i64_e32 vcc, 0, v[4:5]
-; GCN-NEXT:    v_cndmask_b32_e64 v0, v1, -1, vcc
-; GCN-NEXT:    v_cmp_gt_i64_e32 vcc, 0, v[6:7]
-; GCN-NEXT:    v_cndmask_b32_e64 v2, v1, -1, vcc
-; GCN-NEXT:    v_cmp_gt_i64_e32 vcc, 0, v[8:9]
-; GCN-NEXT:    v_cndmask_b32_e64 v4, v1, -1, vcc
-; GCN-NEXT:    v_cmp_gt_i64_e32 vcc, 0, v[10:11]
-; GCN-NEXT:    v_cndmask_b32_e64 v6, v1, -1, vcc
+; GCN-NEXT:    v_cmp_lt_i64_e32 vcc, -1, v[4:5]
+; GCN-NEXT:    v_cndmask_b32_e32 v0, -1, v1, vcc
+; GCN-NEXT:    v_cmp_lt_i64_e32 vcc, -1, v[6:7]
+; GCN-NEXT:    v_cndmask_b32_e32 v2, -1, v1, vcc
+; GCN-NEXT:    v_cmp_lt_i64_e32 vcc, -1, v[8:9]
+; GCN-NEXT:    v_cndmask_b32_e32 v4, -1, v1, vcc
+; GCN-NEXT:    v_cmp_lt_i64_e32 vcc, -1, v[10:11]
+; GCN-NEXT:    v_cndmask_b32_e32 v6, -1, v1, vcc
 ; GCN-NEXT:    v_mov_b32_e32 v1, -1
 ; GCN-NEXT:    v_mov_b32_e32 v3, -1
 ; GCN-NEXT:    v_mov_b32_e32 v5, -1
@@ -300,23 +300,23 @@ define <8 x i64> @extract_8xi64(ptr addrspace(1) %p0, ptr addrspace(1) %p1, i1 %
 ; GCN-NEXT:  .LBB3_4: ; %exit
 ; GCN-NEXT:    s_or_b64 exec, exec, s[4:5]
 ; GCN-NEXT:    v_mov_b32_e32 v1, 0xffff8000
-; GCN-NEXT:    v_cmp_gt_i64_e32 vcc, 0, v[6:7]
-; GCN-NEXT:    v_cmp_gt_i64_e64 s[4:5], 0, v[8:9]
-; GCN-NEXT:    v_cmp_gt_i64_e64 s[6:7], 0, v[10:11]
-; GCN-NEXT:    v_cmp_gt_i64_e64 s[8:9], 0, v[12:13]
-; GCN-NEXT:    v_cmp_gt_i64_e64 s[10:11], 0, v[14:15]
-; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    v_cmp_gt_i64_e64 s[12:13], 0, v[16:17]
-; GCN-NEXT:    v_cmp_gt_i64_e64 s[14:15], 0, v[18:19]
-; GCN-NEXT:    v_cmp_gt_i64_e64 s[16:17], 0, v[4:5]
-; GCN-NEXT:    v_cndmask_b32_e64 v0, v1, -1, s[16:17]
-; GCN-NEXT:    v_cndmask_b32_e64 v2, v1, -1, vcc
-; GCN-NEXT:    v_cndmask_b32_e64 v4, v1, -1, s[4:5]
-; GCN-NEXT:    v_cndmask_b32_e64 v6, v1, -1, s[6:7]
-; GCN-NEXT:    v_cndmask_b32_e64 v8, v1, -1, s[8:9]
-; GCN-NEXT:    v_cndmask_b32_e64 v10, v1, -1, s[10:11]
-; GCN-NEXT:    v_cndmask_b32_e64 v12, v1, -1, s[12:13]
-; GCN-NEXT:    v_cndmask_b32_e64 v14, v1, -1, s[14:15]
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    v_cmp_lt_i64_e32 vcc, -1, v[18:19]
+; GCN-NEXT:    v_cmp_lt_i64_e64 s[4:5], -1, v[16:17]
+; GCN-NEXT:    v_cmp_lt_i64_e64 s[6:7], -1, v[14:15]
+; GCN-NEXT:    v_cmp_lt_i64_e64 s[8:9], -1, v[12:13]
+; GCN-NEXT:    v_cmp_lt_i64_e64 s[10:11], -1, v[10:11]
+; GCN-NEXT:    v_cmp_lt_i64_e64 s[12:13], -1, v[8:9]
+; GCN-NEXT:    v_cmp_lt_i64_e64 s[14:15], -1, v[6:7]
+; GCN-NEXT:    v_cmp_lt_i64_e64 s[16:17], -1, v[4:5]
+; GCN-NEXT:    v_cndmask_b32_e64 v0, -1, v1, s[16:17]
+; GCN-NEXT:    v_cndmask_b32_e64 v2, -1, v1, s[14:15]
+; GCN-NEXT:    v_cndmask_b32_e64 v4, -1, v1, s[12:13]
+; GCN-NEXT:    v_cndmask_b32_e64 v6, -1, v1, s[10:11]
+; GCN-NEXT:    v_cndmask_b32_e64 v8, -1, v1, s[8:9]
+; GCN-NEXT:    v_cndmask_b32_e64 v10, -1, v1, s[6:7]
+; GCN-NEXT:    v_cndmask_b32_e64 v12, -1, v1, s[4:5]
+; GCN-NEXT:    v_cndmask_b32_e32 v14, -1, v1, vcc
 ; GCN-NEXT:    v_mov_b32_e32 v1, -1
 ; GCN-NEXT:    v_mov_b32_e32 v3, -1
 ; GCN-NEXT:    v_mov_b32_e32 v5, -1
@@ -459,14 +459,14 @@ define <4 x double> @extract_4xf64(ptr addrspace(1) %p0, ptr addrspace(1) %p1, i
 ; GCN-NEXT:    s_or_b64 exec, exec, s[4:5]
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-NEXT:    v_mov_b32_e32 v0, 0xbff00000
-; GCN-NEXT:    v_cmp_nlt_f64_e32 vcc, -1.0, v[4:5]
-; GCN-NEXT:    v_cndmask_b32_e32 v1, -2.0, v0, vcc
-; GCN-NEXT:    v_cmp_nlt_f64_e32 vcc, -1.0, v[6:7]
-; GCN-NEXT:    v_cndmask_b32_e32 v3, -2.0, v0, vcc
-; GCN-NEXT:    v_cmp_nlt_f64_e32 vcc, -1.0, v[8:9]
-; GCN-NEXT:    v_cndmask_b32_e32 v5, -2.0, v0, vcc
-; GCN-NEXT:    v_cmp_nlt_f64_e32 vcc, -1.0, v[10:11]
-; GCN-NEXT:    v_cndmask_b32_e32 v7, -2.0, v0, vcc
+; GCN-NEXT:    v_cmp_lt_f64_e32 vcc, -1.0, v[4:5]
+; GCN-NEXT:    v_cndmask_b32_e64 v1, v0, -2.0, vcc
+; GCN-NEXT:    v_cmp_lt_f64_e32 vcc, -1.0, v[6:7]
+; GCN-NEXT:    v_cndmask_b32_e64 v3, v0, -2.0, vcc
+; GCN-NEXT:    v_cmp_lt_f64_e32 vcc, -1.0, v[8:9]
+; GCN-NEXT:    v_cndmask_b32_e64 v5, v0, -2.0, vcc
+; GCN-NEXT:    v_cmp_lt_f64_e32 vcc, -1.0, v[10:11]
+; GCN-NEXT:    v_cndmask_b32_e64 v7, v0, -2.0, vcc
 ; GCN-NEXT:    v_mov_b32_e32 v0, 0
 ; GCN-NEXT:    v_mov_b32_e32 v2, 0
 ; GCN-NEXT:    v_mov_b32_e32 v4, 0
@@ -549,23 +549,23 @@ define <8 x double> @extract_8xf64(ptr addrspace(1) %p0, ptr addrspace(1) %p1, i
 ; GCN-NEXT:  .LBB6_4: ; %exit
 ; GCN-NEXT:    s_or_b64 exec, exec, s[4:5]
 ; GCN-NEXT:    v_mov_b32_e32 v0, 0xbff00000
-; GCN-NEXT:    v_cmp_nlt_f64_e32 vcc, -1.0, v[6:7]
-; GCN-NEXT:    v_cmp_nlt_f64_e64 s[4:5], -1.0, v[8:9]
-; GCN-NEXT:    v_cmp_nlt_f64_e64 s[6:7], -1.0, v[10:11]
-; GCN-NEXT:    v_cmp_nlt_f64_e64 s[8:9], -1.0, v[12:13]
-; GCN-NEXT:    v_cmp_nlt_f64_e64 s[10:11], -1.0, v[14:15]
-; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    v_cmp_nlt_f64_e64 s[12:13], -1.0, v[16:17]
-; GCN-NEXT:    v_cmp_nlt_f64_e64 s[14:15], -1.0, v[18:19]
-; GCN-NEXT:    v_cmp_nlt_f64_e64 s[16:17], -1.0, v[4:5]
-; GCN-NEXT:    v_cndmask_b32_e64 v1, -2.0, v0, s[16:17]
-; GCN-NEXT:    v_cndmask_b32_e32 v3, -2.0, v0, vcc
-; GCN-NEXT:    v_cndmask_b32_e64 v5, -2.0, v0, s[4:5]
-; GCN-NEXT:    v_cndmask_b32_e64 v7, -2.0, v0, s[6:7]
-; GCN-NEXT:    v_cndmask_b32_e64 v9, -2.0, v0, s[8:9]
-; GCN-NEXT:    v_cndmask_b32_e64 v11, -2.0, v0, s[10:11]
-; GCN-NEXT:    v_cndmask_b32_e64 v13, -2.0, v0, s[12:13]
-; GCN-NEXT:    v_cndmask_b32_e64 v15, -2.0, v0, s[14:15]
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    v_cmp_lt_f64_e32 vcc, -1.0, v[18:19]
+; GCN-NEXT:    v_cmp_lt_f64_e64 s[4:5], -1.0, v[16:17]
+; GCN-NEXT:    v_cmp_lt_f64_e64 s[6:7], -1.0, v[14:15]
+; GCN-NEXT:    v_cmp_lt_f64_e64 s[8:9], -1.0, v[12:13]
+; GCN-NEXT:    v_cmp_lt_f64_e64 s[10:11], -1.0, v[10:11]
+; GCN-NEXT:    v_cmp_lt_f64_e64 s[12:13], -1.0, v[8:9]
+; GCN-NEXT:    v_cmp_lt_f64_e64 s[14:15], -1.0, v[6:7]
+; GCN-NEXT:    v_cmp_lt_f64_e64 s[16:17], -1.0, v[4:5]
+; GCN-NEXT:    v_cndmask_b32_e64 v1, v0, -2.0, s[16:17]
+; GCN-NEXT:    v_cndmask_b32_e64 v3, v0, -2.0, s[14:15]
+; GCN-NEXT:    v_cndmask_b32_e64 v5, v0, -2.0, s[12:13]
+; GCN-NEXT:    v_cndmask_b32_e64 v7, v0, -2.0, s[10:11]
+; GCN-NEXT:    v_cndmask_b32_e64 v9, v0, -2.0, s[8:9]
+; GCN-NEXT:    v_cndmask_b32_e64 v11, v0, -2.0, s[6:7]
+; GCN-NEXT:    v_cndmask_b32_e64 v13, v0, -2.0, s[4:5]
+; GCN-NEXT:    v_cndmask_b32_e64 v15, v0, -2.0, vcc
 ; GCN-NEXT:    v_mov_b32_e32 v0, 0
 ; GCN-NEXT:    v_mov_b32_e32 v2, 0
 ; GCN-NEXT:    v_mov_b32_e32 v4, 0
diff --git a/llvm/test/CodeGen/AMDGPU/extract_vector_dynelt.ll b/llvm/test/CodeGen/AMDGPU/extract_vector_dynelt.ll
index 9ddf3e9340435..5b6eb2fa34f9e 100644
--- a/llvm/test/CodeGen/AMDGPU/extract_vector_dynelt.ll
+++ b/llvm/test/CodeGen/AMDGPU/extract_vector_dynelt.ll
@@ -9,13 +9,13 @@ define amdgpu_kernel void @float4_extelt(ptr addrspace(1) %out, i32 %sel) {
 ; GCN-NEXT:    s_waitcnt lgkmcnt(0)
 ; GCN-NEXT:    s_cmp_eq_u32 s6, 1
 ; GCN-NEXT:    s_cselect_b64 s[2:3], -1, 0
-; GCN-NEXT:    s_cmp_lg_u32 s6, 2
+; GCN-NEXT:    s_cmp_eq_u32 s6, 2
 ; GCN-NEXT:    v_cndmask_b32_e64 v0, 0, 1.0, s[2:3]
-; GCN-NEXT:    s_cselect_b64 vcc, -1, 0
-; GCN-NEXT:    s_cmp_lg_u32 s6, 3
-; GCN-NEXT:    v_cndmask_b32_e32 v0, 2.0, v0, vcc
-; GCN-NEXT:    s_cselect_b64 vcc, -1, 0
-; GCN-NEXT:    v_cndmask_b32_e32 v2, 4.0, v0, vcc
+; GCN-NEXT:    s_cselect_b64 s[2:3], -1, 0
+; GCN-NEXT:    s_cmp_eq_u32 s6, 3
+; GCN-NEXT:    v_cndmask_b32_e64 v0, v0, 2.0, s[2:3]
+; GCN-NEXT:    s_cselect_b64 s[2:3], -1, 0
+; GCN-NEXT:    v_cndmask_b32_e64 v2, v0, 4.0, s[2:3]
 ; GCN-NEXT:    v_mov_b32_e32 v0, s0
 ; GCN-NEXT:    v_mov_b32_e32 v1, s1
 ; GCN-NEXT:    flat_store_dword v[0:1], v2
@@ -34,10 +34,10 @@ define amdgpu_kernel void @int4_extelt(ptr addrspace(1) %out, i32 %sel) {
 ; GCN-NEXT:    s_waitcnt lgkmcnt(0)
 ; GCN-NEXT:    s_cmp_eq_u32 s2, 1
 ; GCN-NEXT:    s_cselect_b32 s3, 1, 0
-; GCN-NEXT:    s_cmp_lg_u32 s2, 2
-; GCN-NEXT:    s_cselect_b32 s3, s3, 2
-; GCN-NEXT:    s_cmp_lg_u32 s2, 3
-; GCN-NEXT:    s_cselect_b32 s2, s3, 4
+; GCN-NEXT:    s_cmp_eq_u32 s2, 2
+; GCN-NEXT:    s_cselect_b32 s3, 2, s3
+; GCN-NEXT:    s_cmp_eq_u32 s2, 3
+; GCN-NEXT:    s_cselect_b32 s2, 4, s3
 ; GCN-NEXT:    v_mov_b32_e32 v0, s0
 ; GCN-NEXT:    v_mov_b32_e32 v1, s1
 ; GCN-NEXT:    v_mov_b32_e32 v2, s2
@@ -188,31 +188,31 @@ define amdgpu_kernel void @half8_extelt(ptr addrspace(1) %out, i32 %sel) {
 ; GCN-NEXT:    s_waitcnt lgkmcnt(0)
 ; GCN-NEXT:    s_cmp_eq_u32 s2, 1
 ; GCN-NEXT:    s_cselect_b64 vcc, -1, 0
-; GCN-NEXT:    s_cmp_lg_u32 s2, 2
+; GCN-NEXT:    s_cmp_eq_u32 s2, 2
 ; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
 ; GCN-NEXT:    v_mov_b32_e32 v1, 0x4200
 ; GCN-NEXT:    s_cselect_b64 vcc, -1, 0
-; GCN-NEXT:    s_cmp_lg_u32 s2, 3
-; GCN-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
+; GCN-NEXT:    s_cmp_eq_u32 s2, 3
+; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
 ; GCN-NEXT:    v_mov_b32_e32 v1, 0x4400
 ; GCN-NEXT:    s_cselect_b64 vcc, -1, 0
-; GCN-NEXT:    s_cmp_lg_u32 s2, 4
-; GCN-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
+; GCN-NEXT:    s_cmp_eq_u32 s2, 4
+; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
 ; GCN-NEXT:    v_mov_b32_e32 v1, 0x4500
 ; GCN-NEXT:    s_cselect_b64 vcc, -1, 0
-; GCN-NEXT:    s_cmp_lg_u32 s2, 5
-; GCN-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
+; GCN-NEXT:    s_cmp_eq_u32 s2, 5
+; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
 ; GCN-NEXT:    v_mov_b32_e32 v1, 0x4600
 ; GCN-NEXT:    s_cselect_b64 vcc, -1, 0
-; GCN-NEXT:    s_cmp_lg_u32 s2, 6
-; GCN-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
+; GCN-NEXT:    s_cmp_eq_u32 s2, 6
+; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
 ; GCN-NEXT:    v_mov_b32_e32 v1, 0x4700
 ; GCN-NEXT:    s_cselect_b64 vcc, -1, 0
-; GCN-NEXT:    s_cmp_lg_u32 s2, 7
-; GCN-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
+; GCN-NEXT:    s_cmp_eq_u32 s2, 7
+; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
 ; GCN-NEXT:    v_mov_b32_e32 v1, 0x4800
 ; GCN-NEXT:    s_cselect_b64 vcc, -1, 0
-; GCN-NEXT:    v_cndmask_b32_e32 v2, v1, v0, vcc
+; GCN-NEXT:    v_cndmask_b32_e32 v2, v0, v1, vcc
 ; GCN-NEXT:    v_mov_b32_e32 v0, s0
 ; GCN-NEXT:    v_mov_b32_e32 v1, s1
 ; GCN-NEXT:    flat_store_short v[0:1], v2
@@ -231,18 +231,18 @@ define amdgpu_kernel void @short8_extelt(ptr addrspace(1) %out, i32 %sel) {
 ; GCN-NEXT:    s_waitcnt lgkmcnt(0)
 ; GCN-NEXT:    s_cmp_eq_u32 s2, 1
 ; GCN-NEXT:    s_cselect_b32 s3, 2, 1
-; GCN-NEXT:    s_cmp_lg_u32 s2, 2
-; GCN-NEXT:    s_cselect_b32 s3, s3, 3
-; GCN-NEXT:    s_cmp_lg_u32 s2, 3
-; GCN-NEXT:    s_cselect_b32 s3, s3, 4
-; GCN-NEXT:    s_cmp_lg_u32 s2, 4
-; GCN-NEXT:    s_cselect_b32 s3, s3, 5
-; GCN-NEXT:    s_cmp_lg_u32 s2, 5
-; GCN-NEXT:    s_cselect_b32 s3, s3, 6
-; GCN-NEXT:    s_cmp_lg_u32 s2, 6
-; GCN-NEXT:    s_cselect_b32 s3, s3, 7
-; GCN-NEXT:    s_cmp_lg_u32 s2, 7
-; GCN-NEXT:    s_cselect_b32 s2, s3, 8
+; GCN-NEXT:    s_cmp_eq_u32 s2, 2
+; GCN-NEXT:    s_cselect_b32 s3, 3, s3
+; GCN-NEXT:    s_cmp_eq_u32 s2, 3
+; GCN-NEXT:    s_cselect_b32 s3, 4, s3
+; GCN-NEXT:    s_cmp_eq_u32 s2, 4
+; GCN-NEXT:    s_cselect_b32 s3, 5, s3
+; GCN-NEXT:    s_cmp_eq_u32 s2, 5
+; GCN-NEXT:    s_cselect_b32 s3, 6, s3
+; GCN-NEXT:    s_cmp_eq_u32 s2, 6
+; GCN-NEXT:    s_cselect_b32 s3, 7, s3
+; GCN-NEXT:    s_cmp_eq_u32 s2, 7
+; GCN-NEXT:    s_cselect_b32 s2, 8, s3
 ; GCN-NEXT:    v_mov_b32_e32 v0, s0
 ; GCN-NEXT:    v_mov_b32_e32 v1, s1
 ; GCN-NEXT:    v_mov_b32_e32 v2, s2
@@ -657,34 +657,34 @@ define amdgpu_kernel void @byte16_extelt(ptr addrspace(1) %out, i32 %sel) {
 ; GCN-NEXT:    s_waitcnt lgkmcnt(0)
 ; GCN-NEXT:    s_cmp_eq_u32 s2, 1
 ; GCN-NEXT:    s_cselect_b32 s3, 2, 1
-; GCN-NEXT:    s_cmp_lg_u32 s2, 2
-; GCN-NEXT:    s_cselect_b32 s3, s3, 3
-; GCN-NEXT:    s_cmp_lg_u32 s2, 3
-; GCN-NEXT:    s_cselect_b32 s3, s3, 4
-; GCN-NEXT:    s_cmp_lg_u32 s2, 4
-; GCN-NEXT:    s_cselect_b32 s3, s3, 5
-; GCN-NEXT:    s_cmp_lg_u32 s2, 5
-; GCN-NEXT:    s_cselect_b32 s3, s3, 6
-; GCN-NEXT:    s_cmp_lg_u32 s2, 6
-; GCN-NEXT:    s_cselect_b32 s3, s3, 7
-; GCN-NEXT:    s_cmp_lg_u32 s2, 7
-; GCN-NEXT:    s_cselect_b32 s3, s3, 8
-; GCN-NEXT:    s_cmp_lg_u32 s2, 8
-; GCN-NEXT:    s_cselect_b32 s3, s3, 9
-; GCN-NEXT:    s_cmp_lg_u32 s2, 9
-; GCN-NEXT:    s_cselect_b32 s3, s3, 10
-; GCN-NEXT:    s_cmp_lg_u32 s2, 10
-; GCN-NEXT:    s_cselect_b32 s3, s3, 11
-; GCN-NEXT:    s_cmp_lg_u32 s2, 11
-; GCN-NEXT:    s_cselect_b32 s3, s3, 12
-; GCN-NEXT:    s_cmp_lg_u32 s2, 12
-; GCN-NEXT:    s_cselect_b32 s3, s3, 13
-; GCN-NEXT:    s_cmp_lg_u32 s2, 13
-; GCN-NEXT:    s_cselect_b32 s3, s3, 14
-; GCN-NEXT:    s_cmp_lg_u32 s2, 14
-; GCN-NEXT:    s_cselect_b32 s3, s3, 15
-; GCN-NEXT:    s_cmp_lg_u32 s2, 15
-; GCN-NEXT:    s_cselect_b32 s2, s3, 16
+; GCN-NEXT:    s_cmp_eq_u32 s2, 2
+; GCN-NEXT:    s_cselect_b32 s3, 3, s3
+; GCN-NEXT:    s_cmp_eq_u32 s2, 3
+; GCN-NEXT:    s_cselect_b32 s3, 4, s3
+; GCN-NEXT:    s_cmp_eq_u32 s2, 4
+; GCN-NEXT:    s_cselect_b32 s3, 5, s3
+; GCN-NEXT:    s_cmp_eq_u32 s2, 5
+; GCN-NEXT:    s_cselect_b32 s3, 6, s3
+; GCN-NEXT:    s_cmp_eq_u32 s2, 6
+; GCN-NEXT:    s_cselect_b32 s3, 7, s3
+; GCN-NEXT:    s_cmp_eq_u32 s2, 7
+; GCN-NEXT:    s_cselect_b32 s3, 8, s3
+; GCN-NEXT:    s_cmp_eq_u32 s2, 8
+; GCN-NEXT:    s_cselect_b32 s3, 9, s3
+; GCN-NEXT:    s_cmp_eq_u32 s2, 9
+; GCN-NEXT:    s_cselect_b32 s3, 10, s3
+; GCN-NEXT:    s_cmp_eq_u32 s2, 10
+; GCN-NEXT:    s_cselect_b32 s3, 11, s3
+; GCN-NEXT:    s_cmp_eq_u32 s2, 11
+; GCN-NEXT:    s_cselect_b32 s3, 12, s3
+; GCN-NEXT:    s_cmp_eq_u32 s2, 12
+; GCN-NEXT:    s_cselect_b32 s3, 13, s3
+; GCN-NEXT:    s_cmp_eq_u32 s2, 13
+; GCN-NEXT:    s_cselect_b32 s3, 14, s3
+; GCN-NEXT:    s_cmp_eq_u32 s2, 14
+; GCN-NEXT:    s_cselect_b32 s3, 15, s3
+; GCN-NEXT:    s_cmp_eq_u32 s2, 15
+; GCN-NEXT:    s_cselect_b32 s2, 16, s3
 ; GCN-NEXT:    v_mov_b32_e32 v0, s0
 ; GCN-NEXT:    v_mov_b32_e32 v1, s1
 ; GCN-NEXT:    v_mov_b32_e32 v2, s2
@@ -726,259 +726,259 @@ define amdgpu_kernel void @bit128_extelt(ptr addrspace(1) %out, i32 %sel) {
 ; GCN-NEXT:    s_cmp_lg_u32 s2, 1
 ; GCN-NEXT:    s_cselect_b64 s[4:5], -1, 0
 ; GCN-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
-; GCN-NEXT:    s_cmp_lg_u32 s2, 2
+; GCN-NEXT:    s_cmp_eq_u32 s2, 2
 ; GCN-NEXT:    v_readfirstlane_b32 s3, v0
-; GCN-NEXT:    s_cselect_b32 s3, s3, 1
-; GCN-NEXT:    s_cmp_lg_u32 s2, 3
-; GCN-NEXT:    s_cselect_b32 s3, s3, 0
-; GCN-NEXT:    s_cmp_lg_u32 s2, 4
-; GCN-NEXT:    s_cselect_b32 s3, s3, 1
-; GCN-NEXT:    s_cmp_lg_u32 s2, 5
-; GCN-NEXT:    s_cselect_b32 s3, s3, 0
-; GCN-NEXT:    s_cmp_lg_u32 s2, 6
-; GCN-NEXT:    s_cselect_b32 s3, s3, 1
-; GCN-NEXT:    s_cmp_lg_u32 s2, 7
-; GCN-NEXT:    s_cselect_b32 s3, s3, 0
-; GCN-NEXT:    s_cmp_lg_u32 s2, 8
-; GCN-NEXT:    s_cselect_b32 s3, s3, 1
-; GCN-NEXT:    s_cmp_lg_u32 s2, 9
-; GCN-NEXT:    s_cselect_b32 s3, s3, 0
-; GCN-NEXT:    s_cmp_lg_u32 s2, 10
-; GCN-NEXT:    s_cselect_b32 s3, s3, 1
-; GCN-NEXT:    s_cmp_lg_u32 s2, 11
-; GCN-NEXT:    s_cselect_b32 s3, s3, 0
-; GCN-NEXT:    s_cmp_lg_u32 s2, 12
-; GCN-NEXT:    s_cselect_b32 s3, s3, 1
-; GCN-NEXT:    s_cmp_lg_u32 s2, 13
-; GCN-NEXT:    s_cselect_b32 s3, s3, 0
-; GCN-NEXT:    s_cmp_lg_u32 s2, 14
-; GCN-NEXT:    s_cselect_b32 s3, s3, 1
-; GCN-NEXT:    s_cmp_lg_u32 s2, 15
-; GCN-NEXT:    s_cselect_b32 s3, s3, 0
-; GCN-NEXT:    s_cmp_lg_u32 s2, 16
-; GCN-NEXT:    s_cselect_b32 s3, s3, 1
-; GCN-NEXT:    s_cmp_lg_u32 s2, 17
-; GCN-NEXT:    s_cselect_b32 s3, s3, 0
-; GCN-NEXT:    s_cmp_lg_u32 s2, 18
-; GCN-NEXT:    s_cselect_b32 s3, s3, 1
-; GCN-NEXT:    s_cmp_lg_u32 s2, 19
-; GCN-NEXT:    s_cselect_b32 s3, s3, 0
-; GCN-NEXT:    s_cmp_lg_u32 s2, 20
-; GCN-NEXT:    s_cselect_b32 s3, s3, 1
-; GCN-NEXT:    s_cmp_lg_u32 s2, 21
-; GCN-NEXT:    s_cselect_b32 s3, s3, 0
-; GCN-NEXT:    s_cmp_lg_u32 s2, 22
-; GCN-NEXT:    s_cselect_b32 s3, s3, 1
-; GCN-NEXT:    s_cmp_lg_u32 s2, 23
-; GCN-NEXT:    s_cselect_b32 s3, s3, 0
-; GCN-NEXT:    s_cmp_lg_u32 s2, 24
-; GCN-NEXT:    s_cselect_b32 s3, s3, 1
-; GCN-NEXT:    s_cmp_lg_u32 s2, 25
-; GCN-NEXT:    s_cselect_b32 s3, s3, 0
-; GCN-NEXT:    s_cmp_lg_u32 s2, 26
-; GCN-NEXT:    s_cselect_b32 s3, s3, 1
-; GCN-NEXT:    s_cmp_lg_u32 s2, 27
-; GCN-NEXT:    s_cselect_b32 s3, s3, 0
-; GCN-NEXT:    s_cmp_lg_u32 s2, 28
-; GCN-NEXT:    s_cselect_b32 s3, s3, 1
-; GCN-NEXT:    s_cmp_lg_u32 s2, 29
-; GCN-NEXT:    s_cselect_b32 s3, s3, 0
-; GCN-NEXT:    s_cmp_lg_u32 s2, 30
-; GCN-NEXT:    s_cselect_b32 s3, s3, 1
-; GCN-NEXT:    s_cmp_lg_u32 s2, 31
-; GCN-NEXT:    s_cselect_b32 s3, s3, 0
-; GCN-NEXT:    s_cmp_lg_u32 s2, 32
-; GCN-NEXT:    s_cselect_b32 s3, s3, 1
-; GCN-NEXT:    s_cmp_lg_u32 s2, 33
-; GCN-NEXT:    s_cselect_b32 s3, s3, 0
-; GCN-NEXT:    s_cmp_lg_u32 s2, 34
-; GCN-NEXT:    s_cselect_b32 s3, s3, 1
-; GCN-NEXT:    s_cmp_lg_u32 s2, 35
-; GCN-NEXT:    s_cselect_b32 s3, s3, 0
-; GCN-NEXT:    s_cmp_lg_u32 s2, 36
-; GCN-NEXT:    s_cselect_b32 s3, s3, 1
-; GCN-NEXT:    s_cmp_lg_u32 s2, 37
-; GCN-NEXT:    s_cselect_b32 s3, s3, 0
-; GCN-NEXT:    s_cmp_lg_u32 s2, 38
-; GCN-NEXT:    s_cselect_b32 s3, s3, 1
-; GCN-NEXT:    s_cmp_lg_u32 s2, 39
-; GCN-NEXT:    s_cselect_b32 s3, s3, 0
-; GCN-NEXT:    s_cmp_lg_u32 s2, 40
-; GCN-NEXT:    s_cselect_b32 s3, s3, 1
-; GCN-NEXT:    s_cmp_lg_u32 s2, 41
-; GCN-NEXT:    s_cselect_b32 s3, s3, 0
-; GCN-NEXT:    s_cmp_lg_u32 s2, 42
-; GCN-NEXT:    s_cselect_b32 s3, s3, 1
-; GCN-NEXT:    s_cmp_lg_u32 s2, 43
-; GCN-NEXT:    s_cselect_b32 s3, s3, 0
-; GCN-NEXT:    s_cmp_lg_u32 s2, 44
-; GCN-NEXT:    s_cselect_b32 s3, s3, 1
-; GCN-NEXT:    s_cmp_lg_u32 s2, 45
-; GCN-NEXT:    s_cselect_b32 s3, s3, 0
-; GCN-NEXT:    s_cmp_lg_u32 s2, 46
-; GCN-NEXT:    s_cselect_b32 s3, s3, 1
-; GCN-NEXT:    s_cmp_lg_u32 s2, 47
-; GCN-NEXT:    s_cselect_b32 s3, s3, 0
-; GCN-NEXT:    s_cmp_lg_u32 s2, 48
-; GCN-NEXT:    s_cselect_b32 s3, s3, 1
-; GCN-NEXT:    s_cmp_lg_u32 s2, 49
-; GCN-NEXT:    s_cselect_b32 s3, s3, 0
-; GCN-NEXT:    s_cmp_lg_u32 s2, 50
-; GCN-NEXT:    s_cselect_b32 s3, s3, 1
-; GCN-NEXT:    s_cmp_lg_u32 s2, 51
-; GCN-NEXT:    s_cselect_b32 s3, s3, 0
-; GCN-NEXT:    s_cmp_lg_u32 s2, 52
-; GCN-NEXT:    s_cselect_b32 s3, s3, 1
-; GCN-NEXT:    s_cmp_lg_u32 s2, 53
-; GCN-NEXT:    s_cselect_b32 s3, s3, 0
-; GCN-NEXT:    s_cmp_lg_u32 s2, 54
-; GCN-NEXT:    s_cselect_b32 s3, s3, 1
-; GCN-NEXT:    s_cmp_lg_u32 s2, 55
-; GCN-NEXT:    s_cselect_b32 s3, s3, 0
-; GCN-NEXT:    s_cmp_lg_u32 s2, 56
-; GCN-NEXT:    s_cselect_b32 s3, s3, 1
-; GCN-NEXT:    s_cmp_lg_u32 s2, 57
-; GCN-NEXT:    s_cselect_b32 s3, s3, 0
-; GCN-NEXT:    s_cmp_lg_u32 s2, 58
-; GCN-NEXT:    s_cselect_b32 s3, s3, 1
-; GCN-NEXT:    s_cmp_lg_u32 s2, 59
-; GCN-NEXT:    s_cselect_b32 s3, s3, 0
-; GCN-NEXT:    s_cmp_lg_u32 s2, 60
-; GCN-NEXT:    s_cselect_b32 s3, s3, 1
-; GCN-NEXT:    s_cmp_lg_u32 s2, 61
-; GCN-NEXT:    s_cselect_b32 s3, s3, 0
-; GCN-NEXT:    s_cmp_lg_u32 s2, 62
-; GCN-NEXT:    s_cselect_b32 s3, s3, 1
-; GCN-NEXT:    s_cmp_lg_u32 s2, 63
-; GCN-NEXT:    s_cselect_b32 s3, s3, 0
-; GCN-NEXT:    s_cmp_lg_u32 s2, 64
-; GCN-NEXT:    s_cselect_b32 s3, s3, 1
-; GCN-NEXT:    s_cmpk_lg_i32 s2, 0x41
-; GCN-NEXT:    s_cselect_b32 s3, s3, 0
-; GCN-NEXT:    s_cmpk_lg_i32 s2, 0x42
-; GCN-NEXT:    s_cselect_b32 s3, s3, 1
-; GCN-NEXT:    s_cmpk_lg_i32 s2, 0x43
-; GCN-NEXT:    s_cselect_b32 s3, s3, 0
-; GCN-NEXT:    s_cmpk_lg_i32 s2, 0x44
-; GCN-NEXT:    s_cselect_b32 s3, s3, 1
-; GCN-NEXT:    s_cmpk_lg_i32 s2, 0x45
-; GCN-NEXT:    s_cselect_b32 s3, s3, 0
-; GCN-NEXT:    s_cmpk_lg_i32 s2, 0x46
-; GCN-NEXT:    s_cselect_b32 s3, s3, 1
-; GCN-NEXT:    s_cmpk_lg_i32 s2, 0x47
-; GCN-NEXT:    s_cselect_b32 s3, s3, 0
-; GCN-NEXT:    s_cmpk_lg_i32 s2, 0x48
-; GCN-NEXT:    s_cselect_b32 s3, s3, 1
-; GCN-NEXT:    s_cmpk_lg_i32 s2, 0x49
-; GCN-NEXT:    s_cselect_b32 s3, s3, 0
-; GCN-NEXT:    s_cmpk_lg_i32 s2, 0x4a
-; GCN-NEXT:    s_cselect_b32 s3, s3, 1
-; GCN-NEXT:    s_cmpk_lg_i32 s2, 0x4b
-; GCN-NEXT:    s_cselect_b32 s3, s3, 0
-; GCN-NEXT:    s_cmpk_lg_i32 s2, 0x4c
-; GCN-NEXT:    s_cselect_b32 s3, s3, 1
-; GCN-NEXT:    s_cmpk_lg_i32 s2, 0x4d
-; GCN-NEXT:    s_cselect_b32 s3, s3, 0
-; GCN-NEXT:    s_cmpk_lg_i32 s2, 0x4e
-; GCN-NEXT:    s_cselect_b32 s3, s3, 1
-; GCN-NEXT:    s_cmpk_lg_i32 s2, 0x4f
-; GCN-NEXT:    s_cselect_b32 s3, s3, 0
-; GCN-NEXT:    s_cmpk_lg_i32 s2, 0x50
-; GCN-NEXT:    s_cselect_b32 s3, s3, 1
-; GCN-NEXT:    s_cmpk_lg_i32 s2, 0x51
-; GCN-NEXT:    s_cselect_b32 s3, s3, 0
-; GCN-NEXT:    s_cmpk_lg_i32 s2, 0x52
-; GCN-NEXT:    s_cselect_b32 s3, s3, 1
-; GCN-NEXT:    s_cmpk_lg_i32 s2, 0x53
-; GCN-NEXT:    s_cselect_b32 s3, s3, 0
-; GCN-NEXT:    s_cmpk_lg_i32 s2, 0x54
-; GCN-NEXT:    s_cselect_b32 s3, s3, 1
-; GCN-NEXT:    s_cmpk_lg_i32 s2, 0x55
-; GCN-NEXT:    s_cselect_b32 s3, s3, 0
-; GCN-NEXT:    s_cmpk_lg_i32 s2, 0x56
-; GCN-NEXT:    s_cselect_b32 s3, s3, 1
-; GCN-NEXT:    s_cmpk_lg_i32 s2, 0x57
-; GCN-NEXT:    s_cselect_b32 s3, s3, 0
-; GCN-NEXT:    s_cmpk_lg_i32 s2, 0x58
-; GCN-NEXT:    s_cselect_b32 s3, s3, 1
-; GCN-NEXT:    s_cmpk_lg_i32 s2, 0x59
-; GCN-NEXT:    s_cselect_b32 s3, s3, 0
-; GCN-NEXT:    s_cmpk_lg_i32 s2, 0x5a
-; GCN-NEXT:    s_cselect_b32 s3, s3, 1
-; GCN-NEXT:    s_cmpk_lg_i32 s2, 0x5b
-; GCN-NEXT:    s_cselect_b32 s3, s3, 0
-; GCN-NEXT:    s_cmpk_lg_i32 s2, 0x5c
-; GCN-NEXT:    s_cselect_b32 s3, s3, 1
-; GCN-NEXT:    s_cmpk_lg_i32 s2, 0x5d
-; GCN-NEXT:    s_cselect_b32 s3, s3, 0
-; GCN-NEXT:    s_cmpk_lg_i32 s2, 0x5e
-; GCN-NEXT:    s_cselect_b32 s3, s3, 1
-; GCN-NEXT:    s_cmpk_lg_i32 s2, 0x5f
-; GCN-NEXT:    s_cselect_b32 s3, s3, 0
-; GCN-NEXT:    s_cmpk_lg_i32 s2, 0x60
-; GCN-NEXT:    s_cselect_b32 s3, s3, 1
-; GCN-NEXT:    s_cmpk_lg_i32 s2, 0x61
-; GCN-NEXT:    s_cselect_b32 s3, s3, 0
-; GCN-NEXT:    s_cmpk_lg_i32 s2, 0x62
-; GCN-NEXT:    s_cselect_b32 s3, s3, 1
-; GCN-NEXT:    s_cmpk_lg_i32 s2, 0x63
-; GCN-NEXT:    s_cselect_b32 s3, s3, 0
-; GCN-NEXT:    s_cmpk_lg_i32 s2, 0x64
-; GCN-NEXT:    s_cselect_b32 s3, s3, 1
-; GCN-NEXT:    s_cmpk_lg_i32 s2, 0x65
-; GCN-NEXT:    s_cselect_b32 s3, s3, 0
-; GCN-NEXT:    s_cmpk_lg_i32 s2, 0x66
-; GCN-NEXT:    s_cselect_b32 s3, s3, 1
-; GCN-NEXT:    s_cmpk_lg_i32 s2, 0x67
-; GCN-NEXT:    s_cselect_b32 s3, s3, 0
-; GCN-NEXT:    s_cmpk_lg_i32 s2, 0x68
-; GCN-NEXT:    s_cselect_b32 s3, s3, 1
-; GCN-NEXT:    s_cmpk_lg_i32 s2, 0x69
-; GCN-NEXT:    s_cselect_b32 s3, s3, 0
-; GCN-NEXT:    s_cmpk_lg_i32 s2, 0x6a
-; GCN-NEXT:    s_cselect_b32 s3, s3, 1
-; GCN-NEXT:    s_cmpk_lg_i32 s2, 0x6b
-; GCN-NEXT:    s_cselect_b32 s3, s3, 0
-; GCN-NEXT:    s_cmpk_lg_i32 s2, 0x6c
-; GCN-NEXT:    s_cselect_b32 s3, s3, 1
-; GCN-NEXT:    s_cmpk_lg_i32 s2, 0x6d
-; GCN-NEXT:    s_cselect_b32 s3, s3, 0
-; GCN-NEXT:    s_cmpk_lg_i32 s2, 0x6e
-; GCN-NEXT:    s_cselect_b32 s3, s3, 1
-; GCN-NEXT:    s_cmpk_lg_i32 s2, 0x6f
-; GCN-NEXT:    s_cselect_b32 s3, s3, 0
-; GCN-NEXT:    s_cmpk_lg_i32 s2, 0x70
-; GCN-NEXT:    s_cselect_b32 s3, s3, 1
-; GCN-NEXT:    s_cmpk_lg_i32 s2, 0x71
-; GCN-NEXT:    s_cselect_b32 s3, s3, 0
-; GCN-NEXT:    s_cmpk_lg_i32 s2, 0x72
-; GCN-NEXT:    s_cselect_b32 s3, s3, 1
-; GCN-NEXT:    s_cmpk_lg_i32 s2, 0x73
-; GCN-NEXT:    s_cselect_b32 s3, s3, 0
-; GCN-NEXT:    s_cmpk_lg_i32 s2, 0x74
-; GCN-NEXT:    s_cselect_b32 s3, s3, 1
-; GCN-NEXT:    s_cmpk_lg_i32 s2, 0x75
-; GCN-NEXT:    s_cselect_b32 s3, s3, 0
-; GCN-NEXT:    s_cmpk_lg_i32 s2, 0x76
-; GCN-NEXT:    s_cselect_b32 s3, s3, 1
-; GCN-NEXT:    s_cmpk_lg_i32 s2, 0x77
-; GCN-NEXT:    s_cselect_b32 s3, s3, 0
-; GCN-NEXT:    s_cmpk_lg_i32 s2, 0x78
-; GCN-NEXT:    s_cselect_b32 s3, s3, 1
-; GCN-NEXT:    s_cmpk_lg_i32 s2, 0x79
-; GCN-NEXT:    s_cselect_b32 s3, s3, 0
-; GCN-NEXT:    s_cmpk_lg_i32 s2, 0x7a
-; GCN-NEXT:    s_cselect_b32 s3, s3, 1
-; GCN-NEXT:    s_cmpk_lg_i32 s2, 0x7b
-; GCN-NEXT:    s_cselect_b32 s3, s3, 0
-; GCN-NEXT:    s_cmpk_lg_i32 s2, 0x7c
-; GCN-NEXT:    s_cselect_b32 s3, s3, 1
-; GCN-NEXT:    s_cmpk_lg_i32 s2, 0x7d
-; GCN-NEXT:    s_cselect_b32 s3, s3, 0
-; GCN-NEXT:    s_cmpk_lg_i32 s2, 0x7e
-; GCN-NEXT:    s_cselect_b32 s3, s3, 1
-; GCN-NEXT:    s_cmpk_lg_i32 s2, 0x7f
-; GCN-NEXT:    s_cselect_b32 s2, s3, 0
+; GCN-NEXT:    s_cselect_b32 s3, 1, s3
+; GCN-NEXT:    s_cmp_eq_u32 s2, 3
+; GCN-NEXT:    s_cselect_b32 s3, 0, s3
+; GCN-NEXT:    s_cmp_eq_u32 s2, 4
+; GCN-NEXT:    s_cselect_b32 s3, 1, s3
+; GCN-NEXT:    s_cmp_eq_u32 s2, 5
+; GCN-NEXT:    s_cselect_b32 s3, 0, s3
+; GCN-NEXT:    s_cmp_eq_u32 s2, 6
+; GCN-NEXT:    s_cselect_b32 s3, 1, s3
+; GCN-NEXT:    s_cmp_eq_u32 s2, 7
+; GCN-NEXT:    s_cselect_b32 s3, 0, s3
+; GCN-NEXT:    s_cmp_eq_u32 s2, 8
+; GCN-NEXT:    s_cselect_b32 s3, 1, s3
+; GCN-NEXT:    s_cmp_eq_u32 s2, 9
+; GCN-NEXT:    s_cselect_b32 s3, 0, s3
+; GCN-NEXT:    s_cmp_eq_u32 s2, 10
+; GCN-NEXT:    s_cselect_b32 s3, 1, s3
+; GCN-NEXT:    s_cmp_eq_u32 s2, 11
+; GCN-NEXT:    s_cselect_b32 s3, 0, s3
+; GCN-NEXT:    s_cmp_eq_u32 s2, 12
+; GCN-NEXT:    s_cselect_b32 s3, 1, s3
+; GCN-NEXT:    s_cmp_eq_u32 s2, 13
+; GCN-NEXT:    s_cselect_b32 s3, 0, s3
+; GCN-NEXT:    s_cmp_eq_u32 s2, 14
+; GCN-NEXT:    s_cselect_b32 s3, 1, s3
+; GCN-NEXT:    s_cmp_eq_u32 s2, 15
+; GCN-NEXT:    s_cselect_b32 s3, 0, s3
+; GCN-NEXT:    s_cmp_eq_u32 s2, 16
+; GCN-NEXT:    s_cselect_b32 s3, 1, s3
+; GCN-NEXT:    s_cmp_eq_u32 s2, 17
+; GCN-NEXT:    s_cselect_b32 s3, 0, s3
+; GCN-NEXT:    s_cmp_eq_u32 s2, 18
+; GCN-NEXT:    s_cselect_b32 s3, 1, s3
+; GCN-NEXT:    s_cmp_eq_u32 s2, 19
+; GCN-NEXT:    s_cselect_b32 s3, 0, s3
+; GCN-NEXT:    s_cmp_eq_u32 s2, 20
+; GCN-NEXT:    s_cselect_b32 s3, 1, s3
+; GCN-NEXT:    s_cmp_eq_u32 s2, 21
+; GCN-NEXT:    s_cselect_b32 s3, 0, s3
+; GCN-NEXT:    s_cmp_eq_u32 s2, 22
+; GCN-NEXT:    s_cselect_b32 s3, 1, s3
+; GCN-NEXT:    s_cmp_eq_u32 s2, 23
+; GCN-NEXT:    s_cselect_b32 s3, 0, s3
+; GCN-NEXT:    s_cmp_eq_u32 s2, 24
+; GCN-NEXT:    s_cselect_b32 s3, 1, s3
+; GCN-NEXT:    s_cmp_eq_u32 s2, 25
+; GCN-NEXT:    s_cselect_b32 s3, 0, s3
+; GCN-NEXT:    s_cmp_eq_u32 s2, 26
+; GCN-NEXT:    s_cselect_b32 s3, 1, s3
+; GCN-NEXT:    s_cmp_eq_u32 s2, 27
+; GCN-NEXT:    s_cselect_b32 s3, 0, s3
+; GCN-NEXT:    s_cmp_eq_u32 s2, 28
+; GCN-NEXT:    s_cselect_b32 s3, 1, s3
+; GCN-NEXT:    s_cmp_eq_u32 s2, 29
+; GCN-NEXT:    s_cselect_b32 s3, 0, s3
+; GCN-NEXT:    s_cmp_eq_u32 s2, 30
+; GCN-NEXT:    s_cselect_b32 s3, 1, s3
+; GCN-NEXT:    s_cmp_eq_u32 s2, 31
+; GCN-NEXT:    s_cselect_b32 s3, 0, s3
+; GCN-NEXT:    s_cmp_eq_u32 s2, 32
+; GCN-NEXT:    s_cselect_b32 s3, 1, s3
+; GCN-NEXT:    s_cmp_eq_u32 s2, 33
+; GCN-NEXT:    s_cselect_b32 s3, 0, s3
+; GCN-NEXT:    s_cmp_eq_u32 s2, 34
+; GCN-NEXT:    s_cselect_b32 s3, 1, s3
+; GCN-NEXT:    s_cmp_eq_u32 s2, 35
+; GCN-NEXT:    s_cselect_b32 s3, 0, s3
+; GCN-NEXT:    s_cmp_eq_u32 s2, 36
+; GCN-NEXT:    s_cselect_b32 s3, 1, s3
+; GCN-NEXT:    s_cmp_eq_u32 s2, 37
+; GCN-NEXT:    s_cselect_b32 s3, 0, s3
+; GCN-NEXT:    s_cmp_eq_u32 s2, 38
+; GCN-NEXT:    s_cselect_b32 s3, 1, s3
+; GCN-NEXT:    s_cmp_eq_u32 s2, 39
+; GCN-NEXT:    s_cselect_b32 s3, 0, s3
+; GCN-NEXT:    s_cmp_eq_u32 s2, 40
+; GCN-NEXT:    s_cselect_b32 s3, 1, s3
+; GCN-NEXT:    s_cmp_eq_u32 s2, 41
+; GCN-NEXT:    s_cselect_b32 s3, 0, s3
+; GCN-NEXT:    s_cmp_eq_u32 s2, 42
+; GCN-NEXT:    s_cselect_b32 s3, 1, s3
+; GCN-NEXT:    s_cmp_eq_u32 s2, 43
+; GCN-NEXT:    s_cselect_b32 s3, 0, s3
+; GCN-NEXT:    s_cmp_eq_u32 s2, 44
+; GCN-NEXT:    s_cselect_b32 s3, 1, s3
+; GCN-NEXT:    s_cmp_eq_u32 s2, 45
+; GCN-NEXT:    s_cselect_b32 s3, 0, s3
+; GCN-NEXT:    s_cmp_eq_u32 s2, 46
+; GCN-NEXT:    s_cselect_b32 s3, 1, s3
+; GCN-NEXT:    s_cmp_eq_u32 s2, 47
+; GCN-NEXT:    s_cselect_b32 s3, 0, s3
+; GCN-NEXT:    s_cmp_eq_u32 s2, 48
+; GCN-NEXT:    s_cselect_b32 s3, 1, s3
+; GCN-NEXT:    s_cmp_eq_u32 s2, 49
+; GCN-NEXT:    s_cselect_b32 s3, 0, s3
+; GCN-NEXT:    s_cmp_eq_u32 s2, 50
+; GCN-NEXT:    s_cselect_b32 s3, 1, s3
+; GCN-NEXT:    s_cmp_eq_u32 s2, 51
+; GCN-NEXT:    s_cselect_b32 s3, 0, s3
+; GCN-NEXT:    s_cmp_eq_u32 s2, 52
+; GCN-NEXT:    s_cselect_b32 s3, 1, s3
+; GCN-NEXT:    s_cmp_eq_u32 s2, 53
+; GCN-NEXT:    s_cselect_b32 s3, 0, s3
+; GCN-NEXT:    s_cmp_eq_u32 s2, 54
+; GCN-NEXT:    s_cselect_b32 s3, 1, s3
+; GCN-NEXT:    s_cmp_eq_u32 s2, 55
+; GCN-NEXT:    s_cselect_b32 s3, 0, s3
+; GCN-NEXT:    s_cmp_eq_u32 s2, 56
+; GCN-NEXT:    s_cselect_b32 s3, 1, s3
+; GCN-NEXT:    s_cmp_eq_u32 s2, 57
+; GCN-NEXT:    s_cselect_b32 s3, 0, s3
+; GCN-NEXT:    s_cmp_eq_u32 s2, 58
+; GCN-NEXT:    s_cselect_b32 s3, 1, s3
+; GCN-NEXT:    s_cmp_eq_u32 s2, 59
+; GCN-NEXT:    s_cselect_b32 s3, 0, s3
+; GCN-NEXT:    s_cmp_eq_u32 s2, 60
+; GCN-NEXT:    s_cselect_b32 s3, 1, s3
+; GCN-NEXT:    s_cmp_eq_u32 s2, 61
+; GCN-NEXT:    s_cselect_b32 s3, 0, s3
+; GCN-NEXT:    s_cmp_eq_u32 s2, 62
+; GCN-NEXT:    s_cselect_b32 s3, 1, s3
+; GCN-NEXT:    s_cmp_eq_u32 s2, 63
+; GCN-NEXT:    s_cselect_b32 s3, 0, s3
+; GCN-NEXT:    s_cmp_eq_u32 s2, 64
+; GCN-NEXT:    s_cselect_b32 s3, 1, s3
+; GCN-NEXT:    s_cmpk_eq_i32 s2, 0x41
+; GCN-NEXT:    s_cselect_b32 s3, 0, s3
+; GCN-NEXT:    s_cmpk_eq_i32 s2, 0x42
+; GCN-NEXT:    s_cselect_b32 s3, 1, s3
+; GCN-NEXT:    s_cmpk_eq_i32 s2, 0x43
+; GCN-NEXT:    s_cselect_b32 s3, 0, s3
+; GCN-NEXT:    s_cmpk_eq_i32 s2, 0x44
+; GCN-NEXT:    s_cselect_b32 s3, 1, s3
+; GCN-NEXT:    s_cmpk_eq_i32 s2, 0x45
+; GCN-NEXT:    s_cselect_b32 s3, 0, s3
+; GCN-NEXT:    s_cmpk_eq_i32 s2, 0x46
+; GCN-NEXT:    s_cselect_b32 s3, 1, s3
+; GCN-NEXT:    s_cmpk_eq_i32 s2, 0x47
+; GCN-NEXT:    s_cselect_b32 s3, 0, s3
+; GCN-NEXT:    s_cmpk_eq_i32 s2, 0x48
+; GCN-NEXT:    s_cselect_b32 s3, 1, s3
+; GCN-NEXT:    s_cmpk_eq_i32 s2, 0x49
+; GCN-NEXT:    s_cselect_b32 s3, 0, s3
+; GCN-NEXT:    s_cmpk_eq_i32 s2, 0x4a
+; GCN-NEXT:    s_cselect_b32 s3, 1, s3
+; GCN-NEXT:    s_cmpk_eq_i32 s2, 0x4b
+; GCN-NEXT:    s_cselect_b32 s3, 0, s3
+; GCN-NEXT:    s_cmpk_eq_i32 s2, 0x4c
+; GCN-NEXT:    s_cselect_b32 s3, 1, s3
+; GCN-NEXT:    s_cmpk_eq_i32 s2, 0x4d
+; GCN-NEXT:    s_cselect_b32 s3, 0, s3
+; GCN-NEXT:    s_cmpk_eq_i32 s2, 0x4e
+; GCN-NEXT:    s_cselect_b32 s3, 1, s3
+; GCN-NEXT:    s_cmpk_eq_i32 s2, 0x4f
+; GCN-NEXT:    s_cselect_b32 s3, 0, s3
+; GCN-NEXT:    s_cmpk_eq_i32 s2, 0x50
+; GCN-NEXT:    s_cselect_b32 s3, 1, s3
+; GCN-NEXT:    s_cmpk_eq_i32 s2, 0x51
+; GCN-NEXT:    s_cselect_b32 s3, 0, s3
+; GCN-NEXT:    s_cmpk_eq_i32 s2, 0x52
+; GCN-NEXT:    s_cselect_b32 s3, 1, s3
+; GCN-NEXT:    s_cmpk_eq_i32 s2, 0x53
+; GCN-NEXT:    s_cselect_b32 s3, 0, s3
+; GCN-NEXT:    s_cmpk_eq_i32 s2, 0x54
+; GCN-NEXT:    s_cselect_b32 s3, 1, s3
+; GCN-NEXT:    s_cmpk_eq_i32 s2, 0x55
+; GCN-NEXT:    s_cselect_b32 s3, 0, s3
+; GCN-NEXT:    s_cmpk_eq_i32 s2, 0x56
+; GCN-NEXT:    s_cselect_b32 s3, 1, s3
+; GCN-NEXT:    s_cmpk_eq_i32 s2, 0x57
+; GCN-NEXT:    s_cselect_b32 s3, 0, s3
+; GCN-NEXT:    s_cmpk_eq_i32 s2, 0x58
+; GCN-NEXT:    s_cselect_b32 s3, 1, s3
+; GCN-NEXT:    s_cmpk_eq_i32 s2, 0x59
+; GCN-NEXT:    s_cselect_b32 s3, 0, s3
+; GCN-NEXT:    s_cmpk_eq_i32 s2, 0x5a
+; GCN-NEXT:    s_cselect_b32 s3, 1, s3
+; GCN-NEXT:    s_cmpk_eq_i32 s2, 0x5b
+; GCN-NEXT:    s_cselect_b32 s3, 0, s3
+; GCN-NEXT:    s_cmpk_eq_i32 s2, 0x5c
+; GCN-NEXT:    s_cselect_b32 s3, 1, s3
+; GCN-NEXT:    s_cmpk_eq_i32 s2, 0x5d
+; GCN-NEXT:    s_cselect_b32 s3, 0, s3
+; GCN-NEXT:    s_cmpk_eq_i32 s2, 0x5e
+; GCN-NEXT:    s_cselect_b32 s3, 1, s3
+; GCN-NEXT:    s_cmpk_eq_i32 s2, 0x5f
+; GCN-NEXT:    s_cselect_b32 s3, 0, s3
+; GCN-NEXT:    s_cmpk_eq_i32 s2, 0x60
+; GCN-NEXT:    s_cselect_b32 s3, 1, s3
+; GCN-NEXT:    s_cmpk_eq_i32 s2, 0x61
+; GCN-NEXT:    s_cselect_b32 s3, 0, s3
+; GCN-NEXT:    s_cmpk_eq_i32 s2, 0x62
+; GCN-NEXT:    s_cselect_b32 s3, 1, s3
+; GCN-NEXT:    s_cmpk_eq_i32 s2, 0x63
+; GCN-NEXT:    s_cselect_b32 s3, 0, s3
+; GCN-NEXT:    s_cmpk_eq_i32 s2, 0x64
+; GCN-NEXT:    s_cselect_b32 s3, 1, s3
+; GCN-NEXT:    s_cmpk_eq_i32 s2, 0x65
+; GCN-NEXT:    s_cselect_b32 s3, 0, s3
+; GCN-NEXT:    s_cmpk_eq_i32 s2, 0x66
+; GCN-NEXT:    s_cselect_b32 s3, 1, s3
+; GCN-NEXT:    s_cmpk_eq_i32 s2, 0x67
+; GCN-NEXT:    s_cselect_b32 s3, 0, s3
+; GCN-NEXT:    s_cmpk_eq_i32 s2, 0x68
+; GCN-NEXT:    s_cselect_b32 s3, 1, s3
+; GCN-NEXT:    s_cmpk_eq_i32 s2, 0x69
+; GCN-NEXT:    s_cselect_b32 s3, 0, s3
+; GCN-NEXT:    s_cmpk_eq_i32 s2, 0x6a
+; GCN-NEXT:    s_cselect_b32 s3, 1, s3
+; GCN-NEXT:    s_cmpk_eq_i32 s2, 0x6b
+; GCN-NEXT:    s_cselect_b32 s3, 0, s3
+; GCN-NEXT:    s_cmpk_eq_i32 s2, 0x6c
+; GCN-NEXT:    s_cselect_b32 s3, 1, s3
+; GCN-NEXT:    s_cmpk_eq_i32 s2, 0x6d
+; GCN-NEXT:    s_cselect_b32 s3, 0, s3
+; GCN-NEXT:    s_cmpk_eq_i32 s2, 0x6e
+; GCN-NEXT:    s_cselect_b32 s3, 1, s3
+; GCN-NEXT:    s_cmpk_eq_i32 s2, 0x6f
+; GCN-NEXT:    s_cselect_b32 s3, 0, s3
+; GCN-NEXT:    s_cmpk_eq_i32 s2, 0x70
+; GCN-NEXT:    s_cselect_b32 s3, 1, s3
+; GCN-NEXT:    s_cmpk_eq_i32 s2, 0x71
+; GCN-NEXT:    s_cselect_b32 s3, 0, s3
+; GCN-NEXT:    s_cmpk_eq_i32 s2, 0x72
+; GCN-NEXT:    s_cselect_b32 s3, 1, s3
+; GCN-NEXT:    s_cmpk_eq_i32 s2, 0x73
+; GCN-NEXT:    s_cselect_b32 s3, 0, s3
+; GCN-NEXT:    s_cmpk_eq_i32 s2, 0x74
+; GCN-NEXT:    s_cselect_b32 s3, 1, s3
+; GCN-NEXT:    s_cmpk_eq_i32 s2, 0x75
+; GCN-NEXT:    s_cselect_b32 s3, 0, s3
+; GCN-NEXT:    s_cmpk_eq_i32 s2, 0x76
+; GCN-NEXT:    s_cselect_b32 s3, 1, s3
+; GCN-NEXT:    s_cmpk_eq_i32 s2, 0x77
+; GCN-NEXT:    s_cselect_b32 s3, 0, s3
+; GCN-NEXT:    s_cmpk_eq_i32 s2, 0x78
+; GCN-NEXT:    s_cselect_b32 s3, 1, s3
+; GCN-NEXT:    s_cmpk_eq_i32 s2, 0x79
+; GCN-NEXT:    s_cselect_b32 s3, 0, s3
+; GCN-NEXT:    s_cmpk_eq_i32 s2, 0x7a
+; GCN-NEXT:    s_cselect_b32 s3, 1, s3
+; GCN-NEXT:    s_cmpk_eq_i32 s2, 0x7b
+; GCN-NEXT:    s_cselect_b32 s3, 0, s3
+; GCN-NEXT:    s_cmpk_eq_i32 s2, 0x7c
+; GCN-NEXT:    s_cselect_b32 s3, 1, s3
+; GCN-NEXT:    s_cmpk_eq_i32 s2, 0x7d
+; GCN-NEXT:    s_cselect_b32 s3, 0, s3
+; GCN-NEXT:    s_cmpk_eq_i32 s2, 0x7e
+; GCN-NEXT:    s_cselect_b32 s3, 1, s3
+; GCN-NEXT:    s_cmpk_eq_i32 s2, 0x7f
+; GCN-NEXT:    s_cselect_b32 s2, 0, s3
 ; GCN-NEXT:    s_and_b32 s2, s2, 1
 ; GCN-NEXT:    v_mov_b32_e32 v0, s0
 ; GCN-NEXT:    v_mov_b32_e32 v1, s1
@@ -1097,18 +1097,17 @@ define double @double16_extelt_vec(i32 %sel) {
 ; GCN-LABEL: double16_extelt_vec:
 ; GCN:       ; %bb.0: ; %entry
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v0
-; GCN-NEXT:    v_cmp_eq_u32_e64 s[4:5], 1, v0
 ; GCN-NEXT:    v_mov_b32_e32 v1, 0x9999999a
 ; GCN-NEXT:    v_mov_b32_e32 v2, 0xcccccccd
+; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
+; GCN-NEXT:    v_cndmask_b32_e32 v3, v1, v2, vcc
+; GCN-NEXT:    v_cmp_ne_u32_e64 s[4:5], 2, v0
+; GCN-NEXT:    v_cndmask_b32_e64 v2, v2, v3, s[4:5]
 ; GCN-NEXT:    v_mov_b32_e32 v3, 0x3ff19999
 ; GCN-NEXT:    v_mov_b32_e32 v4, 0x4000cccc
-; GCN-NEXT:    s_or_b64 vcc, vcc, s[4:5]
-; GCN-NEXT:    v_cndmask_b32_e64 v3, v3, v4, s[4:5]
-; GCN-NEXT:    v_cndmask_b32_e32 v2, v1, v2, vcc
+; GCN-NEXT:    v_cndmask_b32_e32 v3, v3, v4, vcc
 ; GCN-NEXT:    v_mov_b32_e32 v4, 0x4008cccc
-; GCN-NEXT:    v_cmp_ne_u32_e32 vcc, 2, v0
-; GCN-NEXT:    v_cndmask_b32_e32 v3, v4, v3, vcc
+; GCN-NEXT:    v_cndmask_b32_e64 v3, v4, v3, s[4:5]
 ; GCN-NEXT:    v_mov_b32_e32 v4, 0x40106666
 ; GCN-NEXT:    v_cmp_ne_u32_e32 vcc, 3, v0
 ; GCN-NEXT:    v_cndmask_b32_e32 v3, v4, v3, vcc
diff --git a/llvm/test/CodeGen/AMDGPU/fcopysign.f16.ll b/llvm/test/CodeGen/AMDGPU/fcopysign.f16.ll
index e74d5ba24079d..8b9bd98779b2d 100644
--- a/llvm/test/CodeGen/AMDGPU/fcopysign.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fcopysign.f16.ll
@@ -1083,8 +1083,8 @@ define amdgpu_ps i16 @s_copysign_out_f16_mag_f64_sign_f16(double inreg %mag, hal
 ; SI-NEXT:    s_or_b32 s5, s5, s6
 ; SI-NEXT:    s_lshr_b32 s3, s3, 2
 ; SI-NEXT:    s_add_i32 s3, s3, s5
-; SI-NEXT:    s_cmp_lt_i32 s4, 31
-; SI-NEXT:    s_cselect_b32 s3, s3, 0x7c00
+; SI-NEXT:    s_cmp_gt_i32 s4, 30
+; SI-NEXT:    s_cselect_b32 s3, 0x7c00, s3
 ; SI-NEXT:    s_cmp_lg_u32 s0, 0
 ; SI-NEXT:    s_movk_i32 s0, 0x7e00
 ; SI-NEXT:    s_cselect_b32 s0, s0, 0x7c00
@@ -1135,8 +1135,8 @@ define amdgpu_ps i16 @s_copysign_out_f16_mag_f64_sign_f16(double inreg %mag, hal
 ; VI-NEXT:    s_or_b32 s4, s4, s5
 ; VI-NEXT:    s_lshr_b32 s3, s3, 2
 ; VI-NEXT:    s_add_i32 s3, s3, s4
-; VI-NEXT:    s_cmp_lt_i32 s1, 31
-; VI-NEXT:    s_cselect_b32 s3, s3, 0x7c00
+; VI-NEXT:    s_cmp_gt_i32 s1, 30
+; VI-NEXT:    s_cselect_b32 s3, 0x7c00, s3
 ; VI-NEXT:    s_cmp_lg_u32 s0, 0
 ; VI-NEXT:    s_movk_i32 s0, 0x7e00
 ; VI-NEXT:    s_cselect_b32 s0, s0, 0x7c00
@@ -1183,8 +1183,8 @@ define amdgpu_ps i16 @s_copysign_out_f16_mag_f64_sign_f16(double inreg %mag, hal
 ; GFX9-NEXT:    s_or_b32 s4, s4, s5
 ; GFX9-NEXT:    s_lshr_b32 s3, s3, 2
 ; GFX9-NEXT:    s_add_i32 s3, s3, s4
-; GFX9-NEXT:    s_cmp_lt_i32 s1, 31
-; GFX9-NEXT:    s_cselect_b32 s3, s3, 0x7c00
+; GFX9-NEXT:    s_cmp_gt_i32 s1, 30
+; GFX9-NEXT:    s_cselect_b32 s3, 0x7c00, s3
 ; GFX9-NEXT:    s_cmp_lg_u32 s0, 0
 ; GFX9-NEXT:    s_movk_i32 s0, 0x7e00
 ; GFX9-NEXT:    s_cselect_b32 s0, s0, 0x7c00
@@ -1238,9 +1238,9 @@ define amdgpu_ps i16 @s_copysign_out_f16_mag_f64_sign_f16(double inreg %mag, hal
 ; GFX11-TRUE16-NEXT:    s_or_b32 s4, s4, s5
 ; GFX11-TRUE16-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
 ; GFX11-TRUE16-NEXT:    s_add_i32 s3, s3, s4
-; GFX11-TRUE16-NEXT:    s_cmp_lt_i32 s0, 31
+; GFX11-TRUE16-NEXT:    s_cmp_gt_i32 s0, 30
 ; GFX11-TRUE16-NEXT:    s_movk_i32 s4, 0x7e00
-; GFX11-TRUE16-NEXT:    s_cselect_b32 s3, s3, 0x7c00
+; GFX11-TRUE16-NEXT:    s_cselect_b32 s3, 0x7c00, s3
 ; GFX11-TRUE16-NEXT:    s_cmp_lg_u32 s1, 0
 ; GFX11-TRUE16-NEXT:    s_cselect_b32 s1, s4, 0x7c00
 ; GFX11-TRUE16-NEXT:    s_cmpk_eq_i32 s0, 0x40f
@@ -1294,9 +1294,9 @@ define amdgpu_ps i16 @s_copysign_out_f16_mag_f64_sign_f16(double inreg %mag, hal
 ; GFX11-FAKE16-NEXT:    s_or_b32 s4, s4, s5
 ; GFX11-FAKE16-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
 ; GFX11-FAKE16-NEXT:    s_add_i32 s3, s3, s4
-; GFX11-FAKE16-NEXT:    s_cmp_lt_i32 s0, 31
+; GFX11-FAKE16-NEXT:    s_cmp_gt_i32 s0, 30
 ; GFX11-FAKE16-NEXT:    s_movk_i32 s4, 0x7e00
-; GFX11-FAKE16-NEXT:    s_cselect_b32 s3, s3, 0x7c00
+; GFX11-FAKE16-NEXT:    s_cselect_b32 s3, 0x7c00, s3
 ; GFX11-FAKE16-NEXT:    s_cmp_lg_u32 s1, 0
 ; GFX11-FAKE16-NEXT:    s_cselect_b32 s1, s4, 0x7c00
 ; GFX11-FAKE16-NEXT:    s_cmpk_eq_i32 s0, 0x40f
@@ -3936,8 +3936,8 @@ define amdgpu_ps i32 @s_copysign_out_v2f16_mag_v2f64_sign_v2f16(<2 x double> inr
 ; SI-NEXT:    s_or_b32 s6, s6, s7
 ; SI-NEXT:    s_lshr_b32 s4, s4, 2
 ; SI-NEXT:    s_add_i32 s4, s4, s6
-; SI-NEXT:    s_cmp_lt_i32 s5, 31
-; SI-NEXT:    s_cselect_b32 s4, s4, 0x7c00
+; SI-NEXT:    s_cmp_gt_i32 s5, 30
+; SI-NEXT:    s_cselect_b32 s4, 0x7c00, s4
 ; SI-NEXT:    s_cmp_lg_u32 s0, 0
 ; SI-NEXT:    s_movk_i32 s6, 0x7e00
 ; SI-NEXT:    s_cselect_b32 s0, s6, 0x7c00
@@ -3978,8 +3978,8 @@ define amdgpu_ps i32 @s_copysign_out_v2f16_mag_v2f64_sign_v2f16(<2 x double> inr
 ; SI-NEXT:    s_or_b32 s5, s5, s7
 ; SI-NEXT:    s_lshr_b32 s1, s1, 2
 ; SI-NEXT:    s_add_i32 s1, s1, s5
-; SI-NEXT:    s_cmp_lt_i32 s2, 31
-; SI-NEXT:    s_cselect_b32 s1, s1, 0x7c00
+; SI-NEXT:    s_cmp_gt_i32 s2, 30
+; SI-NEXT:    s_cselect_b32 s1, 0x7c00, s1
 ; SI-NEXT:    s_cmp_lg_u32 s0, 0
 ; SI-NEXT:    s_cselect_b32 s0, s6, 0x7c00
 ; SI-NEXT:    s_cmpk_eq_i32 s2, 0x40f
@@ -4035,8 +4035,8 @@ define amdgpu_ps i32 @s_copysign_out_v2f16_mag_v2f64_sign_v2f16(<2 x double> inr
 ; VI-NEXT:    s_or_b32 s6, s6, s7
 ; VI-NEXT:    s_lshr_b32 s5, s5, 2
 ; VI-NEXT:    s_add_i32 s5, s5, s6
-; VI-NEXT:    s_cmp_lt_i32 s3, 31
-; VI-NEXT:    s_cselect_b32 s5, s5, 0x7c00
+; VI-NEXT:    s_cmp_gt_i32 s3, 30
+; VI-NEXT:    s_cselect_b32 s5, 0x7c00, s5
 ; VI-NEXT:    s_cmp_lg_u32 s2, 0
 ; VI-NEXT:    s_movk_i32 s6, 0x7e00
 ; VI-NEXT:    s_cselect_b32 s2, s6, 0x7c00
@@ -4075,8 +4075,8 @@ define amdgpu_ps i32 @s_copysign_out_v2f16_mag_v2f64_sign_v2f16(<2 x double> inr
 ; VI-NEXT:    s_or_b32 s7, s7, s8
 ; VI-NEXT:    s_lshr_b32 s2, s2, 2
 ; VI-NEXT:    s_add_i32 s2, s2, s7
-; VI-NEXT:    s_cmp_lt_i32 s3, 31
-; VI-NEXT:    s_cselect_b32 s2, s2, 0x7c00
+; VI-NEXT:    s_cmp_gt_i32 s3, 30
+; VI-NEXT:    s_cselect_b32 s2, 0x7c00, s2
 ; VI-NEXT:    s_cmp_lg_u32 s0, 0
 ; VI-NEXT:    s_cselect_b32 s0, s6, 0x7c00
 ; VI-NEXT:    s_cmpk_eq_i32 s3, 0x40f
@@ -4127,8 +4127,8 @@ define amdgpu_ps i32 @s_copysign_out_v2f16_mag_v2f64_sign_v2f16(<2 x double> inr
 ; GFX9-NEXT:    s_or_b32 s7, s7, s8
 ; GFX9-NEXT:    s_lshr_b32 s5, s5, 2
 ; GFX9-NEXT:    s_add_i32 s5, s5, s7
-; GFX9-NEXT:    s_cmp_lt_i32 s6, 31
-; GFX9-NEXT:    s_cselect_b32 s5, s5, 0x7c00
+; GFX9-NEXT:    s_cmp_gt_i32 s6, 30
+; GFX9-NEXT:    s_cselect_b32 s5, 0x7c00, s5
 ; GFX9-NEXT:    s_cmp_lg_u32 s2, 0
 ; GFX9-NEXT:    s_movk_i32 s7, 0x7e00
 ; GFX9-NEXT:    s_cselect_b32 s2, s7, 0x7c00
@@ -4169,8 +4169,8 @@ define amdgpu_ps i32 @s_copysign_out_v2f16_mag_v2f64_sign_v2f16(<2 x double> inr
 ; GFX9-NEXT:    s_or_b32 s6, s6, s8
 ; GFX9-NEXT:    s_lshr_b32 s2, s2, 2
 ; GFX9-NEXT:    s_add_i32 s2, s2, s6
-; GFX9-NEXT:    s_cmp_lt_i32 s3, 31
-; GFX9-NEXT:    s_cselect_b32 s2, s2, 0x7c00
+; GFX9-NEXT:    s_cmp_gt_i32 s3, 30
+; GFX9-NEXT:    s_cselect_b32 s2, 0x7c00, s2
 ; GFX9-NEXT:    s_cmp_lg_u32 s0, 0
 ; GFX9-NEXT:    s_cselect_b32 s0, s7, 0x7c00
 ; GFX9-NEXT:    s_cmpk_eq_i32 s3, 0x40f
@@ -4227,9 +4227,9 @@ define amdgpu_ps i32 @s_copysign_out_v2f16_mag_v2f64_sign_v2f16(<2 x double> inr
 ; GFX11-NEXT:    s_or_b32 s7, s7, s8
 ; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
 ; GFX11-NEXT:    s_add_i32 s6, s6, s7
-; GFX11-NEXT:    s_cmp_lt_i32 s2, 31
+; GFX11-NEXT:    s_cmp_gt_i32 s2, 30
 ; GFX11-NEXT:    s_movk_i32 s7, 0x7e00
-; GFX11-NEXT:    s_cselect_b32 s6, s6, 0x7c00
+; GFX11-NEXT:    s_cselect_b32 s6, 0x7c00, s6
 ; GFX11-NEXT:    s_cmp_lg_u32 s5, 0
 ; GFX11-NEXT:    s_cselect_b32 s5, s7, 0x7c00
 ; GFX11-NEXT:    s_cmpk_eq_i32 s2, 0x40f
@@ -4277,8 +4277,8 @@ define amdgpu_ps i32 @s_copysign_out_v2f16_mag_v2f64_sign_v2f16(<2 x double> inr
 ; GFX11-NEXT:    s_or_b32 s6, s6, s8
 ; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
 ; GFX11-NEXT:    s_add_i32 s5, s5, s6
-; GFX11-NEXT:    s_cmp_lt_i32 s0, 31
-; GFX11-NEXT:    s_cselect_b32 s5, s5, 0x7c00
+; GFX11-NEXT:    s_cmp_gt_i32 s0, 30
+; GFX11-NEXT:    s_cselect_b32 s5, 0x7c00, s5
 ; GFX11-NEXT:    s_cmp_lg_u32 s3, 0
 ; GFX11-NEXT:    s_cselect_b32 s3, s7, 0x7c00
 ; GFX11-NEXT:    s_cmpk_eq_i32 s0, 0x40f
diff --git a/llvm/test/CodeGen/AMDGPU/flat_atomics_i64.ll b/llvm/test/CodeGen/AMDGPU/flat_atomics_i64.ll
index ffe0596a95e33..e45dd57554675 100644
--- a/llvm/test/CodeGen/AMDGPU/flat_atomics_i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/flat_atomics_i64.ll
@@ -12225,9 +12225,9 @@ define amdgpu_kernel void @atomic_cmpxchg_i64_offset(ptr %out, i64 %in, i64 %old
 ; GCN1-NEXT:    buffer_load_dword v1, v3, s[12:15], 0 offen
 ; GCN1-NEXT:    v_mov_b32_e32 v4, s3
 ; GCN1-NEXT:    s_waitcnt vmcnt(0)
-; GCN1-NEXT:    v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1]
-; GCN1-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
-; GCN1-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
+; GCN1-NEXT:    v_cmp_ne_u64_e32 vcc, s[4:5], v[0:1]
+; GCN1-NEXT:    v_cndmask_b32_e32 v0, v5, v0, vcc
+; GCN1-NEXT:    v_cndmask_b32_e32 v1, v4, v1, vcc
 ; GCN1-NEXT:    buffer_store_dword v0, v2, s[12:15], 0 offen
 ; GCN1-NEXT:    buffer_store_dword v1, v3, s[12:15], 0 offen
 ; GCN1-NEXT:    s_endpgm
@@ -12278,9 +12278,9 @@ define amdgpu_kernel void @atomic_cmpxchg_i64_offset(ptr %out, i64 %in, i64 %old
 ; GCN2-NEXT:    v_mov_b32_e32 v5, s2
 ; GCN2-NEXT:    v_mov_b32_e32 v4, s3
 ; GCN2-NEXT:    s_waitcnt vmcnt(0)
-; GCN2-NEXT:    v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1]
-; GCN2-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
-; GCN2-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
+; GCN2-NEXT:    v_cmp_ne_u64_e32 vcc, s[4:5], v[0:1]
+; GCN2-NEXT:    v_cndmask_b32_e32 v0, v5, v0, vcc
+; GCN2-NEXT:    v_cndmask_b32_e32 v1, v4, v1, vcc
 ; GCN2-NEXT:    buffer_store_dword v0, v2, s[88:91], 0 offen
 ; GCN2-NEXT:    buffer_store_dword v1, v3, s[88:91], 0 offen
 ; GCN2-NEXT:    s_endpgm
@@ -12317,9 +12317,9 @@ define amdgpu_kernel void @atomic_cmpxchg_i64_offset(ptr %out, i64 %in, i64 %old
 ; GFX12-NEXT:    s_cselect_b32 s0, s0, -1
 ; GFX12-NEXT:    scratch_load_b64 v[0:1], off, s0
 ; GFX12-NEXT:    s_wait_loadcnt 0x0
-; GFX12-NEXT:    v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[0:1]
-; GFX12-NEXT:    v_cndmask_b32_e64 v1, v1, s3, vcc_lo
-; GFX12-NEXT:    v_cndmask_b32_e64 v0, v0, s2, vcc_lo
+; GFX12-NEXT:    v_cmp_ne_u64_e32 vcc_lo, s[4:5], v[0:1]
+; GFX12-NEXT:    v_cndmask_b32_e32 v1, s3, v1, vcc_lo
+; GFX12-NEXT:    v_cndmask_b32_e32 v0, s2, v0, vcc_lo
 ; GFX12-NEXT:    scratch_store_b64 off, v[0:1], s0
 ; GFX12-NEXT:    s_endpgm
 entry:
@@ -12376,9 +12376,9 @@ define amdgpu_kernel void @atomic_cmpxchg_i64_soffset(ptr %out, i64 %in, i64 %ol
 ; GCN1-NEXT:    buffer_load_dword v1, v3, s[12:15], 0 offen
 ; GCN1-NEXT:    v_mov_b32_e32 v4, s3
 ; GCN1-NEXT:    s_waitcnt vmcnt(0)
-; GCN1-NEXT:    v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1]
-; GCN1-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
-; GCN1-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
+; GCN1-NEXT:    v_cmp_ne_u64_e32 vcc, s[4:5], v[0:1]
+; GCN1-NEXT:    v_cndmask_b32_e32 v0, v5, v0, vcc
+; GCN1-NEXT:    v_cndmask_b32_e32 v1, v4, v1, vcc
 ; GCN1-NEXT:    buffer_store_dword v0, v2, s[12:15], 0 offen
 ; GCN1-NEXT:    buffer_store_dword v1, v3, s[12:15], 0 offen
 ; GCN1-NEXT:    s_endpgm
@@ -12429,9 +12429,9 @@ define amdgpu_kernel void @atomic_cmpxchg_i64_soffset(ptr %out, i64 %in, i64 %ol
 ; GCN2-NEXT:    v_mov_b32_e32 v5, s2
 ; GCN2-NEXT:    v_mov_b32_e32 v4, s3
 ; GCN2-NEXT:    s_waitcnt vmcnt(0)
-; GCN2-NEXT:    v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1]
-; GCN2-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
-; GCN2-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
+; GCN2-NEXT:    v_cmp_ne_u64_e32 vcc, s[4:5], v[0:1]
+; GCN2-NEXT:    v_cndmask_b32_e32 v0, v5, v0, vcc
+; GCN2-NEXT:    v_cndmask_b32_e32 v1, v4, v1, vcc
 ; GCN2-NEXT:    buffer_store_dword v0, v2, s[88:91], 0 offen
 ; GCN2-NEXT:    buffer_store_dword v1, v3, s[88:91], 0 offen
 ; GCN2-NEXT:    s_endpgm
@@ -12468,9 +12468,9 @@ define amdgpu_kernel void @atomic_cmpxchg_i64_soffset(ptr %out, i64 %in, i64 %ol
 ; GFX12-NEXT:    s_cselect_b32 s0, s0, -1
 ; GFX12-NEXT:    scratch_load_b64 v[0:1], off, s0
 ; GFX12-NEXT:    s_wait_loadcnt 0x0
-; GFX12-NEXT:    v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[0:1]
-; GFX12-NEXT:    v_cndmask_b32_e64 v1, v1, s3, vcc_lo
-; GFX12-NEXT:    v_cndmask_b32_e64 v0, v0, s2, vcc_lo
+; GFX12-NEXT:    v_cmp_ne_u64_e32 vcc_lo, s[4:5], v[0:1]
+; GFX12-NEXT:    v_cndmask_b32_e32 v1, s3, v1, vcc_lo
+; GFX12-NEXT:    v_cndmask_b32_e32 v0, s2, v0, vcc_lo
 ; GFX12-NEXT:    scratch_store_b64 off, v[0:1], s0
 ; GFX12-NEXT:    s_endpgm
 entry:
@@ -12680,9 +12680,9 @@ define amdgpu_kernel void @atomic_cmpxchg_i64_addr64_offset(ptr %out, i64 %in, i
 ; GCN1-NEXT:    buffer_load_dword v1, v3, s[16:19], 0 offen
 ; GCN1-NEXT:    v_mov_b32_e32 v4, s11
 ; GCN1-NEXT:    s_waitcnt vmcnt(0)
-; GCN1-NEXT:    v_cmp_eq_u64_e32 vcc, s[14:15], v[0:1]
-; GCN1-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
-; GCN1-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
+; GCN1-NEXT:    v_cmp_ne_u64_e32 vcc, s[14:15], v[0:1]
+; GCN1-NEXT:    v_cndmask_b32_e32 v0, v5, v0, vcc
+; GCN1-NEXT:    v_cndmask_b32_e32 v1, v4, v1, vcc
 ; GCN1-NEXT:    buffer_store_dword v0, v2, s[16:19], 0 offen
 ; GCN1-NEXT:    buffer_store_dword v1, v3, s[16:19], 0 offen
 ; GCN1-NEXT:    s_endpgm
@@ -12735,9 +12735,9 @@ define amdgpu_kernel void @atomic_cmpxchg_i64_addr64_offset(ptr %out, i64 %in, i
 ; GCN2-NEXT:    v_mov_b32_e32 v5, s10
 ; GCN2-NEXT:    v_mov_b32_e32 v4, s11
 ; GCN2-NEXT:    s_waitcnt vmcnt(0)
-; GCN2-NEXT:    v_cmp_eq_u64_e32 vcc, s[14:15], v[0:1]
-; GCN2-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
-; GCN2-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
+; GCN2-NEXT:    v_cmp_ne_u64_e32 vcc, s[14:15], v[0:1]
+; GCN2-NEXT:    v_cndmask_b32_e32 v0, v5, v0, vcc
+; GCN2-NEXT:    v_cndmask_b32_e32 v1, v4, v1, vcc
 ; GCN2-NEXT:    buffer_store_dword v0, v2, s[88:91], 0 offen
 ; GCN2-NEXT:    buffer_store_dword v1, v3, s[88:91], 0 offen
 ; GCN2-NEXT:    s_endpgm
@@ -12775,9 +12775,9 @@ define amdgpu_kernel void @atomic_cmpxchg_i64_addr64_offset(ptr %out, i64 %in, i
 ; GFX12-NEXT:    s_cselect_b32 s0, s0, -1
 ; GFX12-NEXT:    scratch_load_b64 v[0:1], off, s0
 ; GFX12-NEXT:    s_wait_loadcnt 0x0
-; GFX12-NEXT:    v_cmp_eq_u64_e32 vcc_lo, s[6:7], v[0:1]
-; GFX12-NEXT:    v_cndmask_b32_e64 v1, v1, s3, vcc_lo
-; GFX12-NEXT:    v_cndmask_b32_e64 v0, v0, s2, vcc_lo
+; GFX12-NEXT:    v_cmp_ne_u64_e32 vcc_lo, s[6:7], v[0:1]
+; GFX12-NEXT:    v_cndmask_b32_e32 v1, s3, v1, vcc_lo
+; GFX12-NEXT:    v_cndmask_b32_e32 v0, s2, v0, vcc_lo
 ; GFX12-NEXT:    scratch_store_b64 off, v[0:1], s0
 ; GFX12-NEXT:    s_endpgm
 entry:
@@ -12998,9 +12998,9 @@ define amdgpu_kernel void @atomic_cmpxchg_i64(ptr %out, i64 %in, i64 %old) {
 ; GCN1-NEXT:    buffer_load_dword v1, v3, s[12:15], 0 offen
 ; GCN1-NEXT:    v_mov_b32_e32 v4, s3
 ; GCN1-NEXT:    s_waitcnt vmcnt(0)
-; GCN1-NEXT:    v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1]
-; GCN1-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
-; GCN1-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
+; GCN1-NEXT:    v_cmp_ne_u64_e32 vcc, s[4:5], v[0:1]
+; GCN1-NEXT:    v_cndmask_b32_e32 v0, v5, v0, vcc
+; GCN1-NEXT:    v_cndmask_b32_e32 v1, v4, v1, vcc
 ; GCN1-NEXT:    buffer_store_dword v0, v2, s[12:15], 0 offen
 ; GCN1-NEXT:    buffer_store_dword v1, v3, s[12:15], 0 offen
 ; GCN1-NEXT:    s_endpgm
@@ -13049,9 +13049,9 @@ define amdgpu_kernel void @atomic_cmpxchg_i64(ptr %out, i64 %in, i64 %old) {
 ; GCN2-NEXT:    v_mov_b32_e32 v5, s2
 ; GCN2-NEXT:    v_mov_b32_e32 v4, s3
 ; GCN2-NEXT:    s_waitcnt vmcnt(0)
-; GCN2-NEXT:    v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1]
-; GCN2-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
-; GCN2-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
+; GCN2-NEXT:    v_cmp_ne_u64_e32 vcc, s[4:5], v[0:1]
+; GCN2-NEXT:    v_cndmask_b32_e32 v0, v5, v0, vcc
+; GCN2-NEXT:    v_cndmask_b32_e32 v1, v4, v1, vcc
 ; GCN2-NEXT:    buffer_store_dword v0, v2, s[88:91], 0 offen
 ; GCN2-NEXT:    buffer_store_dword v1, v3, s[88:91], 0 offen
 ; GCN2-NEXT:    s_endpgm
@@ -13087,9 +13087,9 @@ define amdgpu_kernel void @atomic_cmpxchg_i64(ptr %out, i64 %in, i64 %old) {
 ; GFX12-NEXT:    s_cselect_b32 s0, s0, -1
 ; GFX12-NEXT:    scratch_load_b64 v[0:1], off, s0
 ; GFX12-NEXT:    s_wait_loadcnt 0x0
-; GFX12-NEXT:    v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[0:1]
-; GFX12-NEXT:    v_cndmask_b32_e64 v1, v1, s3, vcc_lo
-; GFX12-NEXT:    v_cndmask_b32_e64 v0, v0, s2, vcc_lo
+; GFX12-NEXT:    v_cmp_ne_u64_e32 vcc_lo, s[4:5], v[0:1]
+; GFX12-NEXT:    v_cndmask_b32_e32 v1, s3, v1, vcc_lo
+; GFX12-NEXT:    v_cndmask_b32_e32 v0, s2, v0, vcc_lo
 ; GFX12-NEXT:    scratch_store_b64 off, v[0:1], s0
 ; GFX12-NEXT:    s_endpgm
 entry:
@@ -13290,9 +13290,9 @@ define amdgpu_kernel void @atomic_cmpxchg_i64_addr64(ptr %out, i64 %in, i64 %ind
 ; GCN1-NEXT:    buffer_load_dword v1, v3, s[16:19], 0 offen
 ; GCN1-NEXT:    v_mov_b32_e32 v4, s11
 ; GCN1-NEXT:    s_waitcnt vmcnt(0)
-; GCN1-NEXT:    v_cmp_eq_u64_e32 vcc, s[14:15], v[0:1]
-; GCN1-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
-; GCN1-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
+; GCN1-NEXT:    v_cmp_ne_u64_e32 vcc, s[14:15], v[0:1]
+; GCN1-NEXT:    v_cndmask_b32_e32 v0, v5, v0, vcc
+; GCN1-NEXT:    v_cndmask_b32_e32 v1, v4, v1, vcc
 ; GCN1-NEXT:    buffer_store_dword v0, v2, s[16:19], 0 offen
 ; GCN1-NEXT:    buffer_store_dword v1, v3, s[16:19], 0 offen
 ; GCN1-NEXT:    s_endpgm
@@ -13343,9 +13343,9 @@ define amdgpu_kernel void @atomic_cmpxchg_i64_addr64(ptr %out, i64 %in, i64 %ind
 ; GCN2-NEXT:    v_mov_b32_e32 v5, s10
 ; GCN2-NEXT:    v_mov_b32_e32 v4, s11
 ; GCN2-NEXT:    s_waitcnt vmcnt(0)
-; GCN2-NEXT:    v_cmp_eq_u64_e32 vcc, s[14:15], v[0:1]
-; GCN2-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
-; GCN2-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
+; GCN2-NEXT:    v_cmp_ne_u64_e32 vcc, s[14:15], v[0:1]
+; GCN2-NEXT:    v_cndmask_b32_e32 v0, v5, v0, vcc
+; GCN2-NEXT:    v_cndmask_b32_e32 v1, v4, v1, vcc
 ; GCN2-NEXT:    buffer_store_dword v0, v2, s[88:91], 0 offen
 ; GCN2-NEXT:    buffer_store_dword v1, v3, s[88:91], 0 offen
 ; GCN2-NEXT:    s_endpgm
@@ -13382,9 +13382,9 @@ define amdgpu_kernel void @atomic_cmpxchg_i64_addr64(ptr %out, i64 %in, i64 %ind
 ; GFX12-NEXT:    s_cselect_b32 s0, s0, -1
 ; GFX12-NEXT:    scratch_load_b64 v[0:1], off, s0
 ; GFX12-NEXT:    s_wait_loadcnt 0x0
-; GFX12-NEXT:    v_cmp_eq_u64_e32 vcc_lo, s[6:7], v[0:1]
-; GFX12-NEXT:    v_cndmask_b32_e64 v1, v1, s3, vcc_lo
-; GFX12-NEXT:    v_cndmask_b32_e64 v0, v0, s2, vcc_lo
+; GFX12-NEXT:    v_cmp_ne_u64_e32 vcc_lo, s[6:7], v[0:1]
+; GFX12-NEXT:    v_cndmask_b32_e32 v1, s3, v1, vcc_lo
+; GFX12-NEXT:    v_cndmask_b32_e32 v0, s2, v0, vcc_lo
 ; GFX12-NEXT:    scratch_store_b64 off, v[0:1], s0
 ; GFX12-NEXT:    s_endpgm
 entry:
diff --git a/llvm/test/CodeGen/AMDGPU/fmax_legacy.ll b/llvm/test/CodeGen/AMDGPU/fmax_legacy.ll
index 1da621cb9f09d..1056edd4cb7fb 100644
--- a/llvm/test/CodeGen/AMDGPU/fmax_legacy.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmax_legacy.ll
@@ -202,7 +202,6 @@ define amdgpu_kernel void @test_fmax_legacy_ogt_v3f32(ptr addrspace(1) %out, ptr
 ; FUNC-LABEL: {{^}}test_fmax_legacy_ogt_f32_multi_use:
 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
-; GCN-NOT: v_max_
 ; GCN: v_cmp_gt_f32
 ; GCN-NEXT: v_cndmask_b32
 ; GCN-NOT: v_max_
diff --git a/llvm/test/CodeGen/AMDGPU/fmaximum3.ll b/llvm/test/CodeGen/AMDGPU/fmaximum3.ll
index 8a17a759ac334..bcfc5bbb9dc3b 100644
--- a/llvm/test/CodeGen/AMDGPU/fmaximum3.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmaximum3.ll
@@ -89,13 +89,13 @@ define amdgpu_ps i32 @s_fmaximum3_f32(float inreg %a, float inreg %b, float inre
 ; GFX942-NEXT:    v_mov_b32_e32 v0, s1
 ; GFX942-NEXT:    v_max_f32_e32 v1, s0, v0
 ; GFX942-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, s0, v0
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, s0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v1, v2, vcc
 ; GFX942-NEXT:    v_max_f32_e32 v1, s2, v0
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, s2, v0
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, s2, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v1, v2, vcc
 ; GFX942-NEXT:    s_nop 0
 ; GFX942-NEXT:    v_readfirstlane_b32 s0, v0
 ; GFX942-NEXT:    ; return to shader part epilog
@@ -1360,13 +1360,13 @@ define amdgpu_ps i32 @s_fmaximum3_f16(half inreg %a, half inreg %b, half inreg %
 ; GFX942-NEXT:    v_mov_b32_e32 v0, s1
 ; GFX942-NEXT:    v_max_f16_e32 v1, s0, v0
 ; GFX942-NEXT:    v_mov_b32_e32 v2, 0x7e00
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, s0, v0
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, s0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v1, v2, vcc
 ; GFX942-NEXT:    v_max_f16_e32 v1, s2, v0
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, s2, v0
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, s2, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v1, v2, vcc
 ; GFX942-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX942-NEXT:    s_nop 0
 ; GFX942-NEXT:    v_readfirstlane_b32 s0, v0
@@ -3795,14 +3795,14 @@ define amdgpu_ps <2 x i32> @s_no_fmaximum3_f32__multi_use(float inreg %a, float
 ; GFX942-NEXT:    v_mov_b32_e32 v0, s1
 ; GFX942-NEXT:    v_max_f32_e32 v1, s0, v0
 ; GFX942-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, s0, v0
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, s0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v1, v2, vcc
 ; GFX942-NEXT:    v_max_f32_e32 v1, s2, v0
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, s2, v0
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, s2, v0
 ; GFX942-NEXT:    v_readfirstlane_b32 s0, v0
 ; GFX942-NEXT:    s_nop 0
-; GFX942-NEXT:    v_cndmask_b32_e32 v1, v2, v1, vcc
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
 ; GFX942-NEXT:    s_nop 0
 ; GFX942-NEXT:    v_readfirstlane_b32 s1, v1
 ; GFX942-NEXT:    ; return to shader part epilog
@@ -3899,14 +3899,14 @@ define amdgpu_ps <2 x i32> @s_no_fmaximum3_f16__multi_use(half inreg %a, half in
 ; GFX942-NEXT:    v_mov_b32_e32 v0, s1
 ; GFX942-NEXT:    v_max_f16_e32 v1, s0, v0
 ; GFX942-NEXT:    v_mov_b32_e32 v2, 0x7e00
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, s0, v0
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, s0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v1, v2, vcc
 ; GFX942-NEXT:    v_max_f16_e32 v1, s2, v0
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, s2, v0
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, s2, v0
 ; GFX942-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX942-NEXT:    s_nop 0
-; GFX942-NEXT:    v_cndmask_b32_e32 v1, v2, v1, vcc
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
 ; GFX942-NEXT:    v_and_b32_e32 v1, 0xffff, v1
 ; GFX942-NEXT:    v_readfirstlane_b32 s0, v0
 ; GFX942-NEXT:    v_readfirstlane_b32 s1, v1
diff --git a/llvm/test/CodeGen/AMDGPU/fmin_legacy.ll b/llvm/test/CodeGen/AMDGPU/fmin_legacy.ll
index 8e595a827c78d..e841f03a2b51e 100644
--- a/llvm/test/CodeGen/AMDGPU/fmin_legacy.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmin_legacy.ll
@@ -259,10 +259,8 @@ define amdgpu_kernel void @test_fmin_legacy_ult_v3f32(ptr addrspace(1) %out, ptr
 ; FUNC-LABEL: {{^}}test_fmin_legacy_ole_f32_multi_use:
 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
-; GCN-NOT: v_min
 ; GCN: v_cmp_le_f32
 ; GCN-NEXT: v_cndmask_b32
-; GCN-NOT: v_min
 ; GCN: s_endpgm
 define amdgpu_kernel void @test_fmin_legacy_ole_f32_multi_use(ptr addrspace(1) %out0, ptr addrspace(1) %out1, ptr addrspace(1) %in) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
diff --git a/llvm/test/CodeGen/AMDGPU/fminimum3.ll b/llvm/test/CodeGen/AMDGPU/fminimum3.ll
index 58d89d4076376..aa5dd1b73fb38 100644
--- a/llvm/test/CodeGen/AMDGPU/fminimum3.ll
+++ b/llvm/test/CodeGen/AMDGPU/fminimum3.ll
@@ -89,13 +89,13 @@ define amdgpu_ps i32 @s_fminimum3_f32(float inreg %a, float inreg %b, float inre
 ; GFX942-NEXT:    v_mov_b32_e32 v0, s1
 ; GFX942-NEXT:    v_min_f32_e32 v1, s0, v0
 ; GFX942-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, s0, v0
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, s0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v1, v2, vcc
 ; GFX942-NEXT:    v_min_f32_e32 v1, s2, v0
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, s2, v0
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, s2, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v1, v2, vcc
 ; GFX942-NEXT:    s_nop 0
 ; GFX942-NEXT:    v_readfirstlane_b32 s0, v0
 ; GFX942-NEXT:    ; return to shader part epilog
@@ -1360,13 +1360,13 @@ define amdgpu_ps i32 @s_fminimum3_f16(half inreg %a, half inreg %b, half inreg %
 ; GFX942-NEXT:    v_mov_b32_e32 v0, s1
 ; GFX942-NEXT:    v_min_f16_e32 v1, s0, v0
 ; GFX942-NEXT:    v_mov_b32_e32 v2, 0x7e00
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, s0, v0
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, s0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v1, v2, vcc
 ; GFX942-NEXT:    v_min_f16_e32 v1, s2, v0
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, s2, v0
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, s2, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v1, v2, vcc
 ; GFX942-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX942-NEXT:    s_nop 0
 ; GFX942-NEXT:    v_readfirstlane_b32 s0, v0
@@ -3795,14 +3795,14 @@ define amdgpu_ps <2 x i32> @s_no_fminimum3_f32__multi_use(float inreg %a, float
 ; GFX942-NEXT:    v_mov_b32_e32 v0, s1
 ; GFX942-NEXT:    v_min_f32_e32 v1, s0, v0
 ; GFX942-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, s0, v0
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, s0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v1, v2, vcc
 ; GFX942-NEXT:    v_min_f32_e32 v1, s2, v0
-; GFX942-NEXT:    v_cmp_o_f32_e32 vcc, s2, v0
+; GFX942-NEXT:    v_cmp_u_f32_e32 vcc, s2, v0
 ; GFX942-NEXT:    v_readfirstlane_b32 s0, v0
 ; GFX942-NEXT:    s_nop 0
-; GFX942-NEXT:    v_cndmask_b32_e32 v1, v2, v1, vcc
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
 ; GFX942-NEXT:    s_nop 0
 ; GFX942-NEXT:    v_readfirstlane_b32 s1, v1
 ; GFX942-NEXT:    ; return to shader part epilog
@@ -3899,14 +3899,14 @@ define amdgpu_ps <2 x i32> @s_no_fminimum3_f16__multi_use(half inreg %a, half in
 ; GFX942-NEXT:    v_mov_b32_e32 v0, s1
 ; GFX942-NEXT:    v_min_f16_e32 v1, s0, v0
 ; GFX942-NEXT:    v_mov_b32_e32 v2, 0x7e00
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, s0, v0
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, s0, v0
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX942-NEXT:    v_cndmask_b32_e32 v0, v1, v2, vcc
 ; GFX942-NEXT:    v_min_f16_e32 v1, s2, v0
-; GFX942-NEXT:    v_cmp_o_f16_e32 vcc, s2, v0
+; GFX942-NEXT:    v_cmp_u_f16_e32 vcc, s2, v0
 ; GFX942-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX942-NEXT:    s_nop 0
-; GFX942-NEXT:    v_cndmask_b32_e32 v1, v2, v1, vcc
+; GFX942-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
 ; GFX942-NEXT:    v_and_b32_e32 v1, 0xffff, v1
 ; GFX942-NEXT:    v_readfirstlane_b32 s0, v0
 ; GFX942-NEXT:    v_readfirstlane_b32 s1, v1
diff --git a/llvm/test/CodeGen/AMDGPU/fptrunc.ll b/llvm/test/CodeGen/AMDGPU/fptrunc.ll
index 49c563eef5d82..44783162fcb3d 100644
--- a/llvm/test/CodeGen/AMDGPU/fptrunc.ll
+++ b/llvm/test/CodeGen/AMDGPU/fptrunc.ll
@@ -133,8 +133,8 @@ define amdgpu_kernel void @fptrunc_f64_to_f16(ptr addrspace(1) %out, double %in)
 ; SI-NEXT:    s_lshr_b32 s6, s6, 2
 ; SI-NEXT:    s_or_b32 s8, s8, s9
 ; SI-NEXT:    s_add_i32 s6, s6, s8
-; SI-NEXT:    s_cmp_lt_i32 s0, 31
-; SI-NEXT:    s_cselect_b32 s6, s6, 0x7c00
+; SI-NEXT:    s_cmp_gt_i32 s0, 30
+; SI-NEXT:    s_cselect_b32 s6, 0x7c00, s6
 ; SI-NEXT:    s_cmp_lg_u32 s1, 0
 ; SI-NEXT:    s_cselect_b32 s1, s2, 0x7c00
 ; SI-NEXT:    s_cmpk_eq_i32 s0, 0x40f
@@ -189,8 +189,8 @@ define amdgpu_kernel void @fptrunc_f64_to_f16(ptr addrspace(1) %out, double %in)
 ; VI-SAFE-SDAG-NEXT:    s_or_b32 s8, s8, s9
 ; VI-SAFE-SDAG-NEXT:    s_lshr_b32 s5, s5, 2
 ; VI-SAFE-SDAG-NEXT:    s_add_i32 s5, s5, s8
-; VI-SAFE-SDAG-NEXT:    s_cmp_lt_i32 s6, 31
-; VI-SAFE-SDAG-NEXT:    s_cselect_b32 s5, s5, 0x7c00
+; VI-SAFE-SDAG-NEXT:    s_cmp_gt_i32 s6, 30
+; VI-SAFE-SDAG-NEXT:    s_cselect_b32 s5, 0x7c00, s5
 ; VI-SAFE-SDAG-NEXT:    s_cmp_lg_u32 s4, 0
 ; VI-SAFE-SDAG-NEXT:    s_movk_i32 s4, 0x7e00
 ; VI-SAFE-SDAG-NEXT:    s_cselect_b32 s4, s4, 0x7c00
@@ -312,9 +312,9 @@ define amdgpu_kernel void @fptrunc_f64_to_f16(ptr addrspace(1) %out, double %in)
 ; GFX10-SAFE-SDAG-NEXT:    s_lshr_b32 s5, s5, 2
 ; GFX10-SAFE-SDAG-NEXT:    s_or_b32 s6, s6, s7
 ; GFX10-SAFE-SDAG-NEXT:    s_add_i32 s5, s5, s6
-; GFX10-SAFE-SDAG-NEXT:    s_cmp_lt_i32 s2, 31
+; GFX10-SAFE-SDAG-NEXT:    s_cmp_gt_i32 s2, 30
 ; GFX10-SAFE-SDAG-NEXT:    s_movk_i32 s6, 0x7e00
-; GFX10-SAFE-SDAG-NEXT:    s_cselect_b32 s5, s5, 0x7c00
+; GFX10-SAFE-SDAG-NEXT:    s_cselect_b32 s5, 0x7c00, s5
 ; GFX10-SAFE-SDAG-NEXT:    s_cmp_lg_u32 s4, 0
 ; GFX10-SAFE-SDAG-NEXT:    s_cselect_b32 s4, s6, 0x7c00
 ; GFX10-SAFE-SDAG-NEXT:    s_cmpk_eq_i32 s2, 0x40f
@@ -444,9 +444,9 @@ define amdgpu_kernel void @fptrunc_f64_to_f16(ptr addrspace(1) %out, double %in)
 ; GFX11-SAFE-SDAG-NEXT:    s_or_b32 s6, s6, s7
 ; GFX11-SAFE-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
 ; GFX11-SAFE-SDAG-NEXT:    s_add_i32 s5, s5, s6
-; GFX11-SAFE-SDAG-NEXT:    s_cmp_lt_i32 s2, 31
+; GFX11-SAFE-SDAG-NEXT:    s_cmp_gt_i32 s2, 30
 ; GFX11-SAFE-SDAG-NEXT:    s_movk_i32 s6, 0x7e00
-; GFX11-SAFE-SDAG-NEXT:    s_cselect_b32 s5, s5, 0x7c00
+; GFX11-SAFE-SDAG-NEXT:    s_cselect_b32 s5, 0x7c00, s5
 ; GFX11-SAFE-SDAG-NEXT:    s_cmp_lg_u32 s4, 0
 ; GFX11-SAFE-SDAG-NEXT:    s_cselect_b32 s4, s6, 0x7c00
 ; GFX11-SAFE-SDAG-NEXT:    s_cmpk_eq_i32 s2, 0x40f
diff --git a/llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll b/llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll
index b5665835eaf7a..d41e7ccf803dc 100644
--- a/llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll
+++ b/llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll
@@ -33,48 +33,48 @@ define amdgpu_kernel void @extract_w_offset(ptr addrspace(1) %out, i32 %in) {
 ; GENERIC-NEXT:    s_cmp_eq_u32 s6, 1
 ; GENERIC-NEXT:    s_cselect_b64 s[4:5], -1, 0
 ; GENERIC-NEXT:    v_cndmask_b32_e64 v13, 1.0, 2.0, s[4:5]
-; GENERIC-NEXT:    s_cmp_lg_u32 s6, 2
-; GENERIC-NEXT:    s_cselect_b64 vcc, -1, 0
-; GENERIC-NEXT:    v_cndmask_b32_e32 v0, v0, v13, vcc
-; GENERIC-NEXT:    s_cmp_lg_u32 s6, 3
+; GENERIC-NEXT:    s_cmp_eq_u32 s6, 2
 ; GENERIC-NEXT:    s_cselect_b64 vcc, -1, 0
-; GENERIC-NEXT:    v_cndmask_b32_e32 v0, 4.0, v0, vcc
-; GENERIC-NEXT:    s_cmp_lg_u32 s6, 4
+; GENERIC-NEXT:    v_cndmask_b32_e32 v0, v13, v0, vcc
+; GENERIC-NEXT:    s_cmp_eq_u32 s6, 3
+; GENERIC-NEXT:    s_cselect_b64 s[4:5], -1, 0
+; GENERIC-NEXT:    v_cndmask_b32_e64 v0, v0, 4.0, s[4:5]
+; GENERIC-NEXT:    s_cmp_eq_u32 s6, 4
 ; GENERIC-NEXT:    s_cselect_b64 vcc, -1, 0
-; GENERIC-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
-; GENERIC-NEXT:    s_cmp_lg_u32 s6, 5
+; GENERIC-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GENERIC-NEXT:    s_cmp_eq_u32 s6, 5
 ; GENERIC-NEXT:    s_cselect_b64 vcc, -1, 0
-; GENERIC-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
-; GENERIC-NEXT:    s_cmp_lg_u32 s6, 6
+; GENERIC-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GENERIC-NEXT:    s_cmp_eq_u32 s6, 6
 ; GENERIC-NEXT:    s_cselect_b64 vcc, -1, 0
-; GENERIC-NEXT:    v_cndmask_b32_e32 v0, v3, v0, vcc
-; GENERIC-NEXT:    s_cmp_lg_u32 s6, 7
+; GENERIC-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
+; GENERIC-NEXT:    s_cmp_eq_u32 s6, 7
 ; GENERIC-NEXT:    s_cselect_b64 vcc, -1, 0
-; GENERIC-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
-; GENERIC-NEXT:    s_cmp_lg_u32 s6, 8
+; GENERIC-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GENERIC-NEXT:    s_cmp_eq_u32 s6, 8
 ; GENERIC-NEXT:    s_cselect_b64 vcc, -1, 0
-; GENERIC-NEXT:    v_cndmask_b32_e32 v0, v5, v0, vcc
-; GENERIC-NEXT:    s_cmp_lg_u32 s6, 9
+; GENERIC-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
+; GENERIC-NEXT:    s_cmp_eq_u32 s6, 9
 ; GENERIC-NEXT:    s_cselect_b64 vcc, -1, 0
-; GENERIC-NEXT:    v_cndmask_b32_e32 v0, v6, v0, vcc
-; GENERIC-NEXT:    s_cmp_lg_u32 s6, 10
+; GENERIC-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
+; GENERIC-NEXT:    s_cmp_eq_u32 s6, 10
 ; GENERIC-NEXT:    s_cselect_b64 vcc, -1, 0
-; GENERIC-NEXT:    v_cndmask_b32_e32 v0, v7, v0, vcc
-; GENERIC-NEXT:    s_cmp_lg_u32 s6, 11
+; GENERIC-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc
+; GENERIC-NEXT:    s_cmp_eq_u32 s6, 11
 ; GENERIC-NEXT:    s_cselect_b64 vcc, -1, 0
-; GENERIC-NEXT:    v_cndmask_b32_e32 v0, v8, v0, vcc
-; GENERIC-NEXT:    s_cmp_lg_u32 s6, 12
+; GENERIC-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc
+; GENERIC-NEXT:    s_cmp_eq_u32 s6, 12
 ; GENERIC-NEXT:    s_cselect_b64 vcc, -1, 0
-; GENERIC-NEXT:    v_cndmask_b32_e32 v0, v9, v0, vcc
-; GENERIC-NEXT:    s_cmp_lg_u32 s6, 13
+; GENERIC-NEXT:    v_cndmask_b32_e32 v0, v0, v9, vcc
+; GENERIC-NEXT:    s_cmp_eq_u32 s6, 13
 ; GENERIC-NEXT:    s_cselect_b64 vcc, -1, 0
-; GENERIC-NEXT:    v_cndmask_b32_e32 v0, v10, v0, vcc
-; GENERIC-NEXT:    s_cmp_lg_u32 s6, 14
+; GENERIC-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc
+; GENERIC-NEXT:    s_cmp_eq_u32 s6, 14
 ; GENERIC-NEXT:    s_cselect_b64 vcc, -1, 0
-; GENERIC-NEXT:    v_cndmask_b32_e32 v0, v11, v0, vcc
-; GENERIC-NEXT:    s_cmp_lg_u32 s6, 15
+; GENERIC-NEXT:    v_cndmask_b32_e32 v0, v0, v11, vcc
+; GENERIC-NEXT:    s_cmp_eq_u32 s6, 15
 ; GENERIC-NEXT:    s_cselect_b64 vcc, -1, 0
-; GENERIC-NEXT:    v_cndmask_b32_e32 v0, v12, v0, vcc
+; GENERIC-NEXT:    v_cndmask_b32_e32 v0, v0, v12, vcc
 ; GENERIC-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GENERIC-NEXT:    s_endpgm
 ;
@@ -643,48 +643,48 @@ define amdgpu_kernel void @extract_wo_offset(ptr addrspace(1) %out, i32 %in) {
 ; GENERIC-NEXT:    s_cmp_eq_u32 s6, 1
 ; GENERIC-NEXT:    s_cselect_b64 s[4:5], -1, 0
 ; GENERIC-NEXT:    v_cndmask_b32_e64 v13, 1.0, 2.0, s[4:5]
-; GENERIC-NEXT:    s_cmp_lg_u32 s6, 2
-; GENERIC-NEXT:    s_cselect_b64 vcc, -1, 0
-; GENERIC-NEXT:    v_cndmask_b32_e32 v0, v0, v13, vcc
-; GENERIC-NEXT:    s_cmp_lg_u32 s6, 3
+; GENERIC-NEXT:    s_cmp_eq_u32 s6, 2
 ; GENERIC-NEXT:    s_cselect_b64 vcc, -1, 0
-; GENERIC-NEXT:    v_cndmask_b32_e32 v0, 4.0, v0, vcc
-; GENERIC-NEXT:    s_cmp_lg_u32 s6, 4
+; GENERIC-NEXT:    v_cndmask_b32_e32 v0, v13, v0, vcc
+; GENERIC-NEXT:    s_cmp_eq_u32 s6, 3
+; GENERIC-NEXT:    s_cselect_b64 s[4:5], -1, 0
+; GENERIC-NEXT:    v_cndmask_b32_e64 v0, v0, 4.0, s[4:5]
+; GENERIC-NEXT:    s_cmp_eq_u32 s6, 4
 ; GENERIC-NEXT:    s_cselect_b64 vcc, -1, 0
-; GENERIC-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
-; GENERIC-NEXT:    s_cmp_lg_u32 s6, 5
+; GENERIC-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GENERIC-NEXT:    s_cmp_eq_u32 s6, 5
 ; GENERIC-NEXT:    s_cselect_b64 vcc, -1, 0
-; GENERIC-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
-; GENERIC-NEXT:    s_cmp_lg_u32 s6, 6
+; GENERIC-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GENERIC-NEXT:    s_cmp_eq_u32 s6, 6
 ; GENERIC-NEXT:    s_cselect_b64 vcc, -1, 0
-; GENERIC-NEXT:    v_cndmask_b32_e32 v0, v3, v0, vcc
-; GENERIC-NEXT:    s_cmp_lg_u32 s6, 7
+; GENERIC-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
+; GENERIC-NEXT:    s_cmp_eq_u32 s6, 7
 ; GENERIC-NEXT:    s_cselect_b64 vcc, -1, 0
-; GENERIC-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
-; GENERIC-NEXT:    s_cmp_lg_u32 s6, 8
+; GENERIC-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GENERIC-NEXT:    s_cmp_eq_u32 s6, 8
 ; GENERIC-NEXT:    s_cselect_b64 vcc, -1, 0
-; GENERIC-NEXT:    v_cndmask_b32_e32 v0, v5, v0, vcc
-; GENERIC-NEXT:    s_cmp_lg_u32 s6, 9
+; GENERIC-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
+; GENERIC-NEXT:    s_cmp_eq_u32 s6, 9
 ; GENERIC-NEXT:    s_cselect_b64 vcc, -1, 0
-; GENERIC-NEXT:    v_cndmask_b32_e32 v0, v6, v0, vcc
-; GENERIC-NEXT:    s_cmp_lg_u32 s6, 10
+; GENERIC-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
+; GENERIC-NEXT:    s_cmp_eq_u32 s6, 10
 ; GENERIC-NEXT:    s_cselect_b64 vcc, -1, 0
-; GENERIC-NEXT:    v_cndmask_b32_e32 v0, v7, v0, vcc
-; GENERIC-NEXT:    s_cmp_lg_u32 s6, 11
+; GENERIC-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc
+; GENERIC-NEXT:    s_cmp_eq_u32 s6, 11
 ; GENERIC-NEXT:    s_cselect_b64 vcc, -1, 0
-; GENERIC-NEXT:    v_cndmask_b32_e32 v0, v8, v0, vcc
-; GENERIC-NEXT:    s_cmp_lg_u32 s6, 12
+; GENERIC-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc
+; GENERIC-NEXT:    s_cmp_eq_u32 s6, 12
 ; GENERIC-NEXT:    s_cselect_b64 vcc, -1, 0
-; GENERIC-NEXT:    v_cndmask_b32_e32 v0, v9, v0, vcc
-; GENERIC-NEXT:    s_cmp_lg_u32 s6, 13
+; GENERIC-NEXT:    v_cndmask_b32_e32 v0, v0, v9, vcc
+; GENERIC-NEXT:    s_cmp_eq_u32 s6, 13
 ; GENERIC-NEXT:    s_cselect_b64 vcc, -1, 0
-; GENERIC-NEXT:    v_cndmask_b32_e32 v0, v10, v0, vcc
-; GENERIC-NEXT:    s_cmp_lg_u32 s6, 14
+; GENERIC-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc
+; GENERIC-NEXT:    s_cmp_eq_u32 s6, 14
 ; GENERIC-NEXT:    s_cselect_b64 vcc, -1, 0
-; GENERIC-NEXT:    v_cndmask_b32_e32 v0, v11, v0, vcc
-; GENERIC-NEXT:    s_cmp_lg_u32 s6, 15
+; GENERIC-NEXT:    v_cndmask_b32_e32 v0, v0, v11, vcc
+; GENERIC-NEXT:    s_cmp_eq_u32 s6, 15
 ; GENERIC-NEXT:    s_cselect_b64 vcc, -1, 0
-; GENERIC-NEXT:    v_cndmask_b32_e32 v0, v12, v0, vcc
+; GENERIC-NEXT:    v_cndmask_b32_e32 v0, v0, v12, vcc
 ; GENERIC-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GENERIC-NEXT:    s_endpgm
 ;
@@ -882,34 +882,34 @@ define amdgpu_kernel void @extract_neg_offset_sgpr(ptr addrspace(1) %out, i32 %o
 ; GENERIC-NEXT:    s_addk_i32 s2, 0xfe00
 ; GENERIC-NEXT:    s_cmp_eq_u32 s2, 1
 ; GENERIC-NEXT:    s_cselect_b32 s4, 1, 0
-; GENERIC-NEXT:    s_cmp_lg_u32 s2, 2
-; GENERIC-NEXT:    s_cselect_b32 s4, s4, 2
-; GENERIC-NEXT:    s_cmp_lg_u32 s2, 3
-; GENERIC-NEXT:    s_cselect_b32 s4, s4, 3
-; GENERIC-NEXT:    s_cmp_lg_u32 s2, 4
-; GENERIC-NEXT:    s_cselect_b32 s4, s4, 5
-; GENERIC-NEXT:    s_cmp_lg_u32 s2, 5
-; GENERIC-NEXT:    s_cselect_b32 s4, s4, 6
-; GENERIC-NEXT:    s_cmp_lg_u32 s2, 6
-; GENERIC-NEXT:    s_cselect_b32 s4, s4, 7
-; GENERIC-NEXT:    s_cmp_lg_u32 s2, 7
-; GENERIC-NEXT:    s_cselect_b32 s4, s4, 8
-; GENERIC-NEXT:    s_cmp_lg_u32 s2, 8
-; GENERIC-NEXT:    s_cselect_b32 s4, s4, 9
-; GENERIC-NEXT:    s_cmp_lg_u32 s2, 9
-; GENERIC-NEXT:    s_cselect_b32 s4, s4, 10
-; GENERIC-NEXT:    s_cmp_lg_u32 s2, 10
-; GENERIC-NEXT:    s_cselect_b32 s4, s4, 11
-; GENERIC-NEXT:    s_cmp_lg_u32 s2, 11
-; GENERIC-NEXT:    s_cselect_b32 s4, s4, 12
-; GENERIC-NEXT:    s_cmp_lg_u32 s2, 12
-; GENERIC-NEXT:    s_cselect_b32 s4, s4, 13
-; GENERIC-NEXT:    s_cmp_lg_u32 s2, 13
-; GENERIC-NEXT:    s_cselect_b32 s4, s4, 14
-; GENERIC-NEXT:    s_cmp_lg_u32 s2, 14
-; GENERIC-NEXT:    s_cselect_b32 s4, s4, 15
-; GENERIC-NEXT:    s_cmp_lg_u32 s2, 15
-; GENERIC-NEXT:    s_cselect_b32 s4, s4, 16
+; GENERIC-NEXT:    s_cmp_eq_u32 s2, 2
+; GENERIC-NEXT:    s_cselect_b32 s4, 2, s4
+; GENERIC-NEXT:    s_cmp_eq_u32 s2, 3
+; GENERIC-NEXT:    s_cselect_b32 s4, 3, s4
+; GENERIC-NEXT:    s_cmp_eq_u32 s2, 4
+; GENERIC-NEXT:    s_cselect_b32 s4, 5, s4
+; GENERIC-NEXT:    s_cmp_eq_u32 s2, 5
+; GENERIC-NEXT:    s_cselect_b32 s4, 6, s4
+; GENERIC-NEXT:    s_cmp_eq_u32 s2, 6
+; GENERIC-NEXT:    s_cselect_b32 s4, 7, s4
+; GENERIC-NEXT:    s_cmp_eq_u32 s2, 7
+; GENERIC-NEXT:    s_cselect_b32 s4, 8, s4
+; GENERIC-NEXT:    s_cmp_eq_u32 s2, 8
+; GENERIC-NEXT:    s_cselect_b32 s4, 9, s4
+; GENERIC-NEXT:    s_cmp_eq_u32 s2, 9
+; GENERIC-NEXT:    s_cselect_b32 s4, 10, s4
+; GENERIC-NEXT:    s_cmp_eq_u32 s2, 10
+; GENERIC-NEXT:    s_cselect_b32 s4, 11, s4
+; GENERIC-NEXT:    s_cmp_eq_u32 s2, 11
+; GENERIC-NEXT:    s_cselect_b32 s4, 12, s4
+; GENERIC-NEXT:    s_cmp_eq_u32 s2, 12
+; GENERIC-NEXT:    s_cselect_b32 s4, 13, s4
+; GENERIC-NEXT:    s_cmp_eq_u32 s2, 13
+; GENERIC-NEXT:    s_cselect_b32 s4, 14, s4
+; GENERIC-NEXT:    s_cmp_eq_u32 s2, 14
+; GENERIC-NEXT:    s_cselect_b32 s4, 15, s4
+; GENERIC-NEXT:    s_cmp_eq_u32 s2, 15
+; GENERIC-NEXT:    s_cselect_b32 s4, 16, s4
 ; GENERIC-NEXT:    s_mov_b32 s2, -1
 ; GENERIC-NEXT:    v_mov_b32_e32 v0, s4
 ; GENERIC-NEXT:    buffer_store_dword v0, off, s[0:3], 0
@@ -3656,52 +3656,52 @@ define amdgpu_kernel void @insert_neg_offset_sgpr_loadreg(ptr addrspace(1) %in,
 ; GENERIC-NEXT:    s_mov_b32 s18, -1
 ; GENERIC-NEXT:    s_waitcnt lgkmcnt(0)
 ; GENERIC-NEXT:    s_addk_i32 s20, 0xfe00
-; GENERIC-NEXT:    s_cmp_lg_u32 s20, 3
-; GENERIC-NEXT:    s_cselect_b32 s3, s3, 5
-; GENERIC-NEXT:    s_cmp_lg_u32 s20, 2
-; GENERIC-NEXT:    s_cselect_b32 s2, s2, 5
-; GENERIC-NEXT:    s_cmp_lg_u32 s20, 1
+; GENERIC-NEXT:    s_cmp_eq_u32 s20, 3
+; GENERIC-NEXT:    s_cselect_b32 s3, 5, s3
+; GENERIC-NEXT:    s_cmp_eq_u32 s20, 2
+; GENERIC-NEXT:    s_cselect_b32 s2, 5, s2
+; GENERIC-NEXT:    s_cmp_eq_u32 s20, 1
 ; GENERIC-NEXT:    v_mov_b32_e32 v3, s3
-; GENERIC-NEXT:    s_cselect_b32 s1, s1, 5
-; GENERIC-NEXT:    s_cmp_lg_u32 s20, 0
+; GENERIC-NEXT:    s_cselect_b32 s1, 5, s1
+; GENERIC-NEXT:    s_cmp_eq_u32 s20, 0
 ; GENERIC-NEXT:    v_mov_b32_e32 v2, s2
-; GENERIC-NEXT:    s_cselect_b32 s0, s0, 5
-; GENERIC-NEXT:    s_cmp_lg_u32 s20, 7
+; GENERIC-NEXT:    s_cselect_b32 s0, 5, s0
+; GENERIC-NEXT:    s_cmp_eq_u32 s20, 7
 ; GENERIC-NEXT:    v_mov_b32_e32 v1, s1
-; GENERIC-NEXT:    s_cselect_b32 s1, s7, 5
-; GENERIC-NEXT:    s_cmp_lg_u32 s20, 6
+; GENERIC-NEXT:    s_cselect_b32 s1, 5, s7
+; GENERIC-NEXT:    s_cmp_eq_u32 s20, 6
 ; GENERIC-NEXT:    v_mov_b32_e32 v0, s0
-; GENERIC-NEXT:    s_cselect_b32 s0, s6, 5
-; GENERIC-NEXT:    s_cmp_lg_u32 s20, 5
+; GENERIC-NEXT:    s_cselect_b32 s0, 5, s6
+; GENERIC-NEXT:    s_cmp_eq_u32 s20, 5
 ; GENERIC-NEXT:    v_mov_b32_e32 v7, s1
-; GENERIC-NEXT:    s_cselect_b32 s1, s5, 5
-; GENERIC-NEXT:    s_cmp_lg_u32 s20, 4
+; GENERIC-NEXT:    s_cselect_b32 s1, 5, s5
+; GENERIC-NEXT:    s_cmp_eq_u32 s20, 4
 ; GENERIC-NEXT:    v_mov_b32_e32 v6, s0
-; GENERIC-NEXT:    s_cselect_b32 s0, s4, 5
-; GENERIC-NEXT:    s_cmp_lg_u32 s20, 11
+; GENERIC-NEXT:    s_cselect_b32 s0, 5, s4
+; GENERIC-NEXT:    s_cmp_eq_u32 s20, 11
 ; GENERIC-NEXT:    v_mov_b32_e32 v5, s1
-; GENERIC-NEXT:    s_cselect_b32 s1, s11, 5
-; GENERIC-NEXT:    s_cmp_lg_u32 s20, 10
+; GENERIC-NEXT:    s_cselect_b32 s1, 5, s11
+; GENERIC-NEXT:    s_cmp_eq_u32 s20, 10
 ; GENERIC-NEXT:    v_mov_b32_e32 v4, s0
 ; GENERIC-NEXT:    buffer_store_dwordx4 v[4:7], off, s[16:19], 0 offset:16
-; GENERIC-NEXT:    s_cselect_b32 s0, s10, 5
-; GENERIC-NEXT:    s_cmp_lg_u32 s20, 9
+; GENERIC-NEXT:    s_cselect_b32 s0, 5, s10
+; GENERIC-NEXT:    s_cmp_eq_u32 s20, 9
 ; GENERIC-NEXT:    s_waitcnt expcnt(0)
 ; GENERIC-NEXT:    v_mov_b32_e32 v7, s1
-; GENERIC-NEXT:    s_cselect_b32 s1, s9, 5
-; GENERIC-NEXT:    s_cmp_lg_u32 s20, 8
+; GENERIC-NEXT:    s_cselect_b32 s1, 5, s9
+; GENERIC-NEXT:    s_cmp_eq_u32 s20, 8
 ; GENERIC-NEXT:    v_mov_b32_e32 v6, s0
-; GENERIC-NEXT:    s_cselect_b32 s0, s8, 5
-; GENERIC-NEXT:    s_cmp_lg_u32 s20, 15
+; GENERIC-NEXT:    s_cselect_b32 s0, 5, s8
+; GENERIC-NEXT:    s_cmp_eq_u32 s20, 15
 ; GENERIC-NEXT:    v_mov_b32_e32 v5, s1
-; GENERIC-NEXT:    s_cselect_b32 s1, s15, 5
-; GENERIC-NEXT:    s_cmp_lg_u32 s20, 14
+; GENERIC-NEXT:    s_cselect_b32 s1, 5, s15
+; GENERIC-NEXT:    s_cmp_eq_u32 s20, 14
 ; GENERIC-NEXT:    v_mov_b32_e32 v4, s0
-; GENERIC-NEXT:    s_cselect_b32 s0, s14, 5
-; GENERIC-NEXT:    s_cmp_lg_u32 s20, 13
-; GENERIC-NEXT:    s_cselect_b32 s2, s13, 5
-; GENERIC-NEXT:    s_cmp_lg_u32 s20, 12
-; GENERIC-NEXT:    s_cselect_b32 s3, s12, 5
+; GENERIC-NEXT:    s_cselect_b32 s0, 5, s14
+; GENERIC-NEXT:    s_cmp_eq_u32 s20, 13
+; GENERIC-NEXT:    s_cselect_b32 s2, 5, s13
+; GENERIC-NEXT:    s_cmp_eq_u32 s20, 12
+; GENERIC-NEXT:    s_cselect_b32 s3, 5, s12
 ; GENERIC-NEXT:    buffer_store_dwordx4 v[4:7], off, s[16:19], 0 offset:32
 ; GENERIC-NEXT:    s_waitcnt expcnt(0)
 ; GENERIC-NEXT:    v_mov_b32_e32 v7, s1
@@ -6675,38 +6675,38 @@ define amdgpu_kernel void @insert_w_offset_multiple_in_block(ptr addrspace(1) %o
 ; GENERIC-NEXT:    s_cselect_b64 s[22:23], -1, 0
 ; GENERIC-NEXT:    v_cndmask_b32_e64 v16, 4.0, v0, s[22:23]
 ; GENERIC-NEXT:    s_add_i32 s26, s24, 2
-; GENERIC-NEXT:    s_cmp_lg_u32 s26, 3
+; GENERIC-NEXT:    s_cmp_eq_u32 s26, 3
 ; GENERIC-NEXT:    buffer_store_dwordx4 v[13:16], off, s[28:31], 0
 ; GENERIC-NEXT:    s_cselect_b64 s[22:23], -1, 0
-; GENERIC-NEXT:    s_cmp_lg_u32 s26, 2
+; GENERIC-NEXT:    s_cmp_eq_u32 s26, 2
 ; GENERIC-NEXT:    s_cselect_b64 s[24:25], -1, 0
 ; GENERIC-NEXT:    s_waitcnt expcnt(0)
-; GENERIC-NEXT:    v_cndmask_b32_e64 v16, v0, v16, s[22:23]
-; GENERIC-NEXT:    v_cndmask_b32_e64 v15, v0, v15, s[24:25]
-; GENERIC-NEXT:    s_cmp_lg_u32 s26, 1
+; GENERIC-NEXT:    v_cndmask_b32_e64 v16, v16, v0, s[22:23]
+; GENERIC-NEXT:    v_cndmask_b32_e64 v15, v15, v0, s[24:25]
+; GENERIC-NEXT:    s_cmp_eq_u32 s26, 1
 ; GENERIC-NEXT:    s_cselect_b64 s[22:23], -1, 0
-; GENERIC-NEXT:    v_cndmask_b32_e64 v14, v0, v14, s[22:23]
-; GENERIC-NEXT:    s_cmp_lg_u32 s26, 0
+; GENERIC-NEXT:    v_cndmask_b32_e64 v14, v14, v0, s[22:23]
+; GENERIC-NEXT:    s_cmp_eq_u32 s26, 0
 ; GENERIC-NEXT:    s_cselect_b64 s[22:23], -1, 0
-; GENERIC-NEXT:    v_cndmask_b32_e64 v13, v0, v13, s[22:23]
+; GENERIC-NEXT:    v_cndmask_b32_e64 v13, v13, v0, s[22:23]
 ; GENERIC-NEXT:    v_cndmask_b32_e64 v9, v9, v0, s[14:15]
 ; GENERIC-NEXT:    v_cndmask_b32_e64 v10, v10, v0, s[16:17]
 ; GENERIC-NEXT:    v_cndmask_b32_e64 v11, v11, v0, s[18:19]
 ; GENERIC-NEXT:    v_cndmask_b32_e64 v12, v12, v0, s[20:21]
 ; GENERIC-NEXT:    buffer_store_dwordx4 v[9:12], off, s[28:31], 0 offset:16
-; GENERIC-NEXT:    s_cmp_lg_u32 s26, 7
+; GENERIC-NEXT:    s_cmp_eq_u32 s26, 7
 ; GENERIC-NEXT:    s_cselect_b64 s[14:15], -1, 0
-; GENERIC-NEXT:    s_cmp_lg_u32 s26, 6
+; GENERIC-NEXT:    s_cmp_eq_u32 s26, 6
 ; GENERIC-NEXT:    s_cselect_b64 s[16:17], -1, 0
 ; GENERIC-NEXT:    s_waitcnt expcnt(0)
-; GENERIC-NEXT:    v_cndmask_b32_e64 v12, v0, v12, s[14:15]
-; GENERIC-NEXT:    v_cndmask_b32_e64 v11, v0, v11, s[16:17]
-; GENERIC-NEXT:    s_cmp_lg_u32 s26, 5
+; GENERIC-NEXT:    v_cndmask_b32_e64 v12, v12, v0, s[14:15]
+; GENERIC-NEXT:    v_cndmask_b32_e64 v11, v11, v0, s[16:17]
+; GENERIC-NEXT:    s_cmp_eq_u32 s26, 5
 ; GENERIC-NEXT:    s_cselect_b64 s[14:15], -1, 0
-; GENERIC-NEXT:    v_cndmask_b32_e64 v10, v0, v10, s[14:15]
-; GENERIC-NEXT:    s_cmp_lg_u32 s26, 4
+; GENERIC-NEXT:    v_cndmask_b32_e64 v10, v10, v0, s[14:15]
+; GENERIC-NEXT:    s_cmp_eq_u32 s26, 4
 ; GENERIC-NEXT:    s_cselect_b64 s[14:15], -1, 0
-; GENERIC-NEXT:    v_cndmask_b32_e64 v9, v0, v9, s[14:15]
+; GENERIC-NEXT:    v_cndmask_b32_e64 v9, v9, v0, s[14:15]
 ; GENERIC-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
 ; GENERIC-NEXT:    v_cndmask_b32_e64 v2, v2, v0, s[0:1]
 ; GENERIC-NEXT:    v_cndmask_b32_e64 v3, v3, v0, s[2:3]
@@ -6717,33 +6717,33 @@ define amdgpu_kernel void @insert_w_offset_multiple_in_block(ptr addrspace(1) %o
 ; GENERIC-NEXT:    v_cndmask_b32_e64 v7, v7, v0, s[10:11]
 ; GENERIC-NEXT:    v_cndmask_b32_e64 v8, v8, v0, s[12:13]
 ; GENERIC-NEXT:    buffer_store_dwordx4 v[5:8], off, s[28:31], 0 offset:32
-; GENERIC-NEXT:    s_cmp_lg_u32 s26, 11
+; GENERIC-NEXT:    s_cmp_eq_u32 s26, 11
 ; GENERIC-NEXT:    buffer_store_dwordx4 v[9:12], off, s[28:31], 0 offset:80
 ; GENERIC-NEXT:    s_cselect_b64 vcc, -1, 0
 ; GENERIC-NEXT:    s_waitcnt expcnt(1)
-; GENERIC-NEXT:    v_cndmask_b32_e32 v8, v0, v8, vcc
-; GENERIC-NEXT:    s_cmp_lg_u32 s26, 10
+; GENERIC-NEXT:    v_cndmask_b32_e32 v8, v8, v0, vcc
+; GENERIC-NEXT:    s_cmp_eq_u32 s26, 10
 ; GENERIC-NEXT:    s_cselect_b64 vcc, -1, 0
-; GENERIC-NEXT:    v_cndmask_b32_e32 v7, v0, v7, vcc
-; GENERIC-NEXT:    s_cmp_lg_u32 s26, 9
+; GENERIC-NEXT:    v_cndmask_b32_e32 v7, v7, v0, vcc
+; GENERIC-NEXT:    s_cmp_eq_u32 s26, 9
 ; GENERIC-NEXT:    s_cselect_b64 vcc, -1, 0
-; GENERIC-NEXT:    v_cndmask_b32_e32 v6, v0, v6, vcc
-; GENERIC-NEXT:    s_cmp_lg_u32 s26, 8
+; GENERIC-NEXT:    v_cndmask_b32_e32 v6, v6, v0, vcc
+; GENERIC-NEXT:    s_cmp_eq_u32 s26, 8
 ; GENERIC-NEXT:    s_cselect_b64 vcc, -1, 0
-; GENERIC-NEXT:    v_cndmask_b32_e32 v5, v0, v5, vcc
-; GENERIC-NEXT:    s_cmp_lg_u32 s26, 15
+; GENERIC-NEXT:    v_cndmask_b32_e32 v5, v5, v0, vcc
+; GENERIC-NEXT:    s_cmp_eq_u32 s26, 15
 ; GENERIC-NEXT:    s_cselect_b64 vcc, -1, 0
-; GENERIC-NEXT:    v_cndmask_b32_e32 v4, v0, v4, vcc
-; GENERIC-NEXT:    s_cmp_lg_u32 s26, 14
+; GENERIC-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc
+; GENERIC-NEXT:    s_cmp_eq_u32 s26, 14
 ; GENERIC-NEXT:    buffer_store_dwordx4 v[5:8], off, s[28:31], 0 offset:96
 ; GENERIC-NEXT:    s_cselect_b64 vcc, -1, 0
-; GENERIC-NEXT:    v_cndmask_b32_e32 v3, v0, v3, vcc
-; GENERIC-NEXT:    s_cmp_lg_u32 s26, 13
+; GENERIC-NEXT:    v_cndmask_b32_e32 v3, v3, v0, vcc
+; GENERIC-NEXT:    s_cmp_eq_u32 s26, 13
 ; GENERIC-NEXT:    s_cselect_b64 vcc, -1, 0
-; GENERIC-NEXT:    v_cndmask_b32_e32 v2, v0, v2, vcc
-; GENERIC-NEXT:    s_cmp_lg_u32 s26, 12
+; GENERIC-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GENERIC-NEXT:    s_cmp_eq_u32 s26, 12
 ; GENERIC-NEXT:    s_cselect_b64 vcc, -1, 0
-; GENERIC-NEXT:    v_cndmask_b32_e32 v1, v0, v1, vcc
+; GENERIC-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
 ; GENERIC-NEXT:    buffer_store_dwordx4 v[1:4], off, s[28:31], 0 offset:112
 ; GENERIC-NEXT:    buffer_store_dwordx4 v[13:16], off, s[28:31], 0 offset:64
 ; GENERIC-NEXT:    s_endpgm
@@ -8702,58 +8702,58 @@ define amdgpu_kernel void @insertelement_v16f32_or_index(ptr addrspace(1) %out,
 ; GENERIC-NEXT:    v_mov_b32_e32 v18, s21
 ; GENERIC-NEXT:    v_mov_b32_e32 v19, s20
 ; GENERIC-NEXT:    s_or_b32 s4, s4, 1
-; GENERIC-NEXT:    s_cmp_lg_u32 s4, 3
+; GENERIC-NEXT:    s_cmp_eq_u32 s4, 3
 ; GENERIC-NEXT:    s_cselect_b64 vcc, -1, 0
-; GENERIC-NEXT:    v_cndmask_b32_e32 v3, v10, v0, vcc
-; GENERIC-NEXT:    s_cmp_lg_u32 s4, 2
+; GENERIC-NEXT:    v_cndmask_b32_e32 v3, v0, v10, vcc
+; GENERIC-NEXT:    s_cmp_eq_u32 s4, 2
 ; GENERIC-NEXT:    s_cselect_b64 vcc, -1, 0
-; GENERIC-NEXT:    v_cndmask_b32_e32 v2, v10, v1, vcc
-; GENERIC-NEXT:    s_cmp_lg_u32 s4, 1
+; GENERIC-NEXT:    v_cndmask_b32_e32 v2, v1, v10, vcc
+; GENERIC-NEXT:    s_cmp_eq_u32 s4, 1
 ; GENERIC-NEXT:    s_cselect_b64 vcc, -1, 0
-; GENERIC-NEXT:    v_cndmask_b32_e32 v1, v10, v4, vcc
-; GENERIC-NEXT:    s_cmp_lg_u32 s4, 0
+; GENERIC-NEXT:    v_cndmask_b32_e32 v1, v4, v10, vcc
+; GENERIC-NEXT:    s_cmp_eq_u32 s4, 0
 ; GENERIC-NEXT:    s_cselect_b64 vcc, -1, 0
-; GENERIC-NEXT:    v_cndmask_b32_e32 v0, v10, v5, vcc
-; GENERIC-NEXT:    s_cmp_lg_u32 s4, 7
+; GENERIC-NEXT:    v_cndmask_b32_e32 v0, v5, v10, vcc
+; GENERIC-NEXT:    s_cmp_eq_u32 s4, 7
 ; GENERIC-NEXT:    s_cselect_b64 vcc, -1, 0
-; GENERIC-NEXT:    v_cndmask_b32_e32 v7, v10, v6, vcc
-; GENERIC-NEXT:    s_cmp_lg_u32 s4, 6
+; GENERIC-NEXT:    v_cndmask_b32_e32 v7, v6, v10, vcc
+; GENERIC-NEXT:    s_cmp_eq_u32 s4, 6
 ; GENERIC-NEXT:    s_cselect_b64 vcc, -1, 0
-; GENERIC-NEXT:    v_cndmask_b32_e32 v6, v10, v8, vcc
-; GENERIC-NEXT:    s_cmp_lg_u32 s4, 5
+; GENERIC-NEXT:    v_cndmask_b32_e32 v6, v8, v10, vcc
+; GENERIC-NEXT:    s_cmp_eq_u32 s4, 5
 ; GENERIC-NEXT:    s_cselect_b64 vcc, -1, 0
-; GENERIC-NEXT:    v_cndmask_b32_e32 v5, v10, v9, vcc
-; GENERIC-NEXT:    s_cmp_lg_u32 s4, 4
+; GENERIC-NEXT:    v_cndmask_b32_e32 v5, v9, v10, vcc
+; GENERIC-NEXT:    s_cmp_eq_u32 s4, 4
 ; GENERIC-NEXT:    s_cselect_b64 vcc, -1, 0
-; GENERIC-NEXT:    v_cndmask_b32_e32 v4, v10, v11, vcc
-; GENERIC-NEXT:    s_cmp_lg_u32 s4, 11
+; GENERIC-NEXT:    v_cndmask_b32_e32 v4, v11, v10, vcc
+; GENERIC-NEXT:    s_cmp_eq_u32 s4, 11
 ; GENERIC-NEXT:    s_cselect_b64 vcc, -1, 0
-; GENERIC-NEXT:    v_cndmask_b32_e32 v8, v10, v12, vcc
-; GENERIC-NEXT:    s_cmp_lg_u32 s4, 10
+; GENERIC-NEXT:    v_cndmask_b32_e32 v8, v12, v10, vcc
+; GENERIC-NEXT:    s_cmp_eq_u32 s4, 10
 ; GENERIC-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16
 ; GENERIC-NEXT:    s_cselect_b64 vcc, -1, 0
 ; GENERIC-NEXT:    s_waitcnt expcnt(0)
-; GENERIC-NEXT:    v_cndmask_b32_e32 v7, v10, v13, vcc
-; GENERIC-NEXT:    s_cmp_lg_u32 s4, 9
+; GENERIC-NEXT:    v_cndmask_b32_e32 v7, v13, v10, vcc
+; GENERIC-NEXT:    s_cmp_eq_u32 s4, 9
 ; GENERIC-NEXT:    s_cselect_b64 vcc, -1, 0
-; GENERIC-NEXT:    v_cndmask_b32_e32 v6, v10, v14, vcc
-; GENERIC-NEXT:    s_cmp_lg_u32 s4, 8
+; GENERIC-NEXT:    v_cndmask_b32_e32 v6, v14, v10, vcc
+; GENERIC-NEXT:    s_cmp_eq_u32 s4, 8
 ; GENERIC-NEXT:    s_cselect_b64 vcc, -1, 0
-; GENERIC-NEXT:    v_cndmask_b32_e32 v5, v10, v15, vcc
-; GENERIC-NEXT:    s_cmp_lg_u32 s4, 15
+; GENERIC-NEXT:    v_cndmask_b32_e32 v5, v15, v10, vcc
+; GENERIC-NEXT:    s_cmp_eq_u32 s4, 15
 ; GENERIC-NEXT:    s_cselect_b64 vcc, -1, 0
-; GENERIC-NEXT:    v_cndmask_b32_e32 v9, v10, v16, vcc
-; GENERIC-NEXT:    s_cmp_lg_u32 s4, 14
+; GENERIC-NEXT:    v_cndmask_b32_e32 v9, v16, v10, vcc
+; GENERIC-NEXT:    s_cmp_eq_u32 s4, 14
 ; GENERIC-NEXT:    buffer_store_dwordx4 v[5:8], off, s[0:3], 0 offset:32
 ; GENERIC-NEXT:    s_cselect_b64 vcc, -1, 0
 ; GENERIC-NEXT:    s_waitcnt expcnt(0)
-; GENERIC-NEXT:    v_cndmask_b32_e32 v8, v10, v17, vcc
-; GENERIC-NEXT:    s_cmp_lg_u32 s4, 13
+; GENERIC-NEXT:    v_cndmask_b32_e32 v8, v17, v10, vcc
+; GENERIC-NEXT:    s_cmp_eq_u32 s4, 13
 ; GENERIC-NEXT:    s_cselect_b64 vcc, -1, 0
-; GENERIC-NEXT:    v_cndmask_b32_e32 v7, v10, v18, vcc
-; GENERIC-NEXT:    s_cmp_lg_u32 s4, 12
+; GENERIC-NEXT:    v_cndmask_b32_e32 v7, v18, v10, vcc
+; GENERIC-NEXT:    s_cmp_eq_u32 s4, 12
 ; GENERIC-NEXT:    s_cselect_b64 vcc, -1, 0
-; GENERIC-NEXT:    v_cndmask_b32_e32 v6, v10, v19, vcc
+; GENERIC-NEXT:    v_cndmask_b32_e32 v6, v19, v10, vcc
 ; GENERIC-NEXT:    buffer_store_dwordx4 v[6:9], off, s[0:3], 0 offset:48
 ; GENERIC-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
 ; GENERIC-NEXT:    s_endpgm
diff --git a/llvm/test/CodeGen/AMDGPU/insert_vector_dynelt.ll b/llvm/test/CodeGen/AMDGPU/insert_vector_dynelt.ll
index 2e037335ce37a..978df53f3acd3 100644
--- a/llvm/test/CodeGen/AMDGPU/insert_vector_dynelt.ll
+++ b/llvm/test/CodeGen/AMDGPU/insert_vector_dynelt.ll
@@ -4,27 +4,27 @@
 define amdgpu_kernel void @float4_inselt(ptr addrspace(1) %out, <4 x float> %vec, i32 %sel) {
 ; GCN-LABEL: float4_inselt:
 ; GCN:       ; %bb.0: ; %entry
-; GCN-NEXT:    s_load_dword s6, s[4:5], 0x44
+; GCN-NEXT:    s_load_dword s8, s[4:5], 0x44
 ; GCN-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x34
 ; GCN-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x24
 ; GCN-NEXT:    s_waitcnt lgkmcnt(0)
-; GCN-NEXT:    s_cmp_lg_u32 s6, 3
+; GCN-NEXT:    s_cmp_eq_u32 s8, 3
 ; GCN-NEXT:    v_mov_b32_e32 v0, s3
-; GCN-NEXT:    s_cselect_b64 vcc, -1, 0
-; GCN-NEXT:    s_cmp_lg_u32 s6, 2
-; GCN-NEXT:    v_cndmask_b32_e32 v3, 1.0, v0, vcc
+; GCN-NEXT:    s_cselect_b64 s[6:7], -1, 0
+; GCN-NEXT:    s_cmp_eq_u32 s8, 2
+; GCN-NEXT:    v_cndmask_b32_e64 v3, v0, 1.0, s[6:7]
 ; GCN-NEXT:    v_mov_b32_e32 v0, s2
-; GCN-NEXT:    s_cselect_b64 vcc, -1, 0
-; GCN-NEXT:    s_cmp_lg_u32 s6, 1
-; GCN-NEXT:    v_cndmask_b32_e32 v2, 1.0, v0, vcc
+; GCN-NEXT:    s_cselect_b64 s[2:3], -1, 0
+; GCN-NEXT:    s_cmp_eq_u32 s8, 1
+; GCN-NEXT:    v_cndmask_b32_e64 v2, v0, 1.0, s[2:3]
 ; GCN-NEXT:    v_mov_b32_e32 v0, s1
-; GCN-NEXT:    s_cselect_b64 vcc, -1, 0
-; GCN-NEXT:    s_cmp_lg_u32 s6, 0
-; GCN-NEXT:    v_cndmask_b32_e32 v1, 1.0, v0, vcc
+; GCN-NEXT:    s_cselect_b64 s[2:3], -1, 0
+; GCN-NEXT:    s_cmp_eq_u32 s8, 0
+; GCN-NEXT:    v_cndmask_b32_e64 v1, v0, 1.0, s[2:3]
 ; GCN-NEXT:    v_mov_b32_e32 v0, s0
-; GCN-NEXT:    s_cselect_b64 vcc, -1, 0
+; GCN-NEXT:    s_cselect_b64 s[0:1], -1, 0
 ; GCN-NEXT:    v_mov_b32_e32 v4, s4
-; GCN-NEXT:    v_cndmask_b32_e32 v0, 1.0, v0, vcc
+; GCN-NEXT:    v_cndmask_b32_e64 v0, v0, 1.0, s[0:1]
 ; GCN-NEXT:    v_mov_b32_e32 v5, s5
 ; GCN-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
 ; GCN-NEXT:    s_endpgm
@@ -60,14 +60,14 @@ define amdgpu_kernel void @int4_inselt(ptr addrspace(1) %out, <4 x i32> %vec, i3
 ; GCN-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x34
 ; GCN-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x24
 ; GCN-NEXT:    s_waitcnt lgkmcnt(0)
-; GCN-NEXT:    s_cmp_lg_u32 s6, 3
-; GCN-NEXT:    s_cselect_b32 s3, s3, 1
-; GCN-NEXT:    s_cmp_lg_u32 s6, 2
-; GCN-NEXT:    s_cselect_b32 s2, s2, 1
-; GCN-NEXT:    s_cmp_lg_u32 s6, 1
-; GCN-NEXT:    s_cselect_b32 s1, s1, 1
-; GCN-NEXT:    s_cmp_lg_u32 s6, 0
-; GCN-NEXT:    s_cselect_b32 s0, s0, 1
+; GCN-NEXT:    s_cmp_eq_u32 s6, 3
+; GCN-NEXT:    s_cselect_b32 s3, 1, s3
+; GCN-NEXT:    s_cmp_eq_u32 s6, 2
+; GCN-NEXT:    s_cselect_b32 s2, 1, s2
+; GCN-NEXT:    s_cmp_eq_u32 s6, 1
+; GCN-NEXT:    s_cselect_b32 s1, 1, s1
+; GCN-NEXT:    s_cmp_eq_u32 s6, 0
+; GCN-NEXT:    s_cselect_b32 s0, 1, s0
 ; GCN-NEXT:    v_mov_b32_e32 v4, s4
 ; GCN-NEXT:    v_mov_b32_e32 v0, s0
 ; GCN-NEXT:    v_mov_b32_e32 v1, s1
@@ -88,15 +88,15 @@ define amdgpu_kernel void @float2_inselt(ptr addrspace(1) %out, <2 x float> %vec
 ; GCN-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x2c
 ; GCN-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x24
 ; GCN-NEXT:    s_waitcnt lgkmcnt(0)
-; GCN-NEXT:    s_cmp_lg_u32 s2, 1
+; GCN-NEXT:    s_cmp_eq_u32 s2, 1
 ; GCN-NEXT:    v_mov_b32_e32 v0, s1
-; GCN-NEXT:    s_cselect_b64 vcc, -1, 0
-; GCN-NEXT:    s_cmp_lg_u32 s2, 0
-; GCN-NEXT:    v_cndmask_b32_e32 v1, 1.0, v0, vcc
+; GCN-NEXT:    s_cselect_b64 s[6:7], -1, 0
+; GCN-NEXT:    s_cmp_eq_u32 s2, 0
+; GCN-NEXT:    v_cndmask_b32_e64 v1, v0, 1.0, s[6:7]
 ; GCN-NEXT:    v_mov_b32_e32 v0, s0
-; GCN-NEXT:    s_cselect_b64 vcc, -1, 0
+; GCN-NEXT:    s_cselect_b64 s[0:1], -1, 0
 ; GCN-NEXT:    v_mov_b32_e32 v2, s4
-; GCN-NEXT:    v_cndmask_b32_e32 v0, 1.0, v0, vcc
+; GCN-NEXT:    v_cndmask_b32_e64 v0, v0, 1.0, s[0:1]
 ; GCN-NEXT:    v_mov_b32_e32 v3, s5
 ; GCN-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
 ; GCN-NEXT:    s_endpgm
@@ -341,47 +341,47 @@ define amdgpu_kernel void @half8_inselt(ptr addrspace(1) %out, <8 x half> %vec,
 ; GCN-NEXT:    v_mov_b32_e32 v0, 0x3c00
 ; GCN-NEXT:    s_waitcnt lgkmcnt(0)
 ; GCN-NEXT:    s_lshr_b32 s7, s3, 16
-; GCN-NEXT:    s_cmp_lg_u32 s6, 7
+; GCN-NEXT:    s_cmp_eq_u32 s6, 7
 ; GCN-NEXT:    v_mov_b32_e32 v1, s7
 ; GCN-NEXT:    s_cselect_b64 vcc, -1, 0
-; GCN-NEXT:    s_cmp_lg_u32 s6, 6
-; GCN-NEXT:    v_cndmask_b32_sdwa v1, v0, v1, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GCN-NEXT:    s_cmp_eq_u32 s6, 6
+; GCN-NEXT:    v_cndmask_b32_sdwa v1, v1, v0, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
 ; GCN-NEXT:    v_mov_b32_e32 v2, s3
 ; GCN-NEXT:    s_cselect_b64 vcc, -1, 0
 ; GCN-NEXT:    s_lshr_b32 s3, s2, 16
-; GCN-NEXT:    v_cndmask_b32_e32 v2, v0, v2, vcc
-; GCN-NEXT:    s_cmp_lg_u32 s6, 5
+; GCN-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GCN-NEXT:    s_cmp_eq_u32 s6, 5
 ; GCN-NEXT:    v_or_b32_sdwa v3, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
 ; GCN-NEXT:    v_mov_b32_e32 v1, s3
 ; GCN-NEXT:    s_cselect_b64 vcc, -1, 0
-; GCN-NEXT:    s_cmp_lg_u32 s6, 4
-; GCN-NEXT:    v_cndmask_b32_e32 v1, v0, v1, vcc
+; GCN-NEXT:    s_cmp_eq_u32 s6, 4
+; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
 ; GCN-NEXT:    v_mov_b32_e32 v2, s2
 ; GCN-NEXT:    s_cselect_b64 vcc, -1, 0
 ; GCN-NEXT:    s_lshr_b32 s2, s1, 16
 ; GCN-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
-; GCN-NEXT:    v_cndmask_b32_e32 v2, v0, v2, vcc
-; GCN-NEXT:    s_cmp_lg_u32 s6, 3
+; GCN-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; GCN-NEXT:    s_cmp_eq_u32 s6, 3
 ; GCN-NEXT:    v_or_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
 ; GCN-NEXT:    v_mov_b32_e32 v1, s2
 ; GCN-NEXT:    s_cselect_b64 vcc, -1, 0
-; GCN-NEXT:    s_cmp_lg_u32 s6, 2
-; GCN-NEXT:    v_cndmask_b32_e32 v1, v0, v1, vcc
+; GCN-NEXT:    s_cmp_eq_u32 s6, 2
+; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v0, vcc
 ; GCN-NEXT:    v_mov_b32_e32 v4, s1
 ; GCN-NEXT:    s_cselect_b64 vcc, -1, 0
 ; GCN-NEXT:    s_lshr_b32 s1, s0, 16
 ; GCN-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
-; GCN-NEXT:    v_cndmask_b32_e32 v4, v0, v4, vcc
-; GCN-NEXT:    s_cmp_lg_u32 s6, 1
+; GCN-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc
+; GCN-NEXT:    s_cmp_eq_u32 s6, 1
 ; GCN-NEXT:    v_or_b32_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
 ; GCN-NEXT:    v_mov_b32_e32 v4, s1
 ; GCN-NEXT:    s_cselect_b64 vcc, -1, 0
-; GCN-NEXT:    s_cmp_lg_u32 s6, 0
-; GCN-NEXT:    v_cndmask_b32_e32 v4, v0, v4, vcc
+; GCN-NEXT:    s_cmp_eq_u32 s6, 0
+; GCN-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc
 ; GCN-NEXT:    v_mov_b32_e32 v5, s0
 ; GCN-NEXT:    s_cselect_b64 vcc, -1, 0
 ; GCN-NEXT:    v_lshlrev_b32_e32 v4, 16, v4
-; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
+; GCN-NEXT:    v_cndmask_b32_e32 v0, v5, v0, vcc
 ; GCN-NEXT:    v_or_b32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
 ; GCN-NEXT:    v_mov_b32_e32 v4, s4
 ; GCN-NEXT:    v_mov_b32_e32 v5, s5
@@ -471,81 +471,81 @@ define amdgpu_kernel void @byte16_inselt(ptr addrspace(1) %out, <16 x i8> %vec,
 ; GCN-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x24
 ; GCN-NEXT:    s_waitcnt lgkmcnt(0)
 ; GCN-NEXT:    s_lshr_b32 s7, s3, 24
-; GCN-NEXT:    s_cmp_lg_u32 s6, 15
-; GCN-NEXT:    s_cselect_b32 s7, s7, 1
+; GCN-NEXT:    s_cmp_eq_u32 s6, 15
+; GCN-NEXT:    s_cselect_b32 s7, 1, s7
 ; GCN-NEXT:    s_lshr_b32 s8, s3, 16
 ; GCN-NEXT:    s_lshl_b32 s7, s7, 8
-; GCN-NEXT:    s_cmp_lg_u32 s6, 14
-; GCN-NEXT:    s_cselect_b32 s8, s8, 1
+; GCN-NEXT:    s_cmp_eq_u32 s6, 14
+; GCN-NEXT:    s_cselect_b32 s8, 1, s8
 ; GCN-NEXT:    s_and_b32 s8, s8, 0xff
 ; GCN-NEXT:    s_or_b32 s7, s8, s7
 ; GCN-NEXT:    s_lshr_b32 s9, s3, 8
 ; GCN-NEXT:    s_lshl_b32 s7, s7, 16
-; GCN-NEXT:    s_cmp_lg_u32 s6, 13
-; GCN-NEXT:    s_cselect_b32 s8, s9, 1
+; GCN-NEXT:    s_cmp_eq_u32 s6, 13
+; GCN-NEXT:    s_cselect_b32 s8, 1, s9
 ; GCN-NEXT:    s_lshl_b32 s8, s8, 8
-; GCN-NEXT:    s_cmp_lg_u32 s6, 12
-; GCN-NEXT:    s_cselect_b32 s3, s3, 1
+; GCN-NEXT:    s_cmp_eq_u32 s6, 12
+; GCN-NEXT:    s_cselect_b32 s3, 1, s3
 ; GCN-NEXT:    s_and_b32 s3, s3, 0xff
 ; GCN-NEXT:    s_or_b32 s3, s3, s8
 ; GCN-NEXT:    s_and_b32 s3, s3, 0xffff
 ; GCN-NEXT:    s_or_b32 s3, s3, s7
 ; GCN-NEXT:    s_lshr_b32 s7, s2, 24
-; GCN-NEXT:    s_cmp_lg_u32 s6, 11
-; GCN-NEXT:    s_cselect_b32 s7, s7, 1
+; GCN-NEXT:    s_cmp_eq_u32 s6, 11
+; GCN-NEXT:    s_cselect_b32 s7, 1, s7
 ; GCN-NEXT:    s_lshl_b32 s7, s7, 8
 ; GCN-NEXT:    s_lshr_b32 s8, s2, 16
-; GCN-NEXT:    s_cmp_lg_u32 s6, 10
-; GCN-NEXT:    s_cselect_b32 s8, s8, 1
+; GCN-NEXT:    s_cmp_eq_u32 s6, 10
+; GCN-NEXT:    s_cselect_b32 s8, 1, s8
 ; GCN-NEXT:    s_and_b32 s8, s8, 0xff
 ; GCN-NEXT:    s_or_b32 s7, s8, s7
 ; GCN-NEXT:    s_lshl_b32 s7, s7, 16
 ; GCN-NEXT:    s_lshr_b32 s8, s2, 8
-; GCN-NEXT:    s_cmp_lg_u32 s6, 9
-; GCN-NEXT:    s_cselect_b32 s8, s8, 1
+; GCN-NEXT:    s_cmp_eq_u32 s6, 9
+; GCN-NEXT:    s_cselect_b32 s8, 1, s8
 ; GCN-NEXT:    s_lshl_b32 s8, s8, 8
-; GCN-NEXT:    s_cmp_lg_u32 s6, 8
-; GCN-NEXT:    s_cselect_b32 s2, s2, 1
+; GCN-NEXT:    s_cmp_eq_u32 s6, 8
+; GCN-NEXT:    s_cselect_b32 s2, 1, s2
 ; GCN-NEXT:    s_and_b32 s2, s2, 0xff
 ; GCN-NEXT:    s_or_b32 s2, s2, s8
 ; GCN-NEXT:    s_and_b32 s2, s2, 0xffff
 ; GCN-NEXT:    s_or_b32 s2, s2, s7
 ; GCN-NEXT:    s_lshr_b32 s7, s1, 24
-; GCN-NEXT:    s_cmp_lg_u32 s6, 7
-; GCN-NEXT:    s_cselect_b32 s7, s7, 1
+; GCN-NEXT:    s_cmp_eq_u32 s6, 7
+; GCN-NEXT:    s_cselect_b32 s7, 1, s7
 ; GCN-NEXT:    s_lshl_b32 s7, s7, 8
 ; GCN-NEXT:    s_lshr_b32 s8, s1, 16
-; GCN-NEXT:    s_cmp_lg_u32 s6, 6
-; GCN-NEXT:    s_cselect_b32 s8, s8, 1
+; GCN-NEXT:    s_cmp_eq_u32 s6, 6
+; GCN-NEXT:    s_cselect_b32 s8, 1, s8
 ; GCN-NEXT:    s_and_b32 s8, s8, 0xff
 ; GCN-NEXT:    s_or_b32 s7, s8, s7
 ; GCN-NEXT:    s_lshl_b32 s7, s7, 16
 ; GCN-NEXT:    s_lshr_b32 s8, s1, 8
-; GCN-NEXT:    s_cmp_lg_u32 s6, 5
-; GCN-NEXT:    s_cselect_b32 s8, s8, 1
+; GCN-NEXT:    s_cmp_eq_u32 s6, 5
+; GCN-NEXT:    s_cselect_b32 s8, 1, s8
 ; GCN-NEXT:    s_lshl_b32 s8, s8, 8
-; GCN-NEXT:    s_cmp_lg_u32 s6, 4
-; GCN-NEXT:    s_cselect_b32 s1, s1, 1
+; GCN-NEXT:    s_cmp_eq_u32 s6, 4
+; GCN-NEXT:    s_cselect_b32 s1, 1, s1
 ; GCN-NEXT:    s_and_b32 s1, s1, 0xff
 ; GCN-NEXT:    s_or_b32 s1, s1, s8
 ; GCN-NEXT:    s_and_b32 s1, s1, 0xffff
 ; GCN-NEXT:    s_or_b32 s1, s1, s7
 ; GCN-NEXT:    s_lshr_b32 s7, s0, 24
-; GCN-NEXT:    s_cmp_lg_u32 s6, 3
-; GCN-NEXT:    s_cselect_b32 s7, s7, 1
+; GCN-NEXT:    s_cmp_eq_u32 s6, 3
+; GCN-NEXT:    s_cselect_b32 s7, 1, s7
 ; GCN-NEXT:    s_lshl_b32 s7, s7, 8
 ; GCN-NEXT:    s_lshr_b32 s8, s0, 16
-; GCN-NEXT:    s_cmp_lg_u32 s6, 2
-; GCN-NEXT:    s_cselect_b32 s8, s8, 1
+; GCN-NEXT:    s_cmp_eq_u32 s6, 2
+; GCN-NEXT:    s_cselect_b32 s8, 1, s8
 ; GCN-NEXT:    s_and_b32 s8, s8, 0xff
 ; GCN-NEXT:    s_or_b32 s7, s8, s7
 ; GCN-NEXT:    s_lshl_b32 s7, s7, 16
 ; GCN-NEXT:    s_lshr_b32 s8, s0, 8
-; GCN-NEXT:    s_cmp_lg_u32 s6, 1
-; GCN-NEXT:    s_cselect_b32 s8, s8, 1
+; GCN-NEXT:    s_cmp_eq_u32 s6, 1
+; GCN-NEXT:    s_cselect_b32 s8, 1, s8
 ; GCN-NEXT:    s_lshl_b32 s8, s8, 8
-; GCN-NEXT:    s_cmp_lg_u32 s6, 0
-; GCN-NEXT:    s_cselect_b32 s0, s0, 1
+; GCN-NEXT:    s_cmp_eq_u32 s6, 0
+; GCN-NEXT:    s_cselect_b32 s0, 1, s0
 ; GCN-NEXT:    s_and_b32 s0, s0, 0xff
 ; GCN-NEXT:    s_or_b32 s0, s0, s8
 ; GCN-NEXT:    s_and_b32 s0, s0, 0xffff
@@ -1166,37 +1166,37 @@ define amdgpu_kernel void @bit128_inselt(ptr addrspace(1) %out, <128 x i1> %vec,
 ; GCN-NEXT:    s_bfe_u32 s13, s3, 0x3000d
 ; GCN-NEXT:    s_bfe_u32 s10, s3, 0x2000e
 ; GCN-NEXT:    s_bfe_u32 s9, s3, 0x1000f
-; GCN-NEXT:    s_cmpk_lg_i32 s6, 0x7f
-; GCN-NEXT:    s_cselect_b32 s4, s4, 1
+; GCN-NEXT:    s_cmpk_eq_i32 s6, 0x7f
+; GCN-NEXT:    s_cselect_b32 s4, 1, s4
 ; GCN-NEXT:    s_lshl_b32 s4, s4, 3
-; GCN-NEXT:    s_cmpk_lg_i32 s6, 0x7e
-; GCN-NEXT:    s_cselect_b32 s5, s5, 1
+; GCN-NEXT:    s_cmpk_eq_i32 s6, 0x7e
+; GCN-NEXT:    s_cselect_b32 s5, 1, s5
 ; GCN-NEXT:    s_and_b32 s5, s5, 1
 ; GCN-NEXT:    s_lshl_b32 s5, s5, 2
 ; GCN-NEXT:    s_or_b32 s4, s4, s5
-; GCN-NEXT:    s_cmpk_lg_i32 s6, 0x7d
-; GCN-NEXT:    s_cselect_b32 s5, s7, 1
+; GCN-NEXT:    s_cmpk_eq_i32 s6, 0x7d
+; GCN-NEXT:    s_cselect_b32 s5, 1, s7
 ; GCN-NEXT:    s_lshl_b32 s5, s5, 1
-; GCN-NEXT:    s_cmpk_lg_i32 s6, 0x7c
-; GCN-NEXT:    s_cselect_b32 s7, s8, 1
+; GCN-NEXT:    s_cmpk_eq_i32 s6, 0x7c
+; GCN-NEXT:    s_cselect_b32 s7, 1, s8
 ; GCN-NEXT:    s_and_b32 s7, s7, 1
 ; GCN-NEXT:    s_or_b32 s5, s7, s5
 ; GCN-NEXT:    s_and_b32 s5, s5, 3
 ; GCN-NEXT:    s_or_b32 s4, s5, s4
 ; GCN-NEXT:    s_lshl_b32 s4, s4, 12
-; GCN-NEXT:    s_cmpk_lg_i32 s6, 0x7b
-; GCN-NEXT:    s_cselect_b32 s5, s11, 1
+; GCN-NEXT:    s_cmpk_eq_i32 s6, 0x7b
+; GCN-NEXT:    s_cselect_b32 s5, 1, s11
 ; GCN-NEXT:    s_lshl_b32 s5, s5, 3
-; GCN-NEXT:    s_cmpk_lg_i32 s6, 0x7a
-; GCN-NEXT:    s_cselect_b32 s7, s12, 1
+; GCN-NEXT:    s_cmpk_eq_i32 s6, 0x7a
+; GCN-NEXT:    s_cselect_b32 s7, 1, s12
 ; GCN-NEXT:    s_and_b32 s7, s7, 1
 ; GCN-NEXT:    s_lshl_b32 s7, s7, 2
 ; GCN-NEXT:    s_or_b32 s5, s5, s7
-; GCN-NEXT:    s_cmpk_lg_i32 s6, 0x79
-; GCN-NEXT:    s_cselect_b32 s7, s15, 1
+; GCN-NEXT:    s_cmpk_eq_i32 s6, 0x79
+; GCN-NEXT:    s_cselect_b32 s7, 1, s15
 ; GCN-NEXT:    s_lshl_b32 s7, s7, 1
-; GCN-NEXT:    s_cmpk_lg_i32 s6, 0x78
-; GCN-NEXT:    s_cselect_b32 s8, s16, 1
+; GCN-NEXT:    s_cmpk_eq_i32 s6, 0x78
+; GCN-NEXT:    s_cselect_b32 s8, 1, s16
 ; GCN-NEXT:    s_and_b32 s8, s8, 1
 ; GCN-NEXT:    s_or_b32 s7, s8, s7
 ; GCN-NEXT:    s_and_b32 s7, s7, 3
@@ -1204,37 +1204,37 @@ define amdgpu_kernel void @bit128_inselt(ptr addrspace(1) %out, <128 x i1> %vec,
 ; GCN-NEXT:    s_and_b32 s5, s5, 15
 ; GCN-NEXT:    s_lshl_b32 s5, s5, 8
 ; GCN-NEXT:    s_or_b32 s4, s4, s5
-; GCN-NEXT:    s_cmpk_lg_i32 s6, 0x77
-; GCN-NEXT:    s_cselect_b32 s5, s19, 1
+; GCN-NEXT:    s_cmpk_eq_i32 s6, 0x77
+; GCN-NEXT:    s_cselect_b32 s5, 1, s19
 ; GCN-NEXT:    s_lshl_b32 s5, s5, 3
-; GCN-NEXT:    s_cmpk_lg_i32 s6, 0x76
-; GCN-NEXT:    s_cselect_b32 s7, s20, 1
+; GCN-NEXT:    s_cmpk_eq_i32 s6, 0x76
+; GCN-NEXT:    s_cselect_b32 s7, 1, s20
 ; GCN-NEXT:    s_and_b32 s7, s7, 1
 ; GCN-NEXT:    s_lshl_b32 s7, s7, 2
 ; GCN-NEXT:    s_or_b32 s5, s5, s7
-; GCN-NEXT:    s_cmpk_lg_i32 s6, 0x75
-; GCN-NEXT:    s_cselect_b32 s7, s23, 1
+; GCN-NEXT:    s_cmpk_eq_i32 s6, 0x75
+; GCN-NEXT:    s_cselect_b32 s7, 1, s23
 ; GCN-NEXT:    s_lshl_b32 s7, s7, 1
-; GCN-NEXT:    s_cmpk_lg_i32 s6, 0x74
-; GCN-NEXT:    s_cselect_b32 s8, s24, 1
+; GCN-NEXT:    s_cmpk_eq_i32 s6, 0x74
+; GCN-NEXT:    s_cselect_b32 s8, 1, s24
 ; GCN-NEXT:    s_and_b32 s8, s8, 1
 ; GCN-NEXT:    s_or_b32 s7, s8, s7
 ; GCN-NEXT:    s_and_b32 s7, s7, 3
 ; GCN-NEXT:    s_or_b32 s5, s7, s5
 ; GCN-NEXT:    s_lshl_b32 s5, s5, 4
-; GCN-NEXT:    s_cmpk_lg_i32 s6, 0x73
-; GCN-NEXT:    s_cselect_b32 s7, s27, 1
+; GCN-NEXT:    s_cmpk_eq_i32 s6, 0x73
+; GCN-NEXT:    s_cselect_b32 s7, 1, s27
 ; GCN-NEXT:    s_lshl_b32 s7, s7, 3
-; GCN-NEXT:    s_cmpk_lg_i32 s6, 0x72
-; GCN-NEXT:    s_cselect_b32 s8, s28, 1
+; GCN-NEXT:    s_cmpk_eq_i32 s6, 0x72
+; GCN-NEXT:    s_cselect_b32 s8, 1, s28
 ; GCN-NEXT:    s_and_b32 s8, s8, 1
 ; GCN-NEXT:    s_lshl_b32 s8, s8, 2
 ; GCN-NEXT:    s_or_b32 s7, s7, s8
-; GCN-NEXT:    s_cmpk_lg_i32 s6, 0x71
-; GCN-NEXT:    s_cselect_b32 s8, s31, 1
+; GCN-NEXT:    s_cmpk_eq_i32 s6, 0x71
+; GCN-NEXT:    s_cselect_b32 s8, 1, s31
 ; GCN-NEXT:    s_lshl_b32 s8, s8, 1
-; GCN-NEXT:    s_cmpk_lg_i32 s6, 0x70
-; GCN-NEXT:    s_cselect_b32 s11, s33, 1
+; GCN-NEXT:    s_cmpk_eq_i32 s6, 0x70
+; GCN-NEXT:    s_cselect_b32 s11, 1, s33
 ; GCN-NEXT:    s_and_b32 s11, s11, 1
 ; GCN-NEXT:    s_or_b32 s8, s11, s8
 ; GCN-NEXT:    s_and_b32 s8, s8, 3
@@ -1244,37 +1244,37 @@ define amdgpu_kernel void @bit128_inselt(ptr addrspace(1) %out, <128 x i1> %vec,
 ; GCN-NEXT:    s_and_b32 s5, s5, 0xff
 ; GCN-NEXT:    s_or_b32 s4, s5, s4
 ; GCN-NEXT:    s_lshl_b32 s4, s4, 16
-; GCN-NEXT:    s_cmpk_lg_i32 s6, 0x6f
-; GCN-NEXT:    s_cselect_b32 s5, s9, 1
+; GCN-NEXT:    s_cmpk_eq_i32 s6, 0x6f
+; GCN-NEXT:    s_cselect_b32 s5, 1, s9
 ; GCN-NEXT:    s_lshl_b32 s5, s5, 3
-; GCN-NEXT:    s_cmpk_lg_i32 s6, 0x6e
-; GCN-NEXT:    s_cselect_b32 s7, s10, 1
+; GCN-NEXT:    s_cmpk_eq_i32 s6, 0x6e
+; GCN-NEXT:    s_cselect_b32 s7, 1, s10
 ; GCN-NEXT:    s_and_b32 s7, s7, 1
 ; GCN-NEXT:    s_lshl_b32 s7, s7, 2
 ; GCN-NEXT:    s_or_b32 s5, s5, s7
-; GCN-NEXT:    s_cmpk_lg_i32 s6, 0x6d
-; GCN-NEXT:    s_cselect_b32 s7, s13, 1
+; GCN-NEXT:    s_cmpk_eq_i32 s6, 0x6d
+; GCN-NEXT:    s_cselect_b32 s7, 1, s13
 ; GCN-NEXT:    s_lshl_b32 s7, s7, 1
-; GCN-NEXT:    s_cmpk_lg_i32 s6, 0x6c
-; GCN-NEXT:    s_cselect_b32 s8, s14, 1
+; GCN-NEXT:    s_cmpk_eq_i32 s6, 0x6c
+; GCN-NEXT:    s_cselect_b32 s8, 1, s14
 ; GCN-NEXT:    s_and_b32 s8, s8, 1
 ; GCN-NEXT:    s_or_b32 s7, s8, s7
 ; GCN-NEXT:    s_and_b32 s7, s7, 3
 ; GCN-NEXT:    s_or_b32 s5, s7, s5
 ; GCN-NEXT:    s_lshl_b32 s5, s5, 12
-; GCN-NEXT:    s_cmpk_lg_i32 s6, 0x6b
-; GCN-NEXT:    s_cselect_b32 s7, s17, 1
+; GCN-NEXT:    s_cmpk_eq_i32 s6, 0x6b
+; GCN-NEXT:    s_cselect_b32 s7, 1, s17
 ; GCN-NEXT:    s_lshl_b32 s7, s7, 3
-; GCN-NEXT:    s_cmpk_lg_i32 s6, 0x6a
-; GCN-NEXT:    s_cselect_b32 s8, s18, 1
+; GCN-NEXT:    s_cmpk_eq_i32 s6, 0x6a
+; GCN-NEXT:    s_cselect_b32 s8, 1, s18
 ; GCN-NEXT:    s_and_b32 s8, s8, 1
 ; GCN-NEXT:    s_lshl_b32 s8, s8, 2
 ; GCN-NEXT:    s_or_b32 s7, s7, s8
-; GCN-NEXT:    s_cmpk_lg_i32 s6, 0x69
-; GCN-NEXT:    s_cselect_b32 s8, s21, 1
+; GCN-NEXT:    s_cmpk_eq_i32 s6, 0x69
+; GCN-NEXT:    s_cselect_b32 s8, 1, s21
 ; GCN-NEXT:    s_lshl_b32 s8, s8, 1
-; GCN-NEXT:    s_cmpk_lg_i32 s6, 0x68
-; GCN-NEXT:    s_cselect_b32 s9, s22, 1
+; GCN-NEXT:    s_cmpk_eq_i32 s6, 0x68
+; GCN-NEXT:    s_cselect_b32 s9, 1, s22
 ; GCN-NEXT:    s_and_b32 s9, s9, 1
 ; GCN-NEXT:    s_or_b32 s8, s9, s8
 ; GCN-NEXT:    s_and_b32 s8, s8, 3
@@ -1282,37 +1282,37 @@ define amdgpu_kernel void @bit128_inselt(ptr addrspace(1) %out, <128 x i1> %vec,
 ; GCN-NEXT:    s_and_b32 s7, s7, 15
 ; GCN-NEXT:    s_lshl_b32 s7, s7, 8
 ; GCN-NEXT:    s_or_b32 s5, s5, s7
-; GCN-NEXT:    s_cmpk_lg_i32 s6, 0x67
-; GCN-NEXT:    s_cselect_b32 s7, s25, 1
+; GCN-NEXT:    s_cmpk_eq_i32 s6, 0x67
+; GCN-NEXT:    s_cselect_b32 s7, 1, s25
 ; GCN-NEXT:    s_lshl_b32 s7, s7, 3
-; GCN-NEXT:    s_cmpk_lg_i32 s6, 0x66
-; GCN-NEXT:    s_cselect_b32 s8, s26, 1
+; GCN-NEXT:    s_cmpk_eq_i32 s6, 0x66
+; GCN-NEXT:    s_cselect_b32 s8, 1, s26
 ; GCN-NEXT:    s_and_b32 s8, s8, 1
 ; GCN-NEXT:    s_lshl_b32 s8, s8, 2
 ; GCN-NEXT:    s_or_b32 s7, s7, s8
-; GCN-NEXT:    s_cmpk_lg_i32 s6, 0x65
-; GCN-NEXT:    s_cselect_b32 s8, s29, 1
+; GCN-NEXT:    s_cmpk_eq_i32 s6, 0x65
+; GCN-NEXT:    s_cselect_b32 s8, 1, s29
 ; GCN-NEXT:    s_lshl_b32 s8, s8, 1
-; GCN-NEXT:    s_cmpk_lg_i32 s6, 0x64
-; GCN-NEXT:    s_cselect_b32 s9, s30, 1
+; GCN-NEXT:    s_cmpk_eq_i32 s6, 0x64
+; GCN-NEXT:    s_cselect_b32 s9, 1, s30
 ; GCN-NEXT:    s_and_b32 s9, s9, 1
 ; GCN-NEXT:    s_or_b32 s8, s9, s8
 ; GCN-NEXT:    s_and_b32 s8, s8, 3
 ; GCN-NEXT:    s_or_b32 s7, s8, s7
 ; GCN-NEXT:    s_lshl_b32 s7, s7, 4
-; GCN-NEXT:    s_cmpk_lg_i32 s6, 0x63
-; GCN-NEXT:    s_cselect_b32 s8, s34, 1
+; GCN-NEXT:    s_cmpk_eq_i32 s6, 0x63
+; GCN-NEXT:    s_cselect_b32 s8, 1, s34
 ; GCN-NEXT:    s_lshl_b32 s8, s8, 3
-; GCN-NEXT:    s_cmpk_lg_i32 s6, 0x62
-; GCN-NEXT:    s_cselect_b32 s9, s35, 1
+; GCN-NEXT:    s_cmpk_eq_i32 s6, 0x62
+; GCN-NEXT:    s_cselect_b32 s9, 1, s35
 ; GCN-NEXT:    s_and_b32 s9, s9, 1
 ; GCN-NEXT:    s_lshl_b32 s9, s9, 2
 ; GCN-NEXT:    s_or_b32 s8, s8, s9
-; GCN-NEXT:    s_cmpk_lg_i32 s6, 0x60
-; GCN-NEXT:    s_cselect_b32 s3, s3, 1
+; GCN-NEXT:    s_cmpk_eq_i32 s6, 0x60
+; GCN-NEXT:    s_cselect_b32 s3, 1, s3
 ; GCN-NEXT:    s_and_b32 s3, s3, 1
-; GCN-NEXT:    s_cmpk_lg_i32 s6, 0x61
-; GCN-NEXT:    s_cselect_b32 s9, s36, 1
+; GCN-NEXT:    s_cmpk_eq_i32 s6, 0x61
+; GCN-NEXT:    s_cselect_b32 s9, 1, s36
 ; GCN-NEXT:    s_lshl_b32 s9, s9, 1
 ; GCN-NEXT:    s_or_b32 s3, s3, s9
 ; GCN-NEXT:    s_and_b32 s3, s3, 3
@@ -1323,37 +1323,37 @@ define amdgpu_kernel void @bit128_inselt(ptr addrspace(1) %out, <128 x i1> %vec,
 ; GCN-NEXT:    s_or_b32 s3, s3, s5
 ; GCN-NEXT:    s_and_b32 s3, s3, 0xffff
 ; GCN-NEXT:    s_or_b32 s3, s3, s4
-; GCN-NEXT:    s_cmpk_lg_i32 s6, 0x5f
-; GCN-NEXT:    s_cselect_b32 s4, s37, 1
+; GCN-NEXT:    s_cmpk_eq_i32 s6, 0x5f
+; GCN-NEXT:    s_cselect_b32 s4, 1, s37
 ; GCN-NEXT:    s_lshl_b32 s4, s4, 3
-; GCN-NEXT:    s_cmpk_lg_i32 s6, 0x5e
-; GCN-NEXT:    s_cselect_b32 s5, s38, 1
+; GCN-NEXT:    s_cmpk_eq_i32 s6, 0x5e
+; GCN-NEXT:    s_cselect_b32 s5, 1, s38
 ; GCN-NEXT:    s_and_b32 s5, s5, 1
 ; GCN-NEXT:    s_lshl_b32 s5, s5, 2
 ; GCN-NEXT:    s_or_b32 s4, s4, s5
-; GCN-NEXT:    s_cmpk_lg_i32 s6, 0x5d
-; GCN-NEXT:    s_cselect_b32 s5, s39, 1
+; GCN-NEXT:    s_cmpk_eq_i32 s6, 0x5d
+; GCN-NEXT:    s_cselect_b32 s5, 1, s39
 ; GCN-NEXT:    s_lshl_b32 s5, s5, 1
-; GCN-NEXT:    s_cmpk_lg_i32 s6, 0x5c
-; GCN-NEXT:    s_cselect_b32 s7, vcc_hi, 1
+; GCN-NEXT:    s_cmpk_eq_i32 s6, 0x5c
+; GCN-NEXT:    s_cselect_b32 s7, 1, vcc_hi
 ; GCN-NEXT:    s_and_b32 s7, s7, 1
 ; GCN-NEXT:    s_or_b32 s5, s7, s5
 ; GCN-NEXT:    s_and_b32 s5, s5, 3
 ; GCN-NEXT:    s_or_b32 s4, s5, s4
 ; GCN-NEXT:    s_lshl_b32 s4, s4, 12
-; GCN-NEXT:    s_cmpk_lg_i32 s6, 0x5b
-; GCN-NEXT:    s_cselect_b32 s5, s94, 1
+; GCN-NEXT:    s_cmpk_eq_i32 s6, 0x5b
+; GCN-NEXT:    s_cselect_b32 s5, 1, s94
 ; GCN-NEXT:    s_lshl_b32 s5, s5, 3
-; GCN-NEXT:    s_cmpk_lg_i32 s6, 0x5a
-; GCN-NEXT:    s_cselect_b32 s7, s93, 1
+; GCN-NEXT:    s_cmpk_eq_i32 s6, 0x5a
+; GCN-NEXT:    s_cselect_b32 s7, 1, s93
 ; GCN-NEXT:    s_and_b32 s7, s7, 1
 ; GCN-NEXT:    s_lshl_b32 s7, s7, 2
 ; GCN-NEXT:    s_or_b32 s5, s5, s7
-; GCN-NEXT:    s_cmpk_lg_i32 s6, 0x59
-; GCN-NEXT:    s_cselect_b32 s7, s90, 1
+; GCN-NEXT:    s_cmpk_eq_i32 s6, 0x59
+; GCN-NEXT:    s_cselect_b32 s7, 1, s90
 ; GCN-NEXT:    s_lshl_b32 s7, s7, 1
-; GCN-NEXT:    s_cmpk_lg_i32 s6, 0x58
-; GCN-NEXT:    s_cselect_b32 s8, s89, 1
+; GCN-NEXT:    s_cmpk_eq_i32 s6, 0x58
+; GCN-NEXT:    s_cselect_b32 s8, 1, s89
 ; GCN-NEXT:    s_and_b32 s8, s8, 1
 ; GCN-NEXT:    s_or_b32 s7, s8, s7
 ; GCN-NEXT:    s_and_b32 s7, s7, 3
@@ -1361,37 +1361,37 @@ define amdgpu_kernel void @bit128_inselt(ptr addrspace(1) %out, <128 x i1> %vec,
 ; GCN-NEXT:    s_and_b32 s5, s5, 15
 ; GCN-NEXT:    s_lshl_b32 s5, s5, 8
 ; GCN-NEXT:    s_or_b32 s4, s4, s5
-; GCN-NEXT:    s_cmpk_lg_i32 s6, 0x57
-; GCN-NEXT:    s_cselect_b32 s5, s86, 1
+; GCN-NEXT:    s_cmpk_eq_i32 s6, 0x57
+; GCN-NEXT:    s_cselect_b32 s5, 1, s86
 ; GCN-NEXT:    s_lshl_b32 s5, s5, 3
-; GCN-NEXT:    s_cmpk_lg_i32 s6, 0x56
-; GCN-NEXT:    s_cselect_b32 s7, s84, 1
+; GCN-NEXT:    s_cmpk_eq_i32 s6, 0x56
+; GCN-NEXT:    s_cselect_b32 s7, 1, s84
 ; GCN-NEXT:    s_and_b32 s7, s7, 1
 ; GCN-NEXT:    s_lshl_b32 s7, s7, 2
 ; GCN-NEXT:    s_or_b32 s5, s5, s7
-; GCN-NEXT:    s_cmpk_lg_i32 s6, 0x55
-; GCN-NEXT:    s_cselect_b32 s7, s82, 1
+; GCN-NEXT:    s_cmpk_eq_i32 s6, 0x55
+; GCN-NEXT:    s_cselect_b32 s7, 1, s82
 ; GCN-NEXT:    s_lshl_b32 s7, s7, 1
-; GCN-NEXT:    s_cmpk_lg_i32 s6, 0x54
-; GCN-NEXT:    s_cselect_b32 s8, s81, 1
+; GCN-NEXT:    s_cmpk_eq_i32 s6, 0x54
+; GCN-NEXT:    s_cselect_b32 s8, 1, s81
 ; GCN-NEXT:    s_and_b32 s8, s8, 1
 ; GCN-NEXT:    s_or_b32 s7, s8, s7
 ; GCN-NEXT:    s_and_b32 s7, s7, 3
 ; GCN-NEXT:    s_or_b32 s5, s7, s5
 ; GCN-NEXT:    s_lshl_b32 s5, s5, 4
-; GCN-NEXT:    s_cmpk_lg_i32 s6, 0x53
-; GCN-NEXT:    s_cselect_b32 s7, s78, 1
+; GCN-NEXT:    s_cmpk_eq_i32 s6, 0x53
+; GCN-NEXT:    s_cselect_b32 s7, 1, s78
 ; GCN-NEXT:    s_lshl_b32 s7, s7, 3
-; GCN-NEXT:    s_cmpk_lg_i32 s6, 0x52
-; GCN-NEXT:    s_cselect_b32 s8, s77, 1
+; GCN-NEXT:    s_cmpk_eq_i32 s6, 0x52
+; GCN-NEXT:    s_cselect_b32 s8, 1, s77
 ; GCN-NEXT:    s_and_b32 s8, s8, 1
 ; GCN-NEXT:    s_lshl_b32 s8, s8, 2
 ; GCN-NEXT:    s_or_b32 s7, s7, s8
-; GCN-NEXT:    s_cmpk_lg_i32 s6, 0x51
-; GCN-NEXT:    s_cselect_b32 s8, s74, 1
+; GCN-NEXT:    s_cmpk_eq_i32 s6, 0x51
+; GCN-NEXT:    s_cselect_b32 s8, 1, s74
 ; GCN-NEXT:    s_lshl_b32 s8, s8, 1
-; GCN-NEXT:    s_cmpk_lg_i32 s6, 0x50
-; GCN-NEXT:    s_cselect_b32 s9, s73, 1
+; GCN-NEXT:    s_cmpk_eq_i32 s6, 0x50
+; GCN-NEXT:    s_cselect_b32 s9, 1, s73
 ; GCN-NEXT:    s_and_b32 s9, s9, 1
 ; GCN-NEXT:    s_or_b32 s8, s9, s8
 ; GCN-NEXT:    s_and_b32 s8, s8, 3
@@ -1401,37 +1401,37 @@ define amdgpu_kernel void @bit128_inselt(ptr addrspace(1) %out, <128 x i1> %vec,
 ; GCN-NEXT:    s_and_b32 s5, s5, 0xff
 ; GCN-NEXT:    s_or_b32 s4, s5, s4
 ; GCN-NEXT:    s_lshl_b32 s4, s4, 16
-; GCN-NEXT:    s_cmpk_lg_i32 s6, 0x4f
-; GCN-NEXT:    s_cselect_b32 s5, vcc_lo, 1
+; GCN-NEXT:    s_cmpk_eq_i32 s6, 0x4f
+; GCN-NEXT:    s_cselect_b32 s5, 1, vcc_lo
 ; GCN-NEXT:    s_lshl_b32 s5, s5, 3
-; GCN-NEXT:    s_cmpk_lg_i32 s6, 0x4e
-; GCN-NEXT:    s_cselect_b32 s7, s95, 1
+; GCN-NEXT:    s_cmpk_eq_i32 s6, 0x4e
+; GCN-NEXT:    s_cselect_b32 s7, 1, s95
 ; GCN-NEXT:    s_and_b32 s7, s7, 1
 ; GCN-NEXT:    s_lshl_b32 s7, s7, 2
 ; GCN-NEXT:    s_or_b32 s5, s5, s7
-; GCN-NEXT:    s_cmpk_lg_i32 s6, 0x4d
-; GCN-NEXT:    s_cselect_b32 s7, s92, 1
+; GCN-NEXT:    s_cmpk_eq_i32 s6, 0x4d
+; GCN-NEXT:    s_cselect_b32 s7, 1, s92
 ; GCN-NEXT:    s_lshl_b32 s7, s7, 1
-; GCN-NEXT:    s_cmpk_lg_i32 s6, 0x4c
-; GCN-NEXT:    s_cselect_b32 s8, s91, 1
+; GCN-NEXT:    s_cmpk_eq_i32 s6, 0x4c
+; GCN-NEXT:    s_cselect_b32 s8, 1, s91
 ; GCN-NEXT:    s_and_b32 s8, s8, 1
 ; GCN-NEXT:    s_or_b32 s7, s8, s7
 ; GCN-NEXT:    s_and_b32 s7, s7, 3
 ; GCN-NEXT:    s_or_b32 s5, s7, s5
 ; GCN-NEXT:    s_lshl_b32 s5, s5, 12
-; GCN-NEXT:    s_cmpk_lg_i32 s6, 0x4b
-; GCN-NEXT:    s_cselect_b32 s7, s88, 1
+; GCN-NEXT:    s_cmpk_eq_i32 s6, 0x4b
+; GCN-NEXT:    s_cselect_b32 s7, 1, s88
 ; GCN-NEXT:    s_lshl_b32 s7, s7, 3
-; GCN-NEXT:    s_cmpk_lg_i32 s6, 0x4a
-; GCN-NEXT:    s_cselect_b32 s8, s87, 1
+; GCN-NEXT:    s_cmpk_eq_i32 s6, 0x4a
+; GCN-NEXT:    s_cselect_b32 s8, 1, s87
 ; GCN-NEXT:    s_and_b32 s8, s8, 1
 ; GCN-NEXT:    s_lshl_b32 s8, s8, 2
 ; GCN-NEXT:    s_or_b32 s7, s7, s8
-; GCN-NEXT:    s_cmpk_lg_i32 s6, 0x49
-; GCN-NEXT:    s_cselect_b32 s8, s85, 1
+; GCN-NEXT:    s_cmpk_eq_i32 s6, 0x49
+; GCN-NEXT:    s_cselect_b32 s8, 1, s85
 ; GCN-NEXT:    s_lshl_b32 s8, s8, 1
-; GCN-NEXT:    s_cmpk_lg_i32 s6, 0x48
-; GCN-NEXT:    s_cselect_b32 s9, s83, 1
+; GCN-NEXT:    s_cmpk_eq_i32 s6, 0x48
+; GCN-NEXT:    s_cselect_b32 s9, 1, s83
 ; GCN-NEXT:    s_and_b32 s9, s9, 1
 ; GCN-NEXT:    s_or_b32 s8, s9, s8
 ; GCN-NEXT:    s_and_b32 s8, s8, 3
@@ -1439,37 +1439,37 @@ define amdgpu_kernel void @bit128_inselt(ptr addrspace(1) %out, <128 x i1> %vec,
 ; GCN-NEXT:    s_and_b32 s7, s7, 15
 ; GCN-NEXT:    s_lshl_b32 s7, s7, 8
 ; GCN-NEXT:    s_or_b32 s5, s5, s7
-; GCN-NEXT:    s_cmpk_lg_i32 s6, 0x47
-; GCN-NEXT:    s_cselect_b32 s7, s80, 1
+; GCN-NEXT:    s_cmpk_eq_i32 s6, 0x47
+; GCN-NEXT:    s_cselect_b32 s7, 1, s80
 ; GCN-NEXT:    s_lshl_b32 s7, s7, 3
-; GCN-NEXT:    s_cmpk_lg_i32 s6, 0x46
-; GCN-NEXT:    s_cselect_b32 s8, s79, 1
+; GCN-NEXT:    s_cmpk_eq_i32 s6, 0x46
+; GCN-NEXT:    s_cselect_b32 s8, 1, s79
 ; GCN-NEXT:    s_and_b32 s8, s8, 1
 ; GCN-NEXT:    s_lshl_b32 s8, s8, 2
 ; GCN-NEXT:    s_or_b32 s7, s7, s8
-; GCN-NEXT:    s_cmpk_lg_i32 s6, 0x45
-; GCN-NEXT:    s_cselect_b32 s8, s76, 1
+; GCN-NEXT:    s_cmpk_eq_i32 s6, 0x45
+; GCN-NEXT:    s_cselect_b32 s8, 1, s76
 ; GCN-NEXT:    s_lshl_b32 s8, s8, 1
-; GCN-NEXT:    s_cmpk_lg_i32 s6, 0x44
-; GCN-NEXT:    s_cselect_b32 s9, s75, 1
+; GCN-NEXT:    s_cmpk_eq_i32 s6, 0x44
+; GCN-NEXT:    s_cselect_b32 s9, 1, s75
 ; GCN-NEXT:    s_and_b32 s9, s9, 1
 ; GCN-NEXT:    s_or_b32 s8, s9, s8
 ; GCN-NEXT:    s_and_b32 s8, s8, 3
 ; GCN-NEXT:    s_or_b32 s7, s8, s7
 ; GCN-NEXT:    s_lshl_b32 s7, s7, 4
-; GCN-NEXT:    s_cmpk_lg_i32 s6, 0x43
-; GCN-NEXT:    s_cselect_b32 s8, s72, 1
+; GCN-NEXT:    s_cmpk_eq_i32 s6, 0x43
+; GCN-NEXT:    s_cselect_b32 s8, 1, s72
 ; GCN-NEXT:    s_lshl_b32 s8, s8, 3
-; GCN-NEXT:    s_cmpk_lg_i32 s6, 0x42
-; GCN-NEXT:    s_cselect_b32 s9, s71, 1
+; GCN-NEXT:    s_cmpk_eq_i32 s6, 0x42
+; GCN-NEXT:    s_cselect_b32 s9, 1, s71
 ; GCN-NEXT:    s_and_b32 s9, s9, 1
 ; GCN-NEXT:    s_lshl_b32 s9, s9, 2
 ; GCN-NEXT:    s_or_b32 s8, s8, s9
-; GCN-NEXT:    s_cmp_lg_u32 s6, 64
-; GCN-NEXT:    s_cselect_b32 s2, s2, 1
+; GCN-NEXT:    s_cmp_eq_u32 s6, 64
+; GCN-NEXT:    s_cselect_b32 s2, 1, s2
 ; GCN-NEXT:    s_and_b32 s2, s2, 1
-; GCN-NEXT:    s_cmpk_lg_i32 s6, 0x41
-; GCN-NEXT:    s_cselect_b32 s9, s70, 1
+; GCN-NEXT:    s_cmpk_eq_i32 s6, 0x41
+; GCN-NEXT:    s_cselect_b32 s9, 1, s70
 ; GCN-NEXT:    s_lshl_b32 s9, s9, 1
 ; GCN-NEXT:    s_or_b32 s2, s2, s9
 ; GCN-NEXT:    s_and_b32 s2, s2, 3
@@ -1480,37 +1480,37 @@ define amdgpu_kernel void @bit128_inselt(ptr addrspace(1) %out, <128 x i1> %vec,
 ; GCN-NEXT:    s_or_b32 s2, s2, s5
 ; GCN-NEXT:    s_and_b32 s2, s2, 0xffff
 ; GCN-NEXT:    s_or_b32 s2, s2, s4
-; GCN-NEXT:    s_cmp_lg_u32 s6, 63
-; GCN-NEXT:    s_cselect_b32 s4, s69, 1
+; GCN-NEXT:    s_cmp_eq_u32 s6, 63
+; GCN-NEXT:    s_cselect_b32 s4, 1, s69
 ; GCN-NEXT:    s_lshl_b32 s4, s4, 3
-; GCN-NEXT:    s_cmp_lg_u32 s6, 62
-; GCN-NEXT:    s_cselect_b32 s5, s68, 1
+; GCN-NEXT:    s_cmp_eq_u32 s6, 62
+; GCN-NEXT:    s_cselect_b32 s5, 1, s68
 ; GCN-NEXT:    s_and_b32 s5, s5, 1
 ; GCN-NEXT:    s_lshl_b32 s5, s5, 2
 ; GCN-NEXT:    s_or_b32 s4, s4, s5
-; GCN-NEXT:    s_cmp_lg_u32 s6, 61
-; GCN-NEXT:    s_cselect_b32 s5, s67, 1
+; GCN-NEXT:    s_cmp_eq_u32 s6, 61
+; GCN-NEXT:    s_cselect_b32 s5, 1, s67
 ; GCN-NEXT:    s_lshl_b32 s5, s5, 1
-; GCN-NEXT:    s_cmp_lg_u32 s6, 60
-; GCN-NEXT:    s_cselect_b32 s7, s66, 1
+; GCN-NEXT:    s_cmp_eq_u32 s6, 60
+; GCN-NEXT:    s_cselect_b32 s7, 1, s66
 ; GCN-NEXT:    s_and_b32 s7, s7, 1
 ; GCN-NEXT:    s_or_b32 s5, s7, s5
 ; GCN-NEXT:    s_and_b32 s5, s5, 3
 ; GCN-NEXT:    s_or_b32 s4, s5, s4
 ; GCN-NEXT:    s_lshl_b32 s4, s4, 12
-; GCN-NEXT:    s_cmp_lg_u32 s6, 59
-; GCN-NEXT:    s_cselect_b32 s5, s63, 1
+; GCN-NEXT:    s_cmp_eq_u32 s6, 59
+; GCN-NEXT:    s_cselect_b32 s5, 1, s63
 ; GCN-NEXT:    s_lshl_b32 s5, s5, 3
-; GCN-NEXT:    s_cmp_lg_u32 s6, 58
-; GCN-NEXT:    s_cselect_b32 s7, s61, 1
+; GCN-NEXT:    s_cmp_eq_u32 s6, 58
+; GCN-NEXT:    s_cselect_b32 s7, 1, s61
 ; GCN-NEXT:    s_and_b32 s7, s7, 1
 ; GCN-NEXT:    s_lshl_b32 s7, s7, 2
 ; GCN-NEXT:    s_or_b32 s5, s5, s7
-; GCN-NEXT:    s_cmp_lg_u32 s6, 57
-; GCN-NEXT:    s_cselect_b32 s7, s59, 1
+; GCN-NEXT:    s_cmp_eq_u32 s6, 57
+; GCN-NEXT:    s_cselect_b32 s7, 1, s59
 ; GCN-NEXT:    s_lshl_b32 s7, s7, 1
-; GCN-NEXT:    s_cmp_lg_u32 s6, 56
-; GCN-NEXT:    s_cselect_b32 s8, s58, 1
+; GCN-NEXT:    s_cmp_eq_u32 s6, 56
+; GCN-NEXT:    s_cselect_b32 s8, 1, s58
 ; GCN-NEXT:    s_and_b32 s8, s8, 1
 ; GCN-NEXT:    s_or_b32 s7, s8, s7
 ; GCN-NEXT:    s_and_b32 s7, s7, 3
@@ -1518,37 +1518,37 @@ define amdgpu_kernel void @bit128_inselt(ptr addrspace(1) %out, <128 x i1> %vec,
 ; GCN-NEXT:    s_and_b32 s5, s5, 15
 ; GCN-NEXT:    s_lshl_b32 s5, s5, 8
 ; GCN-NEXT:    s_or_b32 s4, s4, s5
-; GCN-NEXT:    s_cmp_lg_u32 s6, 55
-; GCN-NEXT:    s_cselect_b32 s5, s55, 1
+; GCN-NEXT:    s_cmp_eq_u32 s6, 55
+; GCN-NEXT:    s_cselect_b32 s5, 1, s55
 ; GCN-NEXT:    s_lshl_b32 s5, s5, 3
-; GCN-NEXT:    s_cmp_lg_u32 s6, 54
-; GCN-NEXT:    s_cselect_b32 s7, s53, 1
+; GCN-NEXT:    s_cmp_eq_u32 s6, 54
+; GCN-NEXT:    s_cselect_b32 s7, 1, s53
 ; GCN-NEXT:    s_and_b32 s7, s7, 1
 ; GCN-NEXT:    s_lshl_b32 s7, s7, 2
 ; GCN-NEXT:    s_or_b32 s5, s5, s7
-; GCN-NEXT:    s_cmp_lg_u32 s6, 53
-; GCN-NEXT:    s_cselect_b32 s7, s51, 1
+; GCN-NEXT:    s_cmp_eq_u32 s6, 53
+; GCN-NEXT:    s_cselect_b32 s7, 1, s51
 ; GCN-NEXT:    s_lshl_b32 s7, s7, 1
-; GCN-NEXT:    s_cmp_lg_u32 s6, 52
-; GCN-NEXT:    s_cselect_b32 s8, s50, 1
+; GCN-NEXT:    s_cmp_eq_u32 s6, 52
+; GCN-NEXT:    s_cselect_b32 s8, 1, s50
 ; GCN-NEXT:    s_and_b32 s8, s8, 1
 ; GCN-NEXT:    s_or_b32 s7, s8, s7
 ; GCN-NEXT:    s_and_b32 s7, s7, 3
 ; GCN-NEXT:    s_or_b32 s5, s7, s5
 ; GCN-NEXT:    s_lshl_b32 s5, s5, 4
-; GCN-NEXT:    s_cmp_lg_u32 s6, 51
-; GCN-NEXT:    s_cselect_b32 s7, s47, 1
+; GCN-NEXT:    s_cmp_eq_u32 s6, 51
+; GCN-NEXT:    s_cselect_b32 s7, 1, s47
 ; GCN-NEXT:    s_lshl_b32 s7, s7, 3
-; GCN-NEXT:    s_cmp_lg_u32 s6, 50
-; GCN-NEXT:    s_cselect_b32 s8, s45, 1
+; GCN-NEXT:    s_cmp_eq_u32 s6, 50
+; GCN-NEXT:    s_cselect_b32 s8, 1, s45
 ; GCN-NEXT:    s_and_b32 s8, s8, 1
 ; GCN-NEXT:    s_lshl_b32 s8, s8, 2
 ; GCN-NEXT:    s_or_b32 s7, s7, s8
-; GCN-NEXT:    s_cmp_lg_u32 s6, 49
-; GCN-NEXT:    s_cselect_b32 s8, s43, 1
+; GCN-NEXT:    s_cmp_eq_u32 s6, 49
+; GCN-NEXT:    s_cselect_b32 s8, 1, s43
 ; GCN-NEXT:    s_lshl_b32 s8, s8, 1
-; GCN-NEXT:    s_cmp_lg_u32 s6, 48
-; GCN-NEXT:    s_cselect_b32 s9, s42, 1
+; GCN-NEXT:    s_cmp_eq_u32 s6, 48
+; GCN-NEXT:    s_cselect_b32 s9, 1, s42
 ; GCN-NEXT:    s_and_b32 s9, s9, 1
 ; GCN-NEXT:    s_or_b32 s8, s9, s8
 ; GCN-NEXT:    s_and_b32 s8, s8, 3
@@ -1558,37 +1558,37 @@ define amdgpu_kernel void @bit128_inselt(ptr addrspace(1) %out, <128 x i1> %vec,
 ; GCN-NEXT:    s_and_b32 s5, s5, 0xff
 ; GCN-NEXT:    s_or_b32 s4, s5, s4
 ; GCN-NEXT:    s_lshl_b32 s4, s4, 16
-; GCN-NEXT:    s_cmp_lg_u32 s6, 47
-; GCN-NEXT:    s_cselect_b32 s5, s65, 1
+; GCN-NEXT:    s_cmp_eq_u32 s6, 47
+; GCN-NEXT:    s_cselect_b32 s5, 1, s65
 ; GCN-NEXT:    s_lshl_b32 s5, s5, 3
-; GCN-NEXT:    s_cmp_lg_u32 s6, 46
-; GCN-NEXT:    s_cselect_b32 s7, s64, 1
+; GCN-NEXT:    s_cmp_eq_u32 s6, 46
+; GCN-NEXT:    s_cselect_b32 s7, 1, s64
 ; GCN-NEXT:    s_and_b32 s7, s7, 1
 ; GCN-NEXT:    s_lshl_b32 s7, s7, 2
 ; GCN-NEXT:    s_or_b32 s5, s5, s7
-; GCN-NEXT:    s_cmp_lg_u32 s6, 45
-; GCN-NEXT:    s_cselect_b32 s7, s62, 1
+; GCN-NEXT:    s_cmp_eq_u32 s6, 45
+; GCN-NEXT:    s_cselect_b32 s7, 1, s62
 ; GCN-NEXT:    s_lshl_b32 s7, s7, 1
-; GCN-NEXT:    s_cmp_lg_u32 s6, 44
-; GCN-NEXT:    s_cselect_b32 s8, s60, 1
+; GCN-NEXT:    s_cmp_eq_u32 s6, 44
+; GCN-NEXT:    s_cselect_b32 s8, 1, s60
 ; GCN-NEXT:    s_and_b32 s8, s8, 1
 ; GCN-NEXT:    s_or_b32 s7, s8, s7
 ; GCN-NEXT:    s_and_b32 s7, s7, 3
 ; GCN-NEXT:    s_or_b32 s5, s7, s5
 ; GCN-NEXT:    s_lshl_b32 s5, s5, 12
-; GCN-NEXT:    s_cmp_lg_u32 s6, 43
-; GCN-NEXT:    s_cselect_b32 s7, s57, 1
+; GCN-NEXT:    s_cmp_eq_u32 s6, 43
+; GCN-NEXT:    s_cselect_b32 s7, 1, s57
 ; GCN-NEXT:    s_lshl_b32 s7, s7, 3
-; GCN-NEXT:    s_cmp_lg_u32 s6, 42
-; GCN-NEXT:    s_cselect_b32 s8, s56, 1
+; GCN-NEXT:    s_cmp_eq_u32 s6, 42
+; GCN-NEXT:    s_cselect_b32 s8, 1, s56
 ; GCN-NEXT:    s_and_b32 s8, s8, 1
 ; GCN-NEXT:    s_lshl_b32 s8, s8, 2
 ; GCN-NEXT:    s_or_b32 s7, s7, s8
-; GCN-NEXT:    s_cmp_lg_u32 s6, 41
-; GCN-NEXT:    s_cselect_b32 s8, s54, 1
+; GCN-NEXT:    s_cmp_eq_u32 s6, 41
+; GCN-NEXT:    s_cselect_b32 s8, 1, s54
 ; GCN-NEXT:    s_lshl_b32 s8, s8, 1
-; GCN-NEXT:    s_cmp_lg_u32 s6, 40
-; GCN-NEXT:    s_cselect_b32 s9, s52, 1
+; GCN-NEXT:    s_cmp_eq_u32 s6, 40
+; GCN-NEXT:    s_cselect_b32 s9, 1, s52
 ; GCN-NEXT:    s_and_b32 s9, s9, 1
 ; GCN-NEXT:    s_or_b32 s8, s9, s8
 ; GCN-NEXT:    s_and_b32 s8, s8, 3
@@ -1596,38 +1596,38 @@ define amdgpu_kernel void @bit128_inselt(ptr addrspace(1) %out, <128 x i1> %vec,
 ; GCN-NEXT:    s_and_b32 s7, s7, 15
 ; GCN-NEXT:    s_lshl_b32 s7, s7, 8
 ; GCN-NEXT:    s_or_b32 s5, s5, s7
-; GCN-NEXT:    s_cmp_lg_u32 s6, 39
-; GCN-NEXT:    s_cselect_b32 s7, s49, 1
+; GCN-NEXT:    s_cmp_eq_u32 s6, 39
+; GCN-NEXT:    s_cselect_b32 s7, 1, s49
 ; GCN-NEXT:    s_lshl_b32 s7, s7, 3
-; GCN-NEXT:    s_cmp_lg_u32 s6, 38
-; GCN-NEXT:    s_cselect_b32 s8, s48, 1
+; GCN-NEXT:    s_cmp_eq_u32 s6, 38
+; GCN-NEXT:    s_cselect_b32 s8, 1, s48
 ; GCN-NEXT:    s_and_b32 s8, s8, 1
 ; GCN-NEXT:    s_lshl_b32 s8, s8, 2
 ; GCN-NEXT:    s_or_b32 s7, s7, s8
-; GCN-NEXT:    s_cmp_lg_u32 s6, 37
-; GCN-NEXT:    s_cselect_b32 s8, s46, 1
+; GCN-NEXT:    s_cmp_eq_u32 s6, 37
+; GCN-NEXT:    s_cselect_b32 s8, 1, s46
 ; GCN-NEXT:    s_lshl_b32 s8, s8, 1
-; GCN-NEXT:    s_cmp_lg_u32 s6, 36
-; GCN-NEXT:    s_cselect_b32 s9, s44, 1
+; GCN-NEXT:    s_cmp_eq_u32 s6, 36
+; GCN-NEXT:    s_cselect_b32 s9, 1, s44
 ; GCN-NEXT:    s_and_b32 s9, s9, 1
 ; GCN-NEXT:    s_or_b32 s8, s9, s8
 ; GCN-NEXT:    s_and_b32 s8, s8, 3
 ; GCN-NEXT:    s_or_b32 s7, s8, s7
 ; GCN-NEXT:    s_lshl_b32 s7, s7, 4
-; GCN-NEXT:    s_cmp_lg_u32 s6, 35
-; GCN-NEXT:    s_cselect_b32 s8, s41, 1
+; GCN-NEXT:    s_cmp_eq_u32 s6, 35
+; GCN-NEXT:    s_cselect_b32 s8, 1, s41
 ; GCN-NEXT:    s_lshl_b32 s8, s8, 3
-; GCN-NEXT:    s_cmp_lg_u32 s6, 34
-; GCN-NEXT:    s_cselect_b32 s9, s40, 1
+; GCN-NEXT:    s_cmp_eq_u32 s6, 34
+; GCN-NEXT:    s_cselect_b32 s9, 1, s40
 ; GCN-NEXT:    s_and_b32 s9, s9, 1
 ; GCN-NEXT:    s_lshl_b32 s9, s9, 2
 ; GCN-NEXT:    s_or_b32 s8, s8, s9
-; GCN-NEXT:    s_cmp_lg_u32 s6, 32
-; GCN-NEXT:    s_cselect_b32 s1, s1, 1
+; GCN-NEXT:    s_cmp_eq_u32 s6, 32
+; GCN-NEXT:    s_cselect_b32 s1, 1, s1
 ; GCN-NEXT:    s_and_b32 s1, s1, 1
-; GCN-NEXT:    s_cmp_lg_u32 s6, 33
+; GCN-NEXT:    s_cmp_eq_u32 s6, 33
 ; GCN-NEXT:    v_readlane_b32 s9, v6, 33
-; GCN-NEXT:    s_cselect_b32 s9, s9, 1
+; GCN-NEXT:    s_cselect_b32 s9, 1, s9
 ; GCN-NEXT:    s_lshl_b32 s9, s9, 1
 ; GCN-NEXT:    s_or_b32 s1, s1, s9
 ; GCN-NEXT:    s_and_b32 s1, s1, 3
@@ -1638,45 +1638,45 @@ define amdgpu_kernel void @bit128_inselt(ptr addrspace(1) %out, <128 x i1> %vec,
 ; GCN-NEXT:    s_or_b32 s1, s1, s5
 ; GCN-NEXT:    s_and_b32 s1, s1, 0xffff
 ; GCN-NEXT:    s_or_b32 s1, s1, s4
-; GCN-NEXT:    s_cmp_lg_u32 s6, 31
+; GCN-NEXT:    s_cmp_eq_u32 s6, 31
 ; GCN-NEXT:    v_readlane_b32 s4, v6, 17
-; GCN-NEXT:    s_cselect_b32 s4, s4, 1
+; GCN-NEXT:    s_cselect_b32 s4, 1, s4
 ; GCN-NEXT:    s_lshl_b32 s4, s4, 3
-; GCN-NEXT:    s_cmp_lg_u32 s6, 30
+; GCN-NEXT:    s_cmp_eq_u32 s6, 30
 ; GCN-NEXT:    v_readlane_b32 s5, v6, 16
-; GCN-NEXT:    s_cselect_b32 s5, s5, 1
+; GCN-NEXT:    s_cselect_b32 s5, 1, s5
 ; GCN-NEXT:    s_and_b32 s5, s5, 1
 ; GCN-NEXT:    s_lshl_b32 s5, s5, 2
 ; GCN-NEXT:    s_or_b32 s4, s4, s5
-; GCN-NEXT:    s_cmp_lg_u32 s6, 29
+; GCN-NEXT:    s_cmp_eq_u32 s6, 29
 ; GCN-NEXT:    v_readlane_b32 s5, v6, 15
-; GCN-NEXT:    s_cselect_b32 s5, s5, 1
+; GCN-NEXT:    s_cselect_b32 s5, 1, s5
 ; GCN-NEXT:    s_lshl_b32 s5, s5, 1
-; GCN-NEXT:    s_cmp_lg_u32 s6, 28
+; GCN-NEXT:    s_cmp_eq_u32 s6, 28
 ; GCN-NEXT:    v_readlane_b32 s7, v6, 14
-; GCN-NEXT:    s_cselect_b32 s7, s7, 1
+; GCN-NEXT:    s_cselect_b32 s7, 1, s7
 ; GCN-NEXT:    s_and_b32 s7, s7, 1
 ; GCN-NEXT:    s_or_b32 s5, s7, s5
 ; GCN-NEXT:    s_and_b32 s5, s5, 3
 ; GCN-NEXT:    s_or_b32 s4, s5, s4
 ; GCN-NEXT:    s_lshl_b32 s4, s4, 12
-; GCN-NEXT:    s_cmp_lg_u32 s6, 27
+; GCN-NEXT:    s_cmp_eq_u32 s6, 27
 ; GCN-NEXT:    v_readlane_b32 s5, v6, 13
-; GCN-NEXT:    s_cselect_b32 s5, s5, 1
+; GCN-NEXT:    s_cselect_b32 s5, 1, s5
 ; GCN-NEXT:    s_lshl_b32 s5, s5, 3
-; GCN-NEXT:    s_cmp_lg_u32 s6, 26
+; GCN-NEXT:    s_cmp_eq_u32 s6, 26
 ; GCN-NEXT:    v_readlane_b32 s7, v6, 12
-; GCN-NEXT:    s_cselect_b32 s7, s7, 1
+; GCN-NEXT:    s_cselect_b32 s7, 1, s7
 ; GCN-NEXT:    s_and_b32 s7, s7, 1
 ; GCN-NEXT:    s_lshl_b32 s7, s7, 2
 ; GCN-NEXT:    s_or_b32 s5, s5, s7
-; GCN-NEXT:    s_cmp_lg_u32 s6, 25
+; GCN-NEXT:    s_cmp_eq_u32 s6, 25
 ; GCN-NEXT:    v_readlane_b32 s7, v6, 11
-; GCN-NEXT:    s_cselect_b32 s7, s7, 1
+; GCN-NEXT:    s_cselect_b32 s7, 1, s7
 ; GCN-NEXT:    s_lshl_b32 s7, s7, 1
-; GCN-NEXT:    s_cmp_lg_u32 s6, 24
+; GCN-NEXT:    s_cmp_eq_u32 s6, 24
 ; GCN-NEXT:    v_readlane_b32 s8, v6, 10
-; GCN-NEXT:    s_cselect_b32 s8, s8, 1
+; GCN-NEXT:    s_cselect_b32 s8, 1, s8
 ; GCN-NEXT:    s_and_b32 s8, s8, 1
 ; GCN-NEXT:    s_or_b32 s7, s8, s7
 ; GCN-NEXT:    s_and_b32 s7, s7, 3
@@ -1684,45 +1684,45 @@ define amdgpu_kernel void @bit128_inselt(ptr addrspace(1) %out, <128 x i1> %vec,
 ; GCN-NEXT:    s_and_b32 s5, s5, 15
 ; GCN-NEXT:    s_lshl_b32 s5, s5, 8
 ; GCN-NEXT:    s_or_b32 s4, s4, s5
-; GCN-NEXT:    s_cmp_lg_u32 s6, 23
+; GCN-NEXT:    s_cmp_eq_u32 s6, 23
 ; GCN-NEXT:    v_readlane_b32 s5, v6, 9
-; GCN-NEXT:    s_cselect_b32 s5, s5, 1
+; GCN-NEXT:    s_cselect_b32 s5, 1, s5
 ; GCN-NEXT:    s_lshl_b32 s5, s5, 3
-; GCN-NEXT:    s_cmp_lg_u32 s6, 22
+; GCN-NEXT:    s_cmp_eq_u32 s6, 22
 ; GCN-NEXT:    v_readlane_b32 s7, v6, 8
-; GCN-NEXT:    s_cselect_b32 s7, s7, 1
+; GCN-NEXT:    s_cselect_b32 s7, 1, s7
 ; GCN-NEXT:    s_and_b32 s7, s7, 1
 ; GCN-NEXT:    s_lshl_b32 s7, s7, 2
 ; GCN-NEXT:    s_or_b32 s5, s5, s7
-; GCN-NEXT:    s_cmp_lg_u32 s6, 21
+; GCN-NEXT:    s_cmp_eq_u32 s6, 21
 ; GCN-NEXT:    v_readlane_b32 s7, v6, 7
-; GCN-NEXT:    s_cselect_b32 s7, s7, 1
+; GCN-NEXT:    s_cselect_b32 s7, 1, s7
 ; GCN-NEXT:    s_lshl_b32 s7, s7, 1
-; GCN-NEXT:    s_cmp_lg_u32 s6, 20
+; GCN-NEXT:    s_cmp_eq_u32 s6, 20
 ; GCN-NEXT:    v_readlane_b32 s8, v6, 6
-; GCN-NEXT:    s_cselect_b32 s8, s8, 1
+; GCN-NEXT:    s_cselect_b32 s8, 1, s8
 ; GCN-NEXT:    s_and_b32 s8, s8, 1
 ; GCN-NEXT:    s_or_b32 s7, s8, s7
 ; GCN-NEXT:    s_and_b32 s7, s7, 3
 ; GCN-NEXT:    s_or_b32 s5, s7, s5
 ; GCN-NEXT:    s_lshl_b32 s5, s5, 4
-; GCN-NEXT:    s_cmp_lg_u32 s6, 19
+; GCN-NEXT:    s_cmp_eq_u32 s6, 19
 ; GCN-NEXT:    v_readlane_b32 s7, v6, 5
-; GCN-NEXT:    s_cselect_b32 s7, s7, 1
+; GCN-NEXT:    s_cselect_b32 s7, 1, s7
 ; GCN-NEXT:    s_lshl_b32 s7, s7, 3
-; GCN-NEXT:    s_cmp_lg_u32 s6, 18
+; GCN-NEXT:    s_cmp_eq_u32 s6, 18
 ; GCN-NEXT:    v_readlane_b32 s8, v6, 4
-; GCN-NEXT:    s_cselect_b32 s8, s8, 1
+; GCN-NEXT:    s_cselect_b32 s8, 1, s8
 ; GCN-NEXT:    s_and_b32 s8, s8, 1
 ; GCN-NEXT:    s_lshl_b32 s8, s8, 2
 ; GCN-NEXT:    s_or_b32 s7, s7, s8
-; GCN-NEXT:    s_cmp_lg_u32 s6, 17
+; GCN-NEXT:    s_cmp_eq_u32 s6, 17
 ; GCN-NEXT:    v_readlane_b32 s8, v6, 3
-; GCN-NEXT:    s_cselect_b32 s8, s8, 1
+; GCN-NEXT:    s_cselect_b32 s8, 1, s8
 ; GCN-NEXT:    s_lshl_b32 s8, s8, 1
-; GCN-NEXT:    s_cmp_lg_u32 s6, 16
+; GCN-NEXT:    s_cmp_eq_u32 s6, 16
 ; GCN-NEXT:    v_readlane_b32 s9, v6, 2
-; GCN-NEXT:    s_cselect_b32 s9, s9, 1
+; GCN-NEXT:    s_cselect_b32 s9, 1, s9
 ; GCN-NEXT:    s_and_b32 s9, s9, 1
 ; GCN-NEXT:    s_or_b32 s8, s9, s8
 ; GCN-NEXT:    s_and_b32 s8, s8, 3
@@ -1732,45 +1732,45 @@ define amdgpu_kernel void @bit128_inselt(ptr addrspace(1) %out, <128 x i1> %vec,
 ; GCN-NEXT:    s_and_b32 s5, s5, 0xff
 ; GCN-NEXT:    s_or_b32 s4, s5, s4
 ; GCN-NEXT:    s_lshl_b32 s4, s4, 16
-; GCN-NEXT:    s_cmp_lg_u32 s6, 15
+; GCN-NEXT:    s_cmp_eq_u32 s6, 15
 ; GCN-NEXT:    v_readlane_b32 s5, v6, 32
-; GCN-NEXT:    s_cselect_b32 s5, s5, 1
+; GCN-NEXT:    s_cselect_b32 s5, 1, s5
 ; GCN-NEXT:    s_lshl_b32 s5, s5, 3
-; GCN-NEXT:    s_cmp_lg_u32 s6, 14
+; GCN-NEXT:    s_cmp_eq_u32 s6, 14
 ; GCN-NEXT:    v_readlane_b32 s7, v6, 31
-; GCN-NEXT:    s_cselect_b32 s7, s7, 1
+; GCN-NEXT:    s_cselect_b32 s7, 1, s7
 ; GCN-NEXT:    s_and_b32 s7, s7, 1
 ; GCN-NEXT:    s_lshl_b32 s7, s7, 2
 ; GCN-NEXT:    s_or_b32 s5, s5, s7
-; GCN-NEXT:    s_cmp_lg_u32 s6, 13
+; GCN-NEXT:    s_cmp_eq_u32 s6, 13
 ; GCN-NEXT:    v_readlane_b32 s7, v6, 30
-; GCN-NEXT:    s_cselect_b32 s7, s7, 1
+; GCN-NEXT:    s_cselect_b32 s7, 1, s7
 ; GCN-NEXT:    s_lshl_b32 s7, s7, 1
-; GCN-NEXT:    s_cmp_lg_u32 s6, 12
+; GCN-NEXT:    s_cmp_eq_u32 s6, 12
 ; GCN-NEXT:    v_readlane_b32 s8, v6, 29
-; GCN-NEXT:    s_cselect_b32 s8, s8, 1
+; GCN-NEXT:    s_cselect_b32 s8, 1, s8
 ; GCN-NEXT:    s_and_b32 s8, s8, 1
 ; GCN-NEXT:    s_or_b32 s7, s8, s7
 ; GCN-NEXT:    s_and_b32 s7, s7, 3
 ; GCN-NEXT:    s_or_b32 s5, s7, s5
 ; GCN-NEXT:    s_lshl_b32 s5, s5, 12
-; GCN-NEXT:    s_cmp_lg_u32 s6, 11
+; GCN-NEXT:    s_cmp_eq_u32 s6, 11
 ; GCN-NEXT:    v_readlane_b32 s7, v6, 28
-; GCN-NEXT:    s_cselect_b32 s7, s7, 1
+; GCN-NEXT:    s_cselect_b32 s7, 1, s7
 ; GCN-NEXT:    s_lshl_b32 s7, s7, 3
-; GCN-NEXT:    s_cmp_lg_u32 s6, 10
+; GCN-NEXT:    s_cmp_eq_u32 s6, 10
 ; GCN-NEXT:    v_readlane_b32 s8, v6, 27
-; GCN-NEXT:    s_cselect_b32 s8, s8, 1
+; GCN-NEXT:    s_cselect_b32 s8, 1, s8
 ; GCN-NEXT:    s_and_b32 s8, s8, 1
 ; GCN-NEXT:    s_lshl_b32 s8, s8, 2
 ; GCN-NEXT:    s_or_b32 s7, s7, s8
-; GCN-NEXT:    s_cmp_lg_u32 s6, 9
+; GCN-NEXT:    s_cmp_eq_u32 s6, 9
 ; GCN-NEXT:    v_readlane_b32 s8, v6, 26
-; GCN-NEXT:    s_cselect_b32 s8, s8, 1
+; GCN-NEXT:    s_cselect_b32 s8, 1, s8
 ; GCN-NEXT:    s_lshl_b32 s8, s8, 1
-; GCN-NEXT:    s_cmp_lg_u32 s6, 8
+; GCN-NEXT:    s_cmp_eq_u32 s6, 8
 ; GCN-NEXT:    v_readlane_b32 s9, v6, 25
-; GCN-NEXT:    s_cselect_b32 s9, s9, 1
+; GCN-NEXT:    s_cselect_b32 s9, 1, s9
 ; GCN-NEXT:    s_and_b32 s9, s9, 1
 ; GCN-NEXT:    s_or_b32 s8, s9, s8
 ; GCN-NEXT:    s_and_b32 s8, s8, 3
@@ -1778,44 +1778,44 @@ define amdgpu_kernel void @bit128_inselt(ptr addrspace(1) %out, <128 x i1> %vec,
 ; GCN-NEXT:    s_and_b32 s7, s7, 15
 ; GCN-NEXT:    s_lshl_b32 s7, s7, 8
 ; GCN-NEXT:    s_or_b32 s5, s5, s7
-; GCN-NEXT:    s_cmp_lg_u32 s6, 7
+; GCN-NEXT:    s_cmp_eq_u32 s6, 7
 ; GCN-NEXT:    v_readlane_b32 s7, v6, 24
-; GCN-NEXT:    s_cselect_b32 s7, s7, 1
+; GCN-NEXT:    s_cselect_b32 s7, 1, s7
 ; GCN-NEXT:    s_lshl_b32 s7, s7, 3
-; GCN-NEXT:    s_cmp_lg_u32 s6, 6
+; GCN-NEXT:    s_cmp_eq_u32 s6, 6
 ; GCN-NEXT:    v_readlane_b32 s8, v6, 23
-; GCN-NEXT:    s_cselect_b32 s8, s8, 1
+; GCN-NEXT:    s_cselect_b32 s8, 1, s8
 ; GCN-NEXT:    s_and_b32 s8, s8, 1
 ; GCN-NEXT:    s_lshl_b32 s8, s8, 2
 ; GCN-NEXT:    s_or_b32 s7, s7, s8
-; GCN-NEXT:    s_cmp_lg_u32 s6, 5
+; GCN-NEXT:    s_cmp_eq_u32 s6, 5
 ; GCN-NEXT:    v_readlane_b32 s8, v6, 22
-; GCN-NEXT:    s_cselect_b32 s8, s8, 1
+; GCN-NEXT:    s_cselect_b32 s8, 1, s8
 ; GCN-NEXT:    s_lshl_b32 s8, s8, 1
-; GCN-NEXT:    s_cmp_lg_u32 s6, 4
+; GCN-NEXT:    s_cmp_eq_u32 s6, 4
 ; GCN-NEXT:    v_readlane_b32 s9, v6, 21
-; GCN-NEXT:    s_cselect_b32 s9, s9, 1
+; GCN-NEXT:    s_cselect_b32 s9, 1, s9
 ; GCN-NEXT:    s_and_b32 s9, s9, 1
 ; GCN-NEXT:    s_or_b32 s8, s9, s8
 ; GCN-NEXT:    s_and_b32 s8, s8, 3
 ; GCN-NEXT:    s_or_b32 s7, s8, s7
 ; GCN-NEXT:    s_lshl_b32 s7, s7, 4
-; GCN-NEXT:    s_cmp_lg_u32 s6, 3
+; GCN-NEXT:    s_cmp_eq_u32 s6, 3
 ; GCN-NEXT:    v_readlane_b32 s8, v6, 20
-; GCN-NEXT:    s_cselect_b32 s8, s8, 1
+; GCN-NEXT:    s_cselect_b32 s8, 1, s8
 ; GCN-NEXT:    s_lshl_b32 s8, s8, 3
-; GCN-NEXT:    s_cmp_lg_u32 s6, 2
+; GCN-NEXT:    s_cmp_eq_u32 s6, 2
 ; GCN-NEXT:    v_readlane_b32 s9, v6, 19
-; GCN-NEXT:    s_cselect_b32 s9, s9, 1
+; GCN-NEXT:    s_cselect_b32 s9, 1, s9
 ; GCN-NEXT:    s_and_b32 s9, s9, 1
 ; GCN-NEXT:    s_lshl_b32 s9, s9, 2
 ; GCN-NEXT:    s_or_b32 s8, s8, s9
-; GCN-NEXT:    s_cmp_lg_u32 s6, 0
-; GCN-NEXT:    s_cselect_b32 s0, s0, 1
+; GCN-NEXT:    s_cmp_eq_u32 s6, 0
+; GCN-NEXT:    s_cselect_b32 s0, 1, s0
 ; GCN-NEXT:    s_and_b32 s0, s0, 1
-; GCN-NEXT:    s_cmp_lg_u32 s6, 1
+; GCN-NEXT:    s_cmp_eq_u32 s6, 1
 ; GCN-NEXT:    v_readlane_b32 s6, v6, 18
-; GCN-NEXT:    s_cselect_b32 s6, s6, 1
+; GCN-NEXT:    s_cselect_b32 s6, 1, s6
 ; GCN-NEXT:    s_lshl_b32 s6, s6, 1
 ; GCN-NEXT:    s_or_b32 s0, s0, s6
 ; GCN-NEXT:    s_and_b32 s0, s0, 3
diff --git a/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll b/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll
index be16fac4c53f7..1d88b2a63a726 100644
--- a/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll
+++ b/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll
@@ -499,16 +499,16 @@ define amdgpu_kernel void @dynamic_insertelement_v2f32(ptr addrspace(1) %out, <2
 ; SI:       ; %bb.0:
 ; SI-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x2
 ; SI-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
-; SI-NEXT:    v_mov_b32_e32 v0, 0x40a00000
+; SI-NEXT:    v_mov_b32_e32 v2, 0x40a00000
 ; SI-NEXT:    s_mov_b32 s7, 0x100f000
 ; SI-NEXT:    s_mov_b32 s6, -1
 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
-; SI-NEXT:    s_cmp_lg_u32 s2, 1
-; SI-NEXT:    v_mov_b32_e32 v1, s1
+; SI-NEXT:    s_cmp_eq_u32 s2, 1
+; SI-NEXT:    v_mov_b32_e32 v0, s1
 ; SI-NEXT:    s_cselect_b64 vcc, -1, 0
-; SI-NEXT:    s_cmp_lg_u32 s2, 0
-; SI-NEXT:    v_cndmask_b32_e32 v1, v0, v1, vcc
-; SI-NEXT:    v_mov_b32_e32 v2, s0
+; SI-NEXT:    s_cmp_eq_u32 s2, 0
+; SI-NEXT:    v_cndmask_b32_e32 v1, v0, v2, vcc
+; SI-NEXT:    v_mov_b32_e32 v0, s0
 ; SI-NEXT:    s_cselect_b64 vcc, -1, 0
 ; SI-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
@@ -518,16 +518,16 @@ define amdgpu_kernel void @dynamic_insertelement_v2f32(ptr addrspace(1) %out, <2
 ; VI:       ; %bb.0:
 ; VI-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x8
 ; VI-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
-; VI-NEXT:    v_mov_b32_e32 v0, 0x40a00000
+; VI-NEXT:    v_mov_b32_e32 v2, 0x40a00000
 ; VI-NEXT:    s_mov_b32 s7, 0x1100f000
 ; VI-NEXT:    s_mov_b32 s6, -1
 ; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    s_cmp_lg_u32 s2, 1
-; VI-NEXT:    v_mov_b32_e32 v1, s1
+; VI-NEXT:    s_cmp_eq_u32 s2, 1
+; VI-NEXT:    v_mov_b32_e32 v0, s1
 ; VI-NEXT:    s_cselect_b64 vcc, -1, 0
-; VI-NEXT:    s_cmp_lg_u32 s2, 0
-; VI-NEXT:    v_cndmask_b32_e32 v1, v0, v1, vcc
-; VI-NEXT:    v_mov_b32_e32 v2, s0
+; VI-NEXT:    s_cmp_eq_u32 s2, 0
+; VI-NEXT:    v_cndmask_b32_e32 v1, v0, v2, vcc
+; VI-NEXT:    v_mov_b32_e32 v0, s0
 ; VI-NEXT:    s_cselect_b64 vcc, -1, 0
 ; VI-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; VI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
@@ -541,49 +541,49 @@ define amdgpu_kernel void @dynamic_insertelement_v3f32(ptr addrspace(1) %out, <3
 ; SI-LABEL: dynamic_insertelement_v3f32:
 ; SI:       ; %bb.0:
 ; SI-NEXT:    s_load_dword s10, s[8:9], 0x8
-; SI-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
-; SI-NEXT:    s_load_dwordx4 s[4:7], s[8:9], 0x4
-; SI-NEXT:    v_mov_b32_e32 v0, 0x40a00000
-; SI-NEXT:    s_mov_b32 s3, 0x100f000
+; SI-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x4
+; SI-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
+; SI-NEXT:    v_mov_b32_e32 v3, 0x40a00000
+; SI-NEXT:    s_mov_b32 s7, 0x100f000
 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
-; SI-NEXT:    s_cmp_lg_u32 s10, 2
+; SI-NEXT:    s_cmp_eq_u32 s10, 2
+; SI-NEXT:    v_mov_b32_e32 v0, s2
 ; SI-NEXT:    s_cselect_b64 vcc, -1, 0
-; SI-NEXT:    v_mov_b32_e32 v1, s6
-; SI-NEXT:    s_cmp_lg_u32 s10, 1
-; SI-NEXT:    v_cndmask_b32_e32 v2, v0, v1, vcc
-; SI-NEXT:    v_mov_b32_e32 v1, s5
+; SI-NEXT:    s_cmp_eq_u32 s10, 1
+; SI-NEXT:    v_cndmask_b32_e32 v2, v0, v3, vcc
+; SI-NEXT:    v_mov_b32_e32 v0, s1
 ; SI-NEXT:    s_cselect_b64 vcc, -1, 0
-; SI-NEXT:    s_cmp_lg_u32 s10, 0
-; SI-NEXT:    v_cndmask_b32_e32 v1, v0, v1, vcc
-; SI-NEXT:    v_mov_b32_e32 v3, s4
+; SI-NEXT:    s_cmp_eq_u32 s10, 0
+; SI-NEXT:    v_cndmask_b32_e32 v1, v0, v3, vcc
+; SI-NEXT:    v_mov_b32_e32 v0, s0
 ; SI-NEXT:    s_cselect_b64 vcc, -1, 0
-; SI-NEXT:    s_mov_b32 s2, -1
+; SI-NEXT:    s_mov_b32 s6, -1
 ; SI-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
-; SI-NEXT:    buffer_store_dwordx3 v[0:2], off, s[0:3], 0
+; SI-NEXT:    buffer_store_dwordx3 v[0:2], off, s[4:7], 0
 ; SI-NEXT:    s_endpgm
 ;
 ; VI-LABEL: dynamic_insertelement_v3f32:
 ; VI:       ; %bb.0:
 ; VI-NEXT:    s_load_dword s10, s[8:9], 0x20
-; VI-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
-; VI-NEXT:    s_load_dwordx4 s[4:7], s[8:9], 0x10
-; VI-NEXT:    v_mov_b32_e32 v0, 0x40a00000
-; VI-NEXT:    s_mov_b32 s3, 0x1100f000
+; VI-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x10
+; VI-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
+; VI-NEXT:    v_mov_b32_e32 v3, 0x40a00000
+; VI-NEXT:    s_mov_b32 s7, 0x1100f000
 ; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    s_cmp_lg_u32 s10, 2
+; VI-NEXT:    s_cmp_eq_u32 s10, 2
+; VI-NEXT:    v_mov_b32_e32 v0, s2
 ; VI-NEXT:    s_cselect_b64 vcc, -1, 0
-; VI-NEXT:    v_mov_b32_e32 v1, s6
-; VI-NEXT:    s_cmp_lg_u32 s10, 1
-; VI-NEXT:    v_cndmask_b32_e32 v2, v0, v1, vcc
-; VI-NEXT:    v_mov_b32_e32 v1, s5
+; VI-NEXT:    s_cmp_eq_u32 s10, 1
+; VI-NEXT:    v_cndmask_b32_e32 v2, v0, v3, vcc
+; VI-NEXT:    v_mov_b32_e32 v0, s1
 ; VI-NEXT:    s_cselect_b64 vcc, -1, 0
-; VI-NEXT:    s_cmp_lg_u32 s10, 0
-; VI-NEXT:    v_cndmask_b32_e32 v1, v0, v1, vcc
-; VI-NEXT:    v_mov_b32_e32 v3, s4
+; VI-NEXT:    s_cmp_eq_u32 s10, 0
+; VI-NEXT:    v_cndmask_b32_e32 v1, v0, v3, vcc
+; VI-NEXT:    v_mov_b32_e32 v0, s0
 ; VI-NEXT:    s_cselect_b64 vcc, -1, 0
-; VI-NEXT:    s_mov_b32 s2, -1
+; VI-NEXT:    s_mov_b32 s6, -1
 ; VI-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
-; VI-NEXT:    buffer_store_dwordx3 v[0:2], off, s[0:3], 0
+; VI-NEXT:    buffer_store_dwordx3 v[0:2], off, s[4:7], 0
 ; VI-NEXT:    s_endpgm
   %vecins = insertelement <3 x float> %a, float 5.000000e+00, i32 %b
   store <3 x float> %vecins, ptr addrspace(1) %out, align 16
@@ -594,57 +594,57 @@ define amdgpu_kernel void @dynamic_insertelement_v4f32(ptr addrspace(1) %out, <4
 ; SI-LABEL: dynamic_insertelement_v4f32:
 ; SI:       ; %bb.0:
 ; SI-NEXT:    s_load_dword s10, s[8:9], 0x8
-; SI-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
-; SI-NEXT:    s_load_dwordx4 s[4:7], s[8:9], 0x4
-; SI-NEXT:    v_mov_b32_e32 v0, 0x40a00000
-; SI-NEXT:    s_mov_b32 s3, 0x100f000
+; SI-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x4
+; SI-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
+; SI-NEXT:    v_mov_b32_e32 v4, 0x40a00000
+; SI-NEXT:    s_mov_b32 s7, 0x100f000
 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
-; SI-NEXT:    s_cmp_lg_u32 s10, 3
+; SI-NEXT:    s_cmp_eq_u32 s10, 3
+; SI-NEXT:    v_mov_b32_e32 v0, s3
 ; SI-NEXT:    s_cselect_b64 vcc, -1, 0
-; SI-NEXT:    v_mov_b32_e32 v1, s7
-; SI-NEXT:    s_cmp_lg_u32 s10, 2
-; SI-NEXT:    v_cndmask_b32_e32 v3, v0, v1, vcc
-; SI-NEXT:    v_mov_b32_e32 v1, s6
+; SI-NEXT:    s_cmp_eq_u32 s10, 2
+; SI-NEXT:    v_cndmask_b32_e32 v3, v0, v4, vcc
+; SI-NEXT:    v_mov_b32_e32 v0, s2
 ; SI-NEXT:    s_cselect_b64 vcc, -1, 0
-; SI-NEXT:    s_cmp_lg_u32 s10, 1
-; SI-NEXT:    v_cndmask_b32_e32 v2, v0, v1, vcc
-; SI-NEXT:    v_mov_b32_e32 v1, s5
+; SI-NEXT:    s_cmp_eq_u32 s10, 1
+; SI-NEXT:    v_cndmask_b32_e32 v2, v0, v4, vcc
+; SI-NEXT:    v_mov_b32_e32 v0, s1
 ; SI-NEXT:    s_cselect_b64 vcc, -1, 0
-; SI-NEXT:    s_cmp_lg_u32 s10, 0
-; SI-NEXT:    v_cndmask_b32_e32 v1, v0, v1, vcc
-; SI-NEXT:    v_mov_b32_e32 v4, s4
+; SI-NEXT:    s_cmp_eq_u32 s10, 0
+; SI-NEXT:    v_cndmask_b32_e32 v1, v0, v4, vcc
+; SI-NEXT:    v_mov_b32_e32 v0, s0
 ; SI-NEXT:    s_cselect_b64 vcc, -1, 0
-; SI-NEXT:    s_mov_b32 s2, -1
+; SI-NEXT:    s_mov_b32 s6, -1
 ; SI-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
-; SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
+; SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
 ; SI-NEXT:    s_endpgm
 ;
 ; VI-LABEL: dynamic_insertelement_v4f32:
 ; VI:       ; %bb.0:
 ; VI-NEXT:    s_load_dword s10, s[8:9], 0x20
-; VI-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
-; VI-NEXT:    s_load_dwordx4 s[4:7], s[8:9], 0x10
-; VI-NEXT:    v_mov_b32_e32 v0, 0x40a00000
-; VI-NEXT:    s_mov_b32 s3, 0x1100f000
+; VI-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x10
+; VI-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
+; VI-NEXT:    v_mov_b32_e32 v4, 0x40a00000
+; VI-NEXT:    s_mov_b32 s7, 0x1100f000
 ; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    s_cmp_lg_u32 s10, 3
+; VI-NEXT:    s_cmp_eq_u32 s10, 3
+; VI-NEXT:    v_mov_b32_e32 v0, s3
 ; VI-NEXT:    s_cselect_b64 vcc, -1, 0
-; VI-NEXT:    v_mov_b32_e32 v1, s7
-; VI-NEXT:    s_cmp_lg_u32 s10, 2
-; VI-NEXT:    v_cndmask_b32_e32 v3, v0, v1, vcc
-; VI-NEXT:    v_mov_b32_e32 v1, s6
+; VI-NEXT:    s_cmp_eq_u32 s10, 2
+; VI-NEXT:    v_cndmask_b32_e32 v3, v0, v4, vcc
+; VI-NEXT:    v_mov_b32_e32 v0, s2
 ; VI-NEXT:    s_cselect_b64 vcc, -1, 0
-; VI-NEXT:    s_cmp_lg_u32 s10, 1
-; VI-NEXT:    v_cndmask_b32_e32 v2, v0, v1, vcc
-; VI-NEXT:    v_mov_b32_e32 v1, s5
+; VI-NEXT:    s_cmp_eq_u32 s10, 1
+; VI-NEXT:    v_cndmask_b32_e32 v2, v0, v4, vcc
+; VI-NEXT:    v_mov_b32_e32 v0, s1
 ; VI-NEXT:    s_cselect_b64 vcc, -1, 0
-; VI-NEXT:    s_cmp_lg_u32 s10, 0
-; VI-NEXT:    v_cndmask_b32_e32 v1, v0, v1, vcc
-; VI-NEXT:    v_mov_b32_e32 v4, s4
+; VI-NEXT:    s_cmp_eq_u32 s10, 0
+; VI-NEXT:    v_cndmask_b32_e32 v1, v0, v4, vcc
+; VI-NEXT:    v_mov_b32_e32 v0, s0
 ; VI-NEXT:    s_cselect_b64 vcc, -1, 0
-; VI-NEXT:    s_mov_b32 s2, -1
+; VI-NEXT:    s_mov_b32 s6, -1
 ; VI-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
-; VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
+; VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
 ; VI-NEXT:    s_endpgm
   %vecins = insertelement <4 x float> %a, float 5.000000e+00, i32 %b
   store <4 x float> %vecins, ptr addrspace(1) %out, align 16
@@ -1024,10 +1024,10 @@ define amdgpu_kernel void @dynamic_insertelement_v2i32(ptr addrspace(1) %out, <2
 ; SI-NEXT:    s_mov_b32 s7, 0x100f000
 ; SI-NEXT:    s_mov_b32 s6, -1
 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
-; SI-NEXT:    s_cmp_lg_u32 s2, 1
-; SI-NEXT:    s_cselect_b32 s1, s1, 5
-; SI-NEXT:    s_cmp_lg_u32 s2, 0
-; SI-NEXT:    s_cselect_b32 s0, s0, 5
+; SI-NEXT:    s_cmp_eq_u32 s2, 1
+; SI-NEXT:    s_cselect_b32 s1, 5, s1
+; SI-NEXT:    s_cmp_eq_u32 s2, 0
+; SI-NEXT:    s_cselect_b32 s0, 5, s0
 ; SI-NEXT:    v_mov_b32_e32 v0, s0
 ; SI-NEXT:    v_mov_b32_e32 v1, s1
 ; SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
@@ -1040,10 +1040,10 @@ define amdgpu_kernel void @dynamic_insertelement_v2i32(ptr addrspace(1) %out, <2
 ; VI-NEXT:    s_mov_b32 s7, 0x1100f000
 ; VI-NEXT:    s_mov_b32 s6, -1
 ; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    s_cmp_lg_u32 s2, 1
-; VI-NEXT:    s_cselect_b32 s1, s1, 5
-; VI-NEXT:    s_cmp_lg_u32 s2, 0
-; VI-NEXT:    s_cselect_b32 s0, s0, 5
+; VI-NEXT:    s_cmp_eq_u32 s2, 1
+; VI-NEXT:    s_cselect_b32 s1, 5, s1
+; VI-NEXT:    s_cmp_eq_u32 s2, 0
+; VI-NEXT:    s_cselect_b32 s0, 5, s0
 ; VI-NEXT:    v_mov_b32_e32 v0, s0
 ; VI-NEXT:    v_mov_b32_e32 v1, s1
 ; VI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
@@ -1062,12 +1062,12 @@ define amdgpu_kernel void @dynamic_insertelement_v3i32(ptr addrspace(1) %out, <3
 ; SI-NEXT:    s_mov_b32 s7, 0x100f000
 ; SI-NEXT:    s_mov_b32 s6, -1
 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
-; SI-NEXT:    s_cmp_lg_u32 s10, 2
-; SI-NEXT:    s_cselect_b32 s2, s2, 5
-; SI-NEXT:    s_cmp_lg_u32 s10, 1
-; SI-NEXT:    s_cselect_b32 s1, s1, 5
-; SI-NEXT:    s_cmp_lg_u32 s10, 0
-; SI-NEXT:    s_cselect_b32 s0, s0, 5
+; SI-NEXT:    s_cmp_eq_u32 s10, 2
+; SI-NEXT:    s_cselect_b32 s2, 5, s2
+; SI-NEXT:    s_cmp_eq_u32 s10, 1
+; SI-NEXT:    s_cselect_b32 s1, 5, s1
+; SI-NEXT:    s_cmp_eq_u32 s10, 0
+; SI-NEXT:    s_cselect_b32 s0, 5, s0
 ; SI-NEXT:    v_mov_b32_e32 v0, s0
 ; SI-NEXT:    v_mov_b32_e32 v1, s1
 ; SI-NEXT:    v_mov_b32_e32 v2, s2
@@ -1082,12 +1082,12 @@ define amdgpu_kernel void @dynamic_insertelement_v3i32(ptr addrspace(1) %out, <3
 ; VI-NEXT:    s_mov_b32 s7, 0x1100f000
 ; VI-NEXT:    s_mov_b32 s6, -1
 ; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    s_cmp_lg_u32 s10, 2
-; VI-NEXT:    s_cselect_b32 s2, s2, 5
-; VI-NEXT:    s_cmp_lg_u32 s10, 1
-; VI-NEXT:    s_cselect_b32 s1, s1, 5
-; VI-NEXT:    s_cmp_lg_u32 s10, 0
-; VI-NEXT:    s_cselect_b32 s0, s0, 5
+; VI-NEXT:    s_cmp_eq_u32 s10, 2
+; VI-NEXT:    s_cselect_b32 s2, 5, s2
+; VI-NEXT:    s_cmp_eq_u32 s10, 1
+; VI-NEXT:    s_cselect_b32 s1, 5, s1
+; VI-NEXT:    s_cmp_eq_u32 s10, 0
+; VI-NEXT:    s_cselect_b32 s0, 5, s0
 ; VI-NEXT:    v_mov_b32_e32 v0, s0
 ; VI-NEXT:    v_mov_b32_e32 v1, s1
 ; VI-NEXT:    v_mov_b32_e32 v2, s2
@@ -1777,81 +1777,81 @@ define amdgpu_kernel void @dynamic_insertelement_v16i8(ptr addrspace(1) %out, <1
 ; SI-NEXT:    s_mov_b32 s2, -1
 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
 ; SI-NEXT:    s_lshr_b32 s8, s7, 24
-; SI-NEXT:    s_cmp_lg_u32 s10, 15
-; SI-NEXT:    s_cselect_b32 s8, s8, 5
+; SI-NEXT:    s_cmp_eq_u32 s10, 15
+; SI-NEXT:    s_cselect_b32 s8, 5, s8
 ; SI-NEXT:    s_lshl_b32 s8, s8, 24
 ; SI-NEXT:    s_lshr_b32 s9, s7, 16
-; SI-NEXT:    s_cmp_lg_u32 s10, 14
-; SI-NEXT:    s_cselect_b32 s9, s9, 5
+; SI-NEXT:    s_cmp_eq_u32 s10, 14
+; SI-NEXT:    s_cselect_b32 s9, 5, s9
 ; SI-NEXT:    s_and_b32 s9, s9, 0xff
 ; SI-NEXT:    s_lshl_b32 s9, s9, 16
 ; SI-NEXT:    s_or_b32 s8, s8, s9
 ; SI-NEXT:    s_lshr_b32 s9, s7, 8
-; SI-NEXT:    s_cmp_lg_u32 s10, 13
-; SI-NEXT:    s_cselect_b32 s9, s9, 5
+; SI-NEXT:    s_cmp_eq_u32 s10, 13
+; SI-NEXT:    s_cselect_b32 s9, 5, s9
 ; SI-NEXT:    s_lshl_b32 s9, s9, 8
-; SI-NEXT:    s_cmp_lg_u32 s10, 12
-; SI-NEXT:    s_cselect_b32 s7, s7, 5
+; SI-NEXT:    s_cmp_eq_u32 s10, 12
+; SI-NEXT:    s_cselect_b32 s7, 5, s7
 ; SI-NEXT:    s_and_b32 s7, s7, 0xff
 ; SI-NEXT:    s_or_b32 s7, s7, s9
 ; SI-NEXT:    s_and_b32 s7, s7, 0xffff
 ; SI-NEXT:    s_or_b32 s7, s7, s8
 ; SI-NEXT:    s_lshr_b32 s8, s6, 24
-; SI-NEXT:    s_cmp_lg_u32 s10, 11
-; SI-NEXT:    s_cselect_b32 s8, s8, 5
+; SI-NEXT:    s_cmp_eq_u32 s10, 11
+; SI-NEXT:    s_cselect_b32 s8, 5, s8
 ; SI-NEXT:    s_lshl_b32 s8, s8, 24
 ; SI-NEXT:    s_lshr_b32 s9, s6, 16
-; SI-NEXT:    s_cmp_lg_u32 s10, 10
-; SI-NEXT:    s_cselect_b32 s9, s9, 5
+; SI-NEXT:    s_cmp_eq_u32 s10, 10
+; SI-NEXT:    s_cselect_b32 s9, 5, s9
 ; SI-NEXT:    s_and_b32 s9, s9, 0xff
 ; SI-NEXT:    s_lshl_b32 s9, s9, 16
 ; SI-NEXT:    s_or_b32 s8, s8, s9
 ; SI-NEXT:    s_lshr_b32 s9, s6, 8
-; SI-NEXT:    s_cmp_lg_u32 s10, 9
-; SI-NEXT:    s_cselect_b32 s9, s9, 5
+; SI-NEXT:    s_cmp_eq_u32 s10, 9
+; SI-NEXT:    s_cselect_b32 s9, 5, s9
 ; SI-NEXT:    s_lshl_b32 s9, s9, 8
-; SI-NEXT:    s_cmp_lg_u32 s10, 8
-; SI-NEXT:    s_cselect_b32 s6, s6, 5
+; SI-NEXT:    s_cmp_eq_u32 s10, 8
+; SI-NEXT:    s_cselect_b32 s6, 5, s6
 ; SI-NEXT:    s_and_b32 s6, s6, 0xff
 ; SI-NEXT:    s_or_b32 s6, s6, s9
 ; SI-NEXT:    s_and_b32 s6, s6, 0xffff
 ; SI-NEXT:    s_or_b32 s6, s6, s8
 ; SI-NEXT:    s_lshr_b32 s8, s5, 24
-; SI-NEXT:    s_cmp_lg_u32 s10, 7
-; SI-NEXT:    s_cselect_b32 s8, s8, 5
+; SI-NEXT:    s_cmp_eq_u32 s10, 7
+; SI-NEXT:    s_cselect_b32 s8, 5, s8
 ; SI-NEXT:    s_lshl_b32 s8, s8, 24
 ; SI-NEXT:    s_lshr_b32 s9, s5, 16
-; SI-NEXT:    s_cmp_lg_u32 s10, 6
-; SI-NEXT:    s_cselect_b32 s9, s9, 5
+; SI-NEXT:    s_cmp_eq_u32 s10, 6
+; SI-NEXT:    s_cselect_b32 s9, 5, s9
 ; SI-NEXT:    s_and_b32 s9, s9, 0xff
 ; SI-NEXT:    s_lshl_b32 s9, s9, 16
 ; SI-NEXT:    s_or_b32 s8, s8, s9
 ; SI-NEXT:    s_lshr_b32 s9, s5, 8
-; SI-NEXT:    s_cmp_lg_u32 s10, 5
-; SI-NEXT:    s_cselect_b32 s9, s9, 5
+; SI-NEXT:    s_cmp_eq_u32 s10, 5
+; SI-NEXT:    s_cselect_b32 s9, 5, s9
 ; SI-NEXT:    s_lshl_b32 s9, s9, 8
-; SI-NEXT:    s_cmp_lg_u32 s10, 4
-; SI-NEXT:    s_cselect_b32 s5, s5, 5
+; SI-NEXT:    s_cmp_eq_u32 s10, 4
+; SI-NEXT:    s_cselect_b32 s5, 5, s5
 ; SI-NEXT:    s_and_b32 s5, s5, 0xff
 ; SI-NEXT:    s_or_b32 s5, s5, s9
 ; SI-NEXT:    s_and_b32 s5, s5, 0xffff
 ; SI-NEXT:    s_or_b32 s5, s5, s8
 ; SI-NEXT:    s_lshr_b32 s8, s4, 24
-; SI-NEXT:    s_cmp_lg_u32 s10, 3
-; SI-NEXT:    s_cselect_b32 s8, s8, 5
+; SI-NEXT:    s_cmp_eq_u32 s10, 3
+; SI-NEXT:    s_cselect_b32 s8, 5, s8
 ; SI-NEXT:    s_lshl_b32 s8, s8, 24
 ; SI-NEXT:    s_lshr_b32 s9, s4, 16
-; SI-NEXT:    s_cmp_lg_u32 s10, 2
-; SI-NEXT:    s_cselect_b32 s9, s9, 5
+; SI-NEXT:    s_cmp_eq_u32 s10, 2
+; SI-NEXT:    s_cselect_b32 s9, 5, s9
 ; SI-NEXT:    s_and_b32 s9, s9, 0xff
 ; SI-NEXT:    s_lshl_b32 s9, s9, 16
 ; SI-NEXT:    s_or_b32 s8, s8, s9
 ; SI-NEXT:    s_lshr_b32 s9, s4, 8
-; SI-NEXT:    s_cmp_lg_u32 s10, 1
-; SI-NEXT:    s_cselect_b32 s9, s9, 5
+; SI-NEXT:    s_cmp_eq_u32 s10, 1
+; SI-NEXT:    s_cselect_b32 s9, 5, s9
 ; SI-NEXT:    s_lshl_b32 s9, s9, 8
-; SI-NEXT:    s_cmp_lg_u32 s10, 0
-; SI-NEXT:    s_cselect_b32 s4, s4, 5
+; SI-NEXT:    s_cmp_eq_u32 s10, 0
+; SI-NEXT:    s_cselect_b32 s4, 5, s4
 ; SI-NEXT:    s_and_b32 s4, s4, 0xff
 ; SI-NEXT:    s_or_b32 s4, s4, s9
 ; SI-NEXT:    s_and_b32 s4, s4, 0xffff
@@ -1872,81 +1872,81 @@ define amdgpu_kernel void @dynamic_insertelement_v16i8(ptr addrspace(1) %out, <1
 ; VI-NEXT:    s_mov_b32 s2, -1
 ; VI-NEXT:    s_waitcnt lgkmcnt(0)
 ; VI-NEXT:    s_lshr_b32 s8, s7, 24
-; VI-NEXT:    s_cmp_lg_u32 s10, 15
-; VI-NEXT:    s_cselect_b32 s8, s8, 5
+; VI-NEXT:    s_cmp_eq_u32 s10, 15
+; VI-NEXT:    s_cselect_b32 s8, 5, s8
 ; VI-NEXT:    s_lshl_b32 s8, s8, 8
 ; VI-NEXT:    s_lshr_b32 s9, s7, 16
-; VI-NEXT:    s_cmp_lg_u32 s10, 14
-; VI-NEXT:    s_cselect_b32 s9, s9, 5
+; VI-NEXT:    s_cmp_eq_u32 s10, 14
+; VI-NEXT:    s_cselect_b32 s9, 5, s9
 ; VI-NEXT:    s_and_b32 s9, s9, 0xff
 ; VI-NEXT:    s_or_b32 s8, s9, s8
 ; VI-NEXT:    s_lshl_b32 s8, s8, 16
 ; VI-NEXT:    s_lshr_b32 s9, s7, 8
-; VI-NEXT:    s_cmp_lg_u32 s10, 13
-; VI-NEXT:    s_cselect_b32 s9, s9, 5
+; VI-NEXT:    s_cmp_eq_u32 s10, 13
+; VI-NEXT:    s_cselect_b32 s9, 5, s9
 ; VI-NEXT:    s_lshl_b32 s9, s9, 8
-; VI-NEXT:    s_cmp_lg_u32 s10, 12
-; VI-NEXT:    s_cselect_b32 s7, s7, 5
+; VI-NEXT:    s_cmp_eq_u32 s10, 12
+; VI-NEXT:    s_cselect_b32 s7, 5, s7
 ; VI-NEXT:    s_and_b32 s7, s7, 0xff
 ; VI-NEXT:    s_or_b32 s7, s7, s9
 ; VI-NEXT:    s_and_b32 s7, s7, 0xffff
 ; VI-NEXT:    s_or_b32 s7, s7, s8
 ; VI-NEXT:    s_lshr_b32 s8, s6, 24
-; VI-NEXT:    s_cmp_lg_u32 s10, 11
-; VI-NEXT:    s_cselect_b32 s8, s8, 5
+; VI-NEXT:    s_cmp_eq_u32 s10, 11
+; VI-NEXT:    s_cselect_b32 s8, 5, s8
 ; VI-NEXT:    s_lshl_b32 s8, s8, 8
 ; VI-NEXT:    s_lshr_b32 s9, s6, 16
-; VI-NEXT:    s_cmp_lg_u32 s10, 10
-; VI-NEXT:    s_cselect_b32 s9, s9, 5
+; VI-NEXT:    s_cmp_eq_u32 s10, 10
+; VI-NEXT:    s_cselect_b32 s9, 5, s9
 ; VI-NEXT:    s_and_b32 s9, s9, 0xff
 ; VI-NEXT:    s_or_b32 s8, s9, s8
 ; VI-NEXT:    s_lshl_b32 s8, s8, 16
 ; VI-NEXT:    s_lshr_b32 s9, s6, 8
-; VI-NEXT:    s_cmp_lg_u32 s10, 9
-; VI-NEXT:    s_cselect_b32 s9, s9, 5
+; VI-NEXT:    s_cmp_eq_u32 s10, 9
+; VI-NEXT:    s_cselect_b32 s9, 5, s9
 ; VI-NEXT:    s_lshl_b32 s9, s9, 8
-; VI-NEXT:    s_cmp_lg_u32 s10, 8
-; VI-NEXT:    s_cselect_b32 s6, s6, 5
+; VI-NEXT:    s_cmp_eq_u32 s10, 8
+; VI-NEXT:    s_cselect_b32 s6, 5, s6
 ; VI-NEXT:    s_and_b32 s6, s6, 0xff
 ; VI-NEXT:    s_or_b32 s6, s6, s9
 ; VI-NEXT:    s_and_b32 s6, s6, 0xffff
 ; VI-NEXT:    s_or_b32 s6, s6, s8
 ; VI-NEXT:    s_lshr_b32 s8, s5, 24
-; VI-NEXT:    s_cmp_lg_u32 s10, 7
-; VI-NEXT:    s_cselect_b32 s8, s8, 5
+; VI-NEXT:    s_cmp_eq_u32 s10, 7
+; VI-NEXT:    s_cselect_b32 s8, 5, s8
 ; VI-NEXT:    s_lshl_b32 s8, s8, 8
 ; VI-NEXT:    s_lshr_b32 s9, s5, 16
-; VI-NEXT:    s_cmp_lg_u32 s10, 6
-; VI-NEXT:    s_cselect_b32 s9, s9, 5
+; VI-NEXT:    s_cmp_eq_u32 s10, 6
+; VI-NEXT:    s_cselect_b32 s9, 5, s9
 ; VI-NEXT:    s_and_b32 s9, s9, 0xff
 ; VI-NEXT:    s_or_b32 s8, s9, s8
 ; VI-NEXT:    s_lshl_b32 s8, s8, 16
 ; VI-NEXT:    s_lshr_b32 s9, s5, 8
-; VI-NEXT:    s_cmp_lg_u32 s10, 5
-; VI-NEXT:    s_cselect_b32 s9, s9, 5
+; VI-NEXT:    s_cmp_eq_u32 s10, 5
+; VI-NEXT:    s_cselect_b32 s9, 5, s9
 ; VI-NEXT:    s_lshl_b32 s9, s9, 8
-; VI-NEXT:    s_cmp_lg_u32 s10, 4
-; VI-NEXT:    s_cselect_b32 s5, s5, 5
+; VI-NEXT:    s_cmp_eq_u32 s10, 4
+; VI-NEXT:    s_cselect_b32 s5, 5, s5
 ; VI-NEXT:    s_and_b32 s5, s5, 0xff
 ; VI-NEXT:    s_or_b32 s5, s5, s9
 ; VI-NEXT:    s_and_b32 s5, s5, 0xffff
 ; VI-NEXT:    s_or_b32 s5, s5, s8
 ; VI-NEXT:    s_lshr_b32 s8, s4, 24
-; VI-NEXT:    s_cmp_lg_u32 s10, 3
-; VI-NEXT:    s_cselect_b32 s8, s8, 5
+; VI-NEXT:    s_cmp_eq_u32 s10, 3
+; VI-NEXT:    s_cselect_b32 s8, 5, s8
 ; VI-NEXT:    s_lshl_b32 s8, s8, 8
 ; VI-NEXT:    s_lshr_b32 s9, s4, 16
-; VI-NEXT:    s_cmp_lg_u32 s10, 2
-; VI-NEXT:    s_cselect_b32 s9, s9, 5
+; VI-NEXT:    s_cmp_eq_u32 s10, 2
+; VI-NEXT:    s_cselect_b32 s9, 5, s9
 ; VI-NEXT:    s_and_b32 s9, s9, 0xff
 ; VI-NEXT:    s_or_b32 s8, s9, s8
 ; VI-NEXT:    s_lshl_b32 s8, s8, 16
 ; VI-NEXT:    s_lshr_b32 s9, s4, 8
-; VI-NEXT:    s_cmp_lg_u32 s10, 1
-; VI-NEXT:    s_cselect_b32 s9, s9, 5
+; VI-NEXT:    s_cmp_eq_u32 s10, 1
+; VI-NEXT:    s_cselect_b32 s9, 5, s9
 ; VI-NEXT:    s_lshl_b32 s9, s9, 8
-; VI-NEXT:    s_cmp_lg_u32 s10, 0
-; VI-NEXT:    s_cselect_b32 s4, s4, 5
+; VI-NEXT:    s_cmp_eq_u32 s10, 0
+; VI-NEXT:    s_cselect_b32 s4, 5, s4
 ; VI-NEXT:    s_and_b32 s4, s4, 0xff
 ; VI-NEXT:    s_or_b32 s4, s4, s9
 ; VI-NEXT:    s_and_b32 s4, s4, 0xffff
diff --git a/llvm/test/CodeGen/AMDGPU/insert_vector_elt.v2bf16.ll b/llvm/test/CodeGen/AMDGPU/insert_vector_elt.v2bf16.ll
index 47a371d8de07c..7338b693cb525 100644
--- a/llvm/test/CodeGen/AMDGPU/insert_vector_elt.v2bf16.ll
+++ b/llvm/test/CodeGen/AMDGPU/insert_vector_elt.v2bf16.ll
@@ -1054,42 +1054,42 @@ define amdgpu_kernel void @v_insertelement_v8bf16_dynamic(ptr addrspace(1) %out,
 ; SI-NEXT:    s_load_dwordx2 s[8:9], s[8:9], 0x4
 ; SI-NEXT:    s_mov_b64 s[2:3], s[6:7]
 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
-; SI-NEXT:    s_cmp_eq_u32 s9, 6
+; SI-NEXT:    s_cmp_lg_u32 s9, 6
 ; SI-NEXT:    v_mov_b32_e32 v6, s8
 ; SI-NEXT:    s_cselect_b64 vcc, -1, 0
-; SI-NEXT:    s_cmp_eq_u32 s9, 7
+; SI-NEXT:    s_cmp_lg_u32 s9, 7
 ; SI-NEXT:    s_waitcnt vmcnt(0)
-; SI-NEXT:    v_cndmask_b32_e32 v7, v3, v6, vcc
+; SI-NEXT:    v_cndmask_b32_e32 v7, v6, v3, vcc
 ; SI-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
 ; SI-NEXT:    s_cselect_b64 vcc, -1, 0
-; SI-NEXT:    s_cmp_eq_u32 s9, 4
-; SI-NEXT:    v_cndmask_b32_e32 v3, v3, v6, vcc
+; SI-NEXT:    s_cmp_lg_u32 s9, 4
+; SI-NEXT:    v_cndmask_b32_e32 v3, v6, v3, vcc
 ; SI-NEXT:    s_cselect_b64 vcc, -1, 0
-; SI-NEXT:    s_cmp_eq_u32 s9, 5
+; SI-NEXT:    s_cmp_lg_u32 s9, 5
 ; SI-NEXT:    v_lshrrev_b32_e32 v8, 16, v2
-; SI-NEXT:    v_cndmask_b32_e32 v2, v2, v6, vcc
+; SI-NEXT:    v_cndmask_b32_e32 v2, v6, v2, vcc
 ; SI-NEXT:    s_cselect_b64 vcc, -1, 0
-; SI-NEXT:    s_cmp_eq_u32 s9, 2
+; SI-NEXT:    s_cmp_lg_u32 s9, 2
 ; SI-NEXT:    v_and_b32_e32 v7, 0xffff, v7
 ; SI-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
-; SI-NEXT:    v_cndmask_b32_e32 v8, v8, v6, vcc
+; SI-NEXT:    v_cndmask_b32_e32 v8, v6, v8, vcc
 ; SI-NEXT:    s_cselect_b64 vcc, -1, 0
-; SI-NEXT:    s_cmp_eq_u32 s9, 3
+; SI-NEXT:    s_cmp_lg_u32 s9, 3
 ; SI-NEXT:    v_lshrrev_b32_e32 v9, 16, v1
 ; SI-NEXT:    v_and_b32_e32 v2, 0xffff, v2
 ; SI-NEXT:    v_or_b32_e32 v3, v7, v3
 ; SI-NEXT:    v_lshlrev_b32_e32 v7, 16, v8
-; SI-NEXT:    v_cndmask_b32_e32 v1, v1, v6, vcc
+; SI-NEXT:    v_cndmask_b32_e32 v1, v6, v1, vcc
 ; SI-NEXT:    s_cselect_b64 vcc, -1, 0
-; SI-NEXT:    s_cmp_eq_u32 s9, 0
+; SI-NEXT:    s_cmp_lg_u32 s9, 0
 ; SI-NEXT:    v_or_b32_e32 v2, v2, v7
-; SI-NEXT:    v_cndmask_b32_e32 v7, v9, v6, vcc
+; SI-NEXT:    v_cndmask_b32_e32 v7, v6, v9, vcc
 ; SI-NEXT:    s_cselect_b64 vcc, -1, 0
-; SI-NEXT:    s_cmp_eq_u32 s9, 1
+; SI-NEXT:    s_cmp_lg_u32 s9, 1
 ; SI-NEXT:    v_lshrrev_b32_e32 v10, 16, v0
-; SI-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
+; SI-NEXT:    v_cndmask_b32_e32 v0, v6, v0, vcc
 ; SI-NEXT:    s_cselect_b64 vcc, -1, 0
-; SI-NEXT:    v_cndmask_b32_e32 v6, v10, v6, vcc
+; SI-NEXT:    v_cndmask_b32_e32 v6, v6, v10, vcc
 ; SI-NEXT:    v_and_b32_e32 v1, 0xffff, v1
 ; SI-NEXT:    v_lshlrev_b32_e32 v7, 16, v7
 ; SI-NEXT:    v_and_b32_e32 v0, 0xffff, v0
@@ -1114,41 +1114,41 @@ define amdgpu_kernel void @v_insertelement_v8bf16_dynamic(ptr addrspace(1) %out,
 ; VI-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
 ; VI-NEXT:    v_mov_b32_e32 v5, s1
 ; VI-NEXT:    v_add_u32_e32 v4, vcc, s0, v4
-; VI-NEXT:    s_cmp_eq_u32 s5, 6
+; VI-NEXT:    s_cmp_lg_u32 s5, 6
 ; VI-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
 ; VI-NEXT:    v_mov_b32_e32 v6, s4
 ; VI-NEXT:    s_cselect_b64 vcc, -1, 0
-; VI-NEXT:    s_cmp_eq_u32 s5, 7
+; VI-NEXT:    s_cmp_lg_u32 s5, 7
 ; VI-NEXT:    s_waitcnt vmcnt(0)
-; VI-NEXT:    v_cndmask_b32_e32 v7, v3, v6, vcc
+; VI-NEXT:    v_cndmask_b32_e32 v7, v6, v3, vcc
 ; VI-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
 ; VI-NEXT:    s_cselect_b64 vcc, -1, 0
-; VI-NEXT:    s_cmp_eq_u32 s5, 4
-; VI-NEXT:    v_cndmask_b32_e32 v3, v3, v6, vcc
+; VI-NEXT:    s_cmp_lg_u32 s5, 4
+; VI-NEXT:    v_cndmask_b32_e32 v3, v6, v3, vcc
 ; VI-NEXT:    s_cselect_b64 vcc, -1, 0
-; VI-NEXT:    s_cmp_eq_u32 s5, 5
+; VI-NEXT:    s_cmp_lg_u32 s5, 5
 ; VI-NEXT:    v_lshrrev_b32_e32 v8, 16, v2
-; VI-NEXT:    v_cndmask_b32_e32 v2, v2, v6, vcc
+; VI-NEXT:    v_cndmask_b32_e32 v2, v6, v2, vcc
 ; VI-NEXT:    s_cselect_b64 vcc, -1, 0
-; VI-NEXT:    s_cmp_eq_u32 s5, 2
+; VI-NEXT:    s_cmp_lg_u32 s5, 2
 ; VI-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
-; VI-NEXT:    v_cndmask_b32_e32 v8, v8, v6, vcc
+; VI-NEXT:    v_cndmask_b32_e32 v8, v6, v8, vcc
 ; VI-NEXT:    s_cselect_b64 vcc, -1, 0
-; VI-NEXT:    s_cmp_eq_u32 s5, 3
+; VI-NEXT:    s_cmp_lg_u32 s5, 3
 ; VI-NEXT:    v_lshrrev_b32_e32 v9, 16, v1
 ; VI-NEXT:    v_or_b32_sdwa v3, v7, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
 ; VI-NEXT:    v_lshlrev_b32_e32 v7, 16, v8
-; VI-NEXT:    v_cndmask_b32_e32 v1, v1, v6, vcc
+; VI-NEXT:    v_cndmask_b32_e32 v1, v6, v1, vcc
 ; VI-NEXT:    s_cselect_b64 vcc, -1, 0
-; VI-NEXT:    s_cmp_eq_u32 s5, 0
+; VI-NEXT:    s_cmp_lg_u32 s5, 0
 ; VI-NEXT:    v_or_b32_sdwa v2, v2, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; VI-NEXT:    v_cndmask_b32_e32 v7, v9, v6, vcc
+; VI-NEXT:    v_cndmask_b32_e32 v7, v6, v9, vcc
 ; VI-NEXT:    s_cselect_b64 vcc, -1, 0
-; VI-NEXT:    s_cmp_eq_u32 s5, 1
-; VI-NEXT:    v_cndmask_b32_e32 v8, v0, v6, vcc
+; VI-NEXT:    s_cmp_lg_u32 s5, 1
+; VI-NEXT:    v_cndmask_b32_e32 v8, v6, v0, vcc
 ; VI-NEXT:    s_cselect_b64 vcc, -1, 0
 ; VI-NEXT:    v_lshlrev_b32_e32 v7, 16, v7
-; VI-NEXT:    v_cndmask_b32_sdwa v0, v0, v6, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; VI-NEXT:    v_cndmask_b32_sdwa v0, v6, v0, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
 ; VI-NEXT:    v_or_b32_sdwa v1, v1, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
 ; VI-NEXT:    v_or_b32_sdwa v0, v8, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
 ; VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
@@ -1162,36 +1162,36 @@ define amdgpu_kernel void @v_insertelement_v8bf16_dynamic(ptr addrspace(1) %out,
 ; GFX900-NEXT:    s_mov_b32 s14, 0x5040100
 ; GFX900-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX900-NEXT:    global_load_dwordx4 v[0:3], v4, s[18:19]
-; GFX900-NEXT:    s_cmp_eq_u32 s13, 6
+; GFX900-NEXT:    s_cmp_lg_u32 s13, 6
 ; GFX900-NEXT:    s_cselect_b64 vcc, -1, 0
-; GFX900-NEXT:    s_cmp_eq_u32 s13, 7
+; GFX900-NEXT:    s_cmp_lg_u32 s13, 7
 ; GFX900-NEXT:    s_cselect_b64 s[0:1], -1, 0
-; GFX900-NEXT:    s_cmp_eq_u32 s13, 4
+; GFX900-NEXT:    s_cmp_lg_u32 s13, 4
 ; GFX900-NEXT:    s_cselect_b64 s[2:3], -1, 0
-; GFX900-NEXT:    s_cmp_eq_u32 s13, 5
+; GFX900-NEXT:    s_cmp_lg_u32 s13, 5
 ; GFX900-NEXT:    s_cselect_b64 s[4:5], -1, 0
-; GFX900-NEXT:    s_cmp_eq_u32 s13, 2
+; GFX900-NEXT:    s_cmp_lg_u32 s13, 2
 ; GFX900-NEXT:    v_mov_b32_e32 v5, s12
 ; GFX900-NEXT:    s_cselect_b64 s[6:7], -1, 0
-; GFX900-NEXT:    s_cmp_eq_u32 s13, 3
+; GFX900-NEXT:    s_cmp_lg_u32 s13, 3
 ; GFX900-NEXT:    s_cselect_b64 s[8:9], -1, 0
-; GFX900-NEXT:    s_cmp_eq_u32 s13, 0
+; GFX900-NEXT:    s_cmp_lg_u32 s13, 0
 ; GFX900-NEXT:    s_cselect_b64 s[10:11], -1, 0
-; GFX900-NEXT:    s_cmp_eq_u32 s13, 1
+; GFX900-NEXT:    s_cmp_lg_u32 s13, 1
 ; GFX900-NEXT:    s_cselect_b64 s[12:13], -1, 0
 ; GFX900-NEXT:    s_waitcnt vmcnt(0)
-; GFX900-NEXT:    v_cndmask_b32_e32 v6, v3, v5, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v6, v5, v3, vcc
 ; GFX900-NEXT:    s_mov_b64 vcc, s[0:1]
-; GFX900-NEXT:    v_cndmask_b32_sdwa v3, v3, v5, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX900-NEXT:    v_cndmask_b32_sdwa v3, v5, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
 ; GFX900-NEXT:    s_mov_b64 vcc, s[4:5]
-; GFX900-NEXT:    v_cndmask_b32_e64 v7, v2, v5, s[2:3]
-; GFX900-NEXT:    v_cndmask_b32_sdwa v2, v2, v5, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX900-NEXT:    v_cndmask_b32_e64 v7, v5, v2, s[2:3]
+; GFX900-NEXT:    v_cndmask_b32_sdwa v2, v5, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
 ; GFX900-NEXT:    s_mov_b64 vcc, s[8:9]
-; GFX900-NEXT:    v_cndmask_b32_e64 v8, v1, v5, s[6:7]
-; GFX900-NEXT:    v_cndmask_b32_sdwa v1, v1, v5, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX900-NEXT:    v_cndmask_b32_e64 v8, v5, v1, s[6:7]
+; GFX900-NEXT:    v_cndmask_b32_sdwa v1, v5, v1, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
 ; GFX900-NEXT:    s_mov_b64 vcc, s[12:13]
-; GFX900-NEXT:    v_cndmask_b32_e64 v9, v0, v5, s[10:11]
-; GFX900-NEXT:    v_cndmask_b32_sdwa v0, v0, v5, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX900-NEXT:    v_cndmask_b32_e64 v9, v5, v0, s[10:11]
+; GFX900-NEXT:    v_cndmask_b32_sdwa v0, v5, v0, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
 ; GFX900-NEXT:    v_perm_b32 v3, v3, v6, s14
 ; GFX900-NEXT:    v_perm_b32 v2, v2, v7, s14
 ; GFX900-NEXT:    v_perm_b32 v1, v1, v8, s14
@@ -1208,36 +1208,36 @@ define amdgpu_kernel void @v_insertelement_v8bf16_dynamic(ptr addrspace(1) %out,
 ; GFX942-NEXT:    s_mov_b32 s14, 0x5040100
 ; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX942-NEXT:    global_load_dwordx4 v[0:3], v4, s[18:19]
-; GFX942-NEXT:    s_cmp_eq_u32 s13, 6
+; GFX942-NEXT:    s_cmp_lg_u32 s13, 6
 ; GFX942-NEXT:    s_cselect_b64 vcc, -1, 0
-; GFX942-NEXT:    s_cmp_eq_u32 s13, 7
+; GFX942-NEXT:    s_cmp_lg_u32 s13, 7
 ; GFX942-NEXT:    s_cselect_b64 s[0:1], -1, 0
-; GFX942-NEXT:    s_cmp_eq_u32 s13, 4
+; GFX942-NEXT:    s_cmp_lg_u32 s13, 4
 ; GFX942-NEXT:    s_cselect_b64 s[2:3], -1, 0
-; GFX942-NEXT:    s_cmp_eq_u32 s13, 5
+; GFX942-NEXT:    s_cmp_lg_u32 s13, 5
 ; GFX942-NEXT:    s_cselect_b64 s[4:5], -1, 0
-; GFX942-NEXT:    s_cmp_eq_u32 s13, 2
+; GFX942-NEXT:    s_cmp_lg_u32 s13, 2
 ; GFX942-NEXT:    v_mov_b32_e32 v5, s12
 ; GFX942-NEXT:    s_cselect_b64 s[6:7], -1, 0
-; GFX942-NEXT:    s_cmp_eq_u32 s13, 3
+; GFX942-NEXT:    s_cmp_lg_u32 s13, 3
 ; GFX942-NEXT:    s_cselect_b64 s[8:9], -1, 0
-; GFX942-NEXT:    s_cmp_eq_u32 s13, 0
+; GFX942-NEXT:    s_cmp_lg_u32 s13, 0
 ; GFX942-NEXT:    s_cselect_b64 s[10:11], -1, 0
-; GFX942-NEXT:    s_cmp_eq_u32 s13, 1
+; GFX942-NEXT:    s_cmp_lg_u32 s13, 1
 ; GFX942-NEXT:    s_cselect_b64 s[12:13], -1, 0
 ; GFX942-NEXT:    s_waitcnt vmcnt(0)
-; GFX942-NEXT:    v_cndmask_b32_e32 v6, v3, v5, vcc
+; GFX942-NEXT:    v_cndmask_b32_e32 v6, v5, v3, vcc
 ; GFX942-NEXT:    s_mov_b64 vcc, s[0:1]
-; GFX942-NEXT:    v_cndmask_b32_sdwa v3, v3, v5, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX942-NEXT:    v_cndmask_b32_sdwa v3, v5, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
 ; GFX942-NEXT:    s_mov_b64 vcc, s[4:5]
-; GFX942-NEXT:    v_cndmask_b32_e64 v7, v2, v5, s[2:3]
-; GFX942-NEXT:    v_cndmask_b32_sdwa v2, v2, v5, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX942-NEXT:    v_cndmask_b32_e64 v7, v5, v2, s[2:3]
+; GFX942-NEXT:    v_cndmask_b32_sdwa v2, v5, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
 ; GFX942-NEXT:    s_mov_b64 vcc, s[8:9]
-; GFX942-NEXT:    v_cndmask_b32_e64 v8, v1, v5, s[6:7]
-; GFX942-NEXT:    v_cndmask_b32_sdwa v1, v1, v5, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX942-NEXT:    v_cndmask_b32_e64 v8, v5, v1, s[6:7]
+; GFX942-NEXT:    v_cndmask_b32_sdwa v1, v5, v1, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
 ; GFX942-NEXT:    s_mov_b64 vcc, s[12:13]
-; GFX942-NEXT:    v_cndmask_b32_e64 v9, v0, v5, s[10:11]
-; GFX942-NEXT:    v_cndmask_b32_sdwa v0, v0, v5, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX942-NEXT:    v_cndmask_b32_e64 v9, v5, v0, s[10:11]
+; GFX942-NEXT:    v_cndmask_b32_sdwa v0, v5, v0, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
 ; GFX942-NEXT:    v_perm_b32 v3, v3, v6, s14
 ; GFX942-NEXT:    v_perm_b32 v2, v2, v7, s14
 ; GFX942-NEXT:    v_perm_b32 v1, v1, v8, s14
@@ -1367,84 +1367,84 @@ define amdgpu_kernel void @v_insertelement_v16bf16_dynamic(ptr addrspace(1) %out
 ; SI-NEXT:    v_mov_b32_e32 v5, 0
 ; SI-NEXT:    buffer_load_dwordx4 v[7:10], v[4:5], s[0:3], 0 addr64
 ; SI-NEXT:    buffer_load_dwordx4 v[0:3], v[4:5], s[0:3], 0 addr64 offset:16
-; SI-NEXT:    s_cmp_eq_u32 s7, 6
+; SI-NEXT:    s_cmp_lg_u32 s7, 6
 ; SI-NEXT:    v_mov_b32_e32 v6, s6
 ; SI-NEXT:    s_cselect_b64 vcc, -1, 0
-; SI-NEXT:    s_cmp_eq_u32 s7, 7
+; SI-NEXT:    s_cmp_lg_u32 s7, 7
 ; SI-NEXT:    s_mov_b64 s[14:15], s[2:3]
 ; SI-NEXT:    s_waitcnt vmcnt(1)
-; SI-NEXT:    v_cndmask_b32_e32 v11, v10, v6, vcc
+; SI-NEXT:    v_cndmask_b32_e32 v11, v6, v10, vcc
 ; SI-NEXT:    s_cselect_b64 vcc, -1, 0
-; SI-NEXT:    s_cmp_eq_u32 s7, 4
+; SI-NEXT:    s_cmp_lg_u32 s7, 4
 ; SI-NEXT:    s_cselect_b64 s[0:1], -1, 0
-; SI-NEXT:    s_cmp_eq_u32 s7, 5
+; SI-NEXT:    s_cmp_lg_u32 s7, 5
 ; SI-NEXT:    v_lshrrev_b32_e32 v10, 16, v10
 ; SI-NEXT:    v_lshrrev_b32_e32 v12, 16, v9
-; SI-NEXT:    v_cndmask_b32_e64 v9, v9, v6, s[0:1]
+; SI-NEXT:    v_cndmask_b32_e64 v9, v6, v9, s[0:1]
 ; SI-NEXT:    s_cselect_b64 s[0:1], -1, 0
-; SI-NEXT:    s_cmp_eq_u32 s7, 2
+; SI-NEXT:    s_cmp_lg_u32 s7, 2
 ; SI-NEXT:    s_cselect_b64 s[2:3], -1, 0
-; SI-NEXT:    s_cmp_eq_u32 s7, 3
-; SI-NEXT:    v_cndmask_b32_e32 v10, v10, v6, vcc
+; SI-NEXT:    s_cmp_lg_u32 s7, 3
+; SI-NEXT:    v_cndmask_b32_e32 v10, v6, v10, vcc
 ; SI-NEXT:    v_lshrrev_b32_e32 v13, 16, v8
-; SI-NEXT:    v_cndmask_b32_e64 v8, v8, v6, s[2:3]
+; SI-NEXT:    v_cndmask_b32_e64 v8, v6, v8, s[2:3]
 ; SI-NEXT:    s_cselect_b64 s[2:3], -1, 0
-; SI-NEXT:    s_cmp_eq_u32 s7, 0
+; SI-NEXT:    s_cmp_lg_u32 s7, 0
 ; SI-NEXT:    v_and_b32_e32 v11, 0xffff, v11
 ; SI-NEXT:    v_lshlrev_b32_e32 v10, 16, v10
-; SI-NEXT:    v_cndmask_b32_e64 v12, v12, v6, s[0:1]
+; SI-NEXT:    v_cndmask_b32_e64 v12, v6, v12, s[0:1]
 ; SI-NEXT:    s_cselect_b64 s[4:5], -1, 0
 ; SI-NEXT:    v_or_b32_e32 v10, v11, v10
 ; SI-NEXT:    v_lshlrev_b32_e32 v11, 16, v12
-; SI-NEXT:    v_cndmask_b32_e64 v12, v13, v6, s[2:3]
-; SI-NEXT:    s_cmp_eq_u32 s7, 1
+; SI-NEXT:    v_cndmask_b32_e64 v12, v6, v13, s[2:3]
+; SI-NEXT:    s_cmp_lg_u32 s7, 1
 ; SI-NEXT:    v_lshrrev_b32_e32 v14, 16, v7
 ; SI-NEXT:    v_and_b32_e32 v8, 0xffff, v8
 ; SI-NEXT:    v_lshlrev_b32_e32 v12, 16, v12
 ; SI-NEXT:    s_cselect_b64 vcc, -1, 0
-; SI-NEXT:    s_cmp_eq_u32 s7, 14
-; SI-NEXT:    v_cndmask_b32_e64 v7, v7, v6, s[4:5]
+; SI-NEXT:    s_cmp_lg_u32 s7, 14
+; SI-NEXT:    v_cndmask_b32_e64 v7, v6, v7, s[4:5]
 ; SI-NEXT:    v_or_b32_e32 v8, v8, v12
-; SI-NEXT:    v_cndmask_b32_e32 v12, v14, v6, vcc
+; SI-NEXT:    v_cndmask_b32_e32 v12, v6, v14, vcc
 ; SI-NEXT:    s_cselect_b64 vcc, -1, 0
-; SI-NEXT:    s_cmp_eq_u32 s7, 15
+; SI-NEXT:    s_cmp_lg_u32 s7, 15
 ; SI-NEXT:    s_waitcnt vmcnt(0)
 ; SI-NEXT:    v_lshrrev_b32_e32 v15, 16, v3
 ; SI-NEXT:    v_and_b32_e32 v7, 0xffff, v7
 ; SI-NEXT:    v_lshlrev_b32_e32 v12, 16, v12
-; SI-NEXT:    v_cndmask_b32_e32 v3, v3, v6, vcc
+; SI-NEXT:    v_cndmask_b32_e32 v3, v6, v3, vcc
 ; SI-NEXT:    s_cselect_b64 vcc, -1, 0
-; SI-NEXT:    s_cmp_eq_u32 s7, 12
+; SI-NEXT:    s_cmp_lg_u32 s7, 12
 ; SI-NEXT:    v_or_b32_e32 v7, v7, v12
-; SI-NEXT:    v_cndmask_b32_e32 v12, v15, v6, vcc
+; SI-NEXT:    v_cndmask_b32_e32 v12, v6, v15, vcc
 ; SI-NEXT:    s_cselect_b64 vcc, -1, 0
-; SI-NEXT:    s_cmp_eq_u32 s7, 13
+; SI-NEXT:    s_cmp_lg_u32 s7, 13
 ; SI-NEXT:    v_lshrrev_b32_e32 v16, 16, v2
 ; SI-NEXT:    v_and_b32_e32 v3, 0xffff, v3
 ; SI-NEXT:    v_lshlrev_b32_e32 v12, 16, v12
-; SI-NEXT:    v_cndmask_b32_e32 v2, v2, v6, vcc
+; SI-NEXT:    v_cndmask_b32_e32 v2, v6, v2, vcc
 ; SI-NEXT:    s_cselect_b64 vcc, -1, 0
-; SI-NEXT:    s_cmp_eq_u32 s7, 10
+; SI-NEXT:    s_cmp_lg_u32 s7, 10
 ; SI-NEXT:    v_or_b32_e32 v3, v3, v12
-; SI-NEXT:    v_cndmask_b32_e32 v12, v16, v6, vcc
+; SI-NEXT:    v_cndmask_b32_e32 v12, v6, v16, vcc
 ; SI-NEXT:    s_cselect_b64 vcc, -1, 0
-; SI-NEXT:    s_cmp_eq_u32 s7, 11
+; SI-NEXT:    s_cmp_lg_u32 s7, 11
 ; SI-NEXT:    v_lshrrev_b32_e32 v17, 16, v1
 ; SI-NEXT:    v_and_b32_e32 v2, 0xffff, v2
 ; SI-NEXT:    v_lshlrev_b32_e32 v12, 16, v12
-; SI-NEXT:    v_cndmask_b32_e32 v1, v1, v6, vcc
+; SI-NEXT:    v_cndmask_b32_e32 v1, v6, v1, vcc
 ; SI-NEXT:    s_cselect_b64 vcc, -1, 0
-; SI-NEXT:    s_cmp_eq_u32 s7, 8
+; SI-NEXT:    s_cmp_lg_u32 s7, 8
 ; SI-NEXT:    v_and_b32_e32 v9, 0xffff, v9
 ; SI-NEXT:    v_or_b32_e32 v2, v2, v12
-; SI-NEXT:    v_cndmask_b32_e32 v12, v17, v6, vcc
+; SI-NEXT:    v_cndmask_b32_e32 v12, v6, v17, vcc
 ; SI-NEXT:    s_cselect_b64 vcc, -1, 0
-; SI-NEXT:    s_cmp_eq_u32 s7, 9
+; SI-NEXT:    s_cmp_lg_u32 s7, 9
 ; SI-NEXT:    v_or_b32_e32 v9, v9, v11
 ; SI-NEXT:    v_lshrrev_b32_e32 v11, 16, v0
-; SI-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
+; SI-NEXT:    v_cndmask_b32_e32 v0, v6, v0, vcc
 ; SI-NEXT:    s_cselect_b64 vcc, -1, 0
-; SI-NEXT:    v_cndmask_b32_e32 v6, v11, v6, vcc
+; SI-NEXT:    v_cndmask_b32_e32 v6, v6, v11, vcc
 ; SI-NEXT:    v_and_b32_e32 v1, 0xffff, v1
 ; SI-NEXT:    v_lshlrev_b32_e32 v12, 16, v12
 ; SI-NEXT:    v_and_b32_e32 v0, 0xffff, v0
@@ -1475,76 +1475,76 @@ define amdgpu_kernel void @v_insertelement_v16bf16_dynamic(ptr addrspace(1) %out
 ; VI-NEXT:    v_add_u32_e32 v8, vcc, s0, v8
 ; VI-NEXT:    v_addc_u32_e32 v9, vcc, 0, v9, vcc
 ; VI-NEXT:    v_add_u32_e32 v10, vcc, 16, v8
-; VI-NEXT:    s_cmp_eq_u32 s7, 14
+; VI-NEXT:    s_cmp_lg_u32 s7, 14
 ; VI-NEXT:    v_addc_u32_e32 v11, vcc, 0, v9, vcc
 ; VI-NEXT:    v_mov_b32_e32 v12, s6
 ; VI-NEXT:    s_cselect_b64 vcc, -1, 0
-; VI-NEXT:    s_cmp_eq_u32 s7, 15
+; VI-NEXT:    s_cmp_lg_u32 s7, 15
 ; VI-NEXT:    s_waitcnt vmcnt(1)
-; VI-NEXT:    v_cndmask_b32_e32 v13, v3, v12, vcc
+; VI-NEXT:    v_cndmask_b32_e32 v13, v12, v3, vcc
 ; VI-NEXT:    s_cselect_b64 vcc, -1, 0
-; VI-NEXT:    s_cmp_eq_u32 s7, 12
+; VI-NEXT:    s_cmp_lg_u32 s7, 12
 ; VI-NEXT:    s_cselect_b64 s[0:1], -1, 0
-; VI-NEXT:    s_cmp_eq_u32 s7, 13
+; VI-NEXT:    s_cmp_lg_u32 s7, 13
 ; VI-NEXT:    v_lshrrev_b32_e32 v14, 16, v2
-; VI-NEXT:    v_cndmask_b32_e64 v2, v2, v12, s[0:1]
+; VI-NEXT:    v_cndmask_b32_e64 v2, v12, v2, s[0:1]
 ; VI-NEXT:    s_cselect_b64 s[0:1], -1, 0
-; VI-NEXT:    s_cmp_eq_u32 s7, 10
+; VI-NEXT:    s_cmp_lg_u32 s7, 10
 ; VI-NEXT:    s_cselect_b64 s[2:3], -1, 0
-; VI-NEXT:    s_cmp_eq_u32 s7, 11
+; VI-NEXT:    s_cmp_lg_u32 s7, 11
 ; VI-NEXT:    v_lshrrev_b32_e32 v15, 16, v1
-; VI-NEXT:    v_cndmask_b32_e64 v1, v1, v12, s[2:3]
+; VI-NEXT:    v_cndmask_b32_e64 v1, v12, v1, s[2:3]
 ; VI-NEXT:    s_cselect_b64 s[2:3], -1, 0
-; VI-NEXT:    s_cmp_eq_u32 s7, 8
+; VI-NEXT:    s_cmp_lg_u32 s7, 8
 ; VI-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
 ; VI-NEXT:    s_cselect_b64 s[4:5], -1, 0
-; VI-NEXT:    v_cndmask_b32_e64 v15, v15, v12, s[2:3]
-; VI-NEXT:    s_cmp_eq_u32 s7, 9
+; VI-NEXT:    v_cndmask_b32_e64 v15, v12, v15, s[2:3]
+; VI-NEXT:    s_cmp_lg_u32 s7, 9
 ; VI-NEXT:    v_lshrrev_b32_e32 v16, 16, v0
-; VI-NEXT:    v_cndmask_b32_e32 v3, v3, v12, vcc
+; VI-NEXT:    v_cndmask_b32_e32 v3, v12, v3, vcc
 ; VI-NEXT:    v_lshlrev_b32_e32 v15, 16, v15
 ; VI-NEXT:    s_cselect_b64 vcc, -1, 0
-; VI-NEXT:    s_cmp_eq_u32 s7, 6
+; VI-NEXT:    s_cmp_lg_u32 s7, 6
 ; VI-NEXT:    v_or_b32_sdwa v1, v1, v15 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; VI-NEXT:    v_cndmask_b32_e32 v15, v16, v12, vcc
+; VI-NEXT:    v_cndmask_b32_e32 v15, v12, v16, vcc
 ; VI-NEXT:    s_cselect_b64 vcc, -1, 0
-; VI-NEXT:    s_cmp_eq_u32 s7, 7
-; VI-NEXT:    v_cndmask_b32_e64 v0, v0, v12, s[4:5]
+; VI-NEXT:    s_cmp_lg_u32 s7, 7
+; VI-NEXT:    v_cndmask_b32_e64 v0, v12, v0, s[4:5]
 ; VI-NEXT:    s_waitcnt vmcnt(0)
 ; VI-NEXT:    v_lshrrev_b32_e32 v17, 16, v7
 ; VI-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
-; VI-NEXT:    v_cndmask_b32_e64 v14, v14, v12, s[0:1]
+; VI-NEXT:    v_cndmask_b32_e64 v14, v12, v14, s[0:1]
 ; VI-NEXT:    v_lshlrev_b32_e32 v15, 16, v15
-; VI-NEXT:    v_cndmask_b32_e32 v7, v7, v12, vcc
+; VI-NEXT:    v_cndmask_b32_e32 v7, v12, v7, vcc
 ; VI-NEXT:    s_cselect_b64 vcc, -1, 0
-; VI-NEXT:    s_cmp_eq_u32 s7, 4
+; VI-NEXT:    s_cmp_lg_u32 s7, 4
 ; VI-NEXT:    v_or_b32_sdwa v3, v13, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
 ; VI-NEXT:    v_lshlrev_b32_e32 v13, 16, v14
 ; VI-NEXT:    v_or_b32_sdwa v0, v0, v15 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; VI-NEXT:    v_cndmask_b32_e32 v15, v17, v12, vcc
+; VI-NEXT:    v_cndmask_b32_e32 v15, v12, v17, vcc
 ; VI-NEXT:    s_cselect_b64 vcc, -1, 0
-; VI-NEXT:    s_cmp_eq_u32 s7, 5
+; VI-NEXT:    s_cmp_lg_u32 s7, 5
 ; VI-NEXT:    v_or_b32_sdwa v2, v2, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
 ; VI-NEXT:    v_lshrrev_b32_e32 v13, 16, v6
-; VI-NEXT:    v_cndmask_b32_e32 v6, v6, v12, vcc
+; VI-NEXT:    v_cndmask_b32_e32 v6, v12, v6, vcc
 ; VI-NEXT:    s_cselect_b64 vcc, -1, 0
-; VI-NEXT:    s_cmp_eq_u32 s7, 2
-; VI-NEXT:    v_cndmask_b32_e32 v13, v13, v12, vcc
+; VI-NEXT:    s_cmp_lg_u32 s7, 2
+; VI-NEXT:    v_cndmask_b32_e32 v13, v12, v13, vcc
 ; VI-NEXT:    s_cselect_b64 vcc, -1, 0
-; VI-NEXT:    s_cmp_eq_u32 s7, 3
+; VI-NEXT:    s_cmp_lg_u32 s7, 3
 ; VI-NEXT:    v_lshrrev_b32_e32 v14, 16, v5
-; VI-NEXT:    v_cndmask_b32_e32 v5, v5, v12, vcc
+; VI-NEXT:    v_cndmask_b32_e32 v5, v12, v5, vcc
 ; VI-NEXT:    s_cselect_b64 vcc, -1, 0
-; VI-NEXT:    s_cmp_eq_u32 s7, 0
+; VI-NEXT:    s_cmp_lg_u32 s7, 0
 ; VI-NEXT:    v_lshlrev_b32_e32 v13, 16, v13
-; VI-NEXT:    v_cndmask_b32_e32 v14, v14, v12, vcc
+; VI-NEXT:    v_cndmask_b32_e32 v14, v12, v14, vcc
 ; VI-NEXT:    s_cselect_b64 vcc, -1, 0
-; VI-NEXT:    s_cmp_eq_u32 s7, 1
+; VI-NEXT:    s_cmp_lg_u32 s7, 1
 ; VI-NEXT:    v_or_b32_sdwa v6, v6, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
 ; VI-NEXT:    v_lshrrev_b32_e32 v13, 16, v4
-; VI-NEXT:    v_cndmask_b32_e32 v4, v4, v12, vcc
+; VI-NEXT:    v_cndmask_b32_e32 v4, v12, v4, vcc
 ; VI-NEXT:    s_cselect_b64 vcc, -1, 0
-; VI-NEXT:    v_cndmask_b32_e32 v12, v13, v12, vcc
+; VI-NEXT:    v_cndmask_b32_e32 v12, v12, v13, vcc
 ; VI-NEXT:    v_lshlrev_b32_e32 v15, 16, v15
 ; VI-NEXT:    v_lshlrev_b32_e32 v14, 16, v14
 ; VI-NEXT:    v_lshlrev_b32_e32 v12, 16, v12
@@ -1564,67 +1564,67 @@ define amdgpu_kernel void @v_insertelement_v16bf16_dynamic(ptr addrspace(1) %out
 ; GFX900-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX900-NEXT:    global_load_dwordx4 v[0:3], v8, s[38:39]
 ; GFX900-NEXT:    global_load_dwordx4 v[4:7], v8, s[38:39] offset:16
-; GFX900-NEXT:    s_cmp_eq_u32 s29, 6
+; GFX900-NEXT:    s_cmp_lg_u32 s29, 6
 ; GFX900-NEXT:    s_cselect_b64 vcc, -1, 0
-; GFX900-NEXT:    s_cmp_eq_u32 s29, 7
+; GFX900-NEXT:    s_cmp_lg_u32 s29, 7
 ; GFX900-NEXT:    s_cselect_b64 s[0:1], -1, 0
-; GFX900-NEXT:    s_cmp_eq_u32 s29, 4
+; GFX900-NEXT:    s_cmp_lg_u32 s29, 4
 ; GFX900-NEXT:    s_cselect_b64 s[2:3], -1, 0
-; GFX900-NEXT:    s_cmp_eq_u32 s29, 5
+; GFX900-NEXT:    s_cmp_lg_u32 s29, 5
 ; GFX900-NEXT:    s_cselect_b64 s[4:5], -1, 0
-; GFX900-NEXT:    s_cmp_eq_u32 s29, 2
+; GFX900-NEXT:    s_cmp_lg_u32 s29, 2
 ; GFX900-NEXT:    s_cselect_b64 s[6:7], -1, 0
-; GFX900-NEXT:    s_cmp_eq_u32 s29, 3
+; GFX900-NEXT:    s_cmp_lg_u32 s29, 3
 ; GFX900-NEXT:    s_cselect_b64 s[8:9], -1, 0
-; GFX900-NEXT:    s_cmp_eq_u32 s29, 0
+; GFX900-NEXT:    s_cmp_lg_u32 s29, 0
 ; GFX900-NEXT:    s_cselect_b64 s[10:11], -1, 0
-; GFX900-NEXT:    s_cmp_eq_u32 s29, 1
+; GFX900-NEXT:    s_cmp_lg_u32 s29, 1
 ; GFX900-NEXT:    s_cselect_b64 s[12:13], -1, 0
-; GFX900-NEXT:    s_cmp_eq_u32 s29, 14
+; GFX900-NEXT:    s_cmp_lg_u32 s29, 14
 ; GFX900-NEXT:    v_mov_b32_e32 v9, s28
 ; GFX900-NEXT:    s_cselect_b64 s[14:15], -1, 0
-; GFX900-NEXT:    s_cmp_eq_u32 s29, 15
+; GFX900-NEXT:    s_cmp_lg_u32 s29, 15
 ; GFX900-NEXT:    s_cselect_b64 s[16:17], -1, 0
-; GFX900-NEXT:    s_cmp_eq_u32 s29, 12
+; GFX900-NEXT:    s_cmp_lg_u32 s29, 12
 ; GFX900-NEXT:    s_cselect_b64 s[18:19], -1, 0
-; GFX900-NEXT:    s_cmp_eq_u32 s29, 13
+; GFX900-NEXT:    s_cmp_lg_u32 s29, 13
 ; GFX900-NEXT:    s_cselect_b64 s[20:21], -1, 0
-; GFX900-NEXT:    s_cmp_eq_u32 s29, 10
+; GFX900-NEXT:    s_cmp_lg_u32 s29, 10
 ; GFX900-NEXT:    s_cselect_b64 s[22:23], -1, 0
-; GFX900-NEXT:    s_cmp_eq_u32 s29, 11
+; GFX900-NEXT:    s_cmp_lg_u32 s29, 11
 ; GFX900-NEXT:    s_cselect_b64 s[24:25], -1, 0
-; GFX900-NEXT:    s_cmp_eq_u32 s29, 8
+; GFX900-NEXT:    s_cmp_lg_u32 s29, 8
 ; GFX900-NEXT:    s_cselect_b64 s[26:27], -1, 0
-; GFX900-NEXT:    s_cmp_eq_u32 s29, 9
+; GFX900-NEXT:    s_cmp_lg_u32 s29, 9
 ; GFX900-NEXT:    s_cselect_b64 s[28:29], -1, 0
 ; GFX900-NEXT:    s_waitcnt vmcnt(1)
-; GFX900-NEXT:    v_cndmask_b32_e32 v10, v3, v9, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v10, v9, v3, vcc
 ; GFX900-NEXT:    s_mov_b64 vcc, s[0:1]
-; GFX900-NEXT:    v_cndmask_b32_sdwa v3, v3, v9, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX900-NEXT:    v_cndmask_b32_sdwa v3, v9, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
 ; GFX900-NEXT:    s_mov_b64 vcc, s[4:5]
-; GFX900-NEXT:    v_cndmask_b32_e64 v11, v2, v9, s[2:3]
-; GFX900-NEXT:    v_cndmask_b32_sdwa v2, v2, v9, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX900-NEXT:    v_cndmask_b32_e64 v11, v9, v2, s[2:3]
+; GFX900-NEXT:    v_cndmask_b32_sdwa v2, v9, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
 ; GFX900-NEXT:    s_mov_b64 vcc, s[8:9]
-; GFX900-NEXT:    v_cndmask_b32_e64 v12, v1, v9, s[6:7]
-; GFX900-NEXT:    v_cndmask_b32_sdwa v1, v1, v9, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX900-NEXT:    v_cndmask_b32_e64 v12, v9, v1, s[6:7]
+; GFX900-NEXT:    v_cndmask_b32_sdwa v1, v9, v1, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
 ; GFX900-NEXT:    s_mov_b64 vcc, s[12:13]
-; GFX900-NEXT:    v_cndmask_b32_e64 v13, v0, v9, s[10:11]
-; GFX900-NEXT:    v_cndmask_b32_sdwa v0, v0, v9, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX900-NEXT:    v_cndmask_b32_e64 v13, v9, v0, s[10:11]
+; GFX900-NEXT:    v_cndmask_b32_sdwa v0, v9, v0, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
 ; GFX900-NEXT:    s_mov_b64 vcc, s[16:17]
 ; GFX900-NEXT:    s_waitcnt vmcnt(0)
-; GFX900-NEXT:    v_cndmask_b32_e64 v14, v7, v9, s[14:15]
-; GFX900-NEXT:    v_cndmask_b32_sdwa v7, v7, v9, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX900-NEXT:    v_cndmask_b32_e64 v14, v9, v7, s[14:15]
+; GFX900-NEXT:    v_cndmask_b32_sdwa v7, v9, v7, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
 ; GFX900-NEXT:    s_mov_b64 vcc, s[20:21]
-; GFX900-NEXT:    v_cndmask_b32_e64 v15, v6, v9, s[18:19]
-; GFX900-NEXT:    v_cndmask_b32_sdwa v6, v6, v9, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX900-NEXT:    v_cndmask_b32_e64 v15, v9, v6, s[18:19]
+; GFX900-NEXT:    v_cndmask_b32_sdwa v6, v9, v6, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
 ; GFX900-NEXT:    s_mov_b64 vcc, s[24:25]
 ; GFX900-NEXT:    v_perm_b32 v3, v3, v10, s30
-; GFX900-NEXT:    v_cndmask_b32_e64 v10, v5, v9, s[22:23]
-; GFX900-NEXT:    v_cndmask_b32_sdwa v5, v5, v9, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX900-NEXT:    v_cndmask_b32_e64 v10, v9, v5, s[22:23]
+; GFX900-NEXT:    v_cndmask_b32_sdwa v5, v9, v5, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
 ; GFX900-NEXT:    s_mov_b64 vcc, s[28:29]
 ; GFX900-NEXT:    v_perm_b32 v2, v2, v11, s30
-; GFX900-NEXT:    v_cndmask_b32_e64 v11, v4, v9, s[26:27]
-; GFX900-NEXT:    v_cndmask_b32_sdwa v4, v4, v9, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX900-NEXT:    v_cndmask_b32_e64 v11, v9, v4, s[26:27]
+; GFX900-NEXT:    v_cndmask_b32_sdwa v4, v9, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
 ; GFX900-NEXT:    v_perm_b32 v7, v7, v14, s30
 ; GFX900-NEXT:    v_perm_b32 v6, v6, v15, s30
 ; GFX900-NEXT:    v_perm_b32 v5, v5, v10, s30
@@ -1645,65 +1645,65 @@ define amdgpu_kernel void @v_insertelement_v16bf16_dynamic(ptr addrspace(1) %out
 ; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX942-NEXT:    global_load_dwordx4 v[0:3], v8, s[38:39]
 ; GFX942-NEXT:    global_load_dwordx4 v[4:7], v8, s[38:39] offset:16
-; GFX942-NEXT:    s_cmp_eq_u32 s29, 6
+; GFX942-NEXT:    s_cmp_lg_u32 s29, 6
 ; GFX942-NEXT:    s_cselect_b64 vcc, -1, 0
-; GFX942-NEXT:    s_cmp_eq_u32 s29, 7
+; GFX942-NEXT:    s_cmp_lg_u32 s29, 7
 ; GFX942-NEXT:    s_cselect_b64 s[0:1], -1, 0
-; GFX942-NEXT:    s_cmp_eq_u32 s29, 4
+; GFX942-NEXT:    s_cmp_lg_u32 s29, 4
 ; GFX942-NEXT:    s_cselect_b64 s[2:3], -1, 0
-; GFX942-NEXT:    s_cmp_eq_u32 s29, 5
+; GFX942-NEXT:    s_cmp_lg_u32 s29, 5
 ; GFX942-NEXT:    s_cselect_b64 s[4:5], -1, 0
-; GFX942-NEXT:    s_cmp_eq_u32 s29, 2
+; GFX942-NEXT:    s_cmp_lg_u32 s29, 2
 ; GFX942-NEXT:    s_cselect_b64 s[6:7], -1, 0
-; GFX942-NEXT:    s_cmp_eq_u32 s29, 3
+; GFX942-NEXT:    s_cmp_lg_u32 s29, 3
 ; GFX942-NEXT:    s_cselect_b64 s[8:9], -1, 0
-; GFX942-NEXT:    s_cmp_eq_u32 s29, 0
+; GFX942-NEXT:    s_cmp_lg_u32 s29, 0
 ; GFX942-NEXT:    s_cselect_b64 s[10:11], -1, 0
-; GFX942-NEXT:    s_cmp_eq_u32 s29, 1
+; GFX942-NEXT:    s_cmp_lg_u32 s29, 1
 ; GFX942-NEXT:    s_cselect_b64 s[12:13], -1, 0
-; GFX942-NEXT:    s_cmp_eq_u32 s29, 14
+; GFX942-NEXT:    s_cmp_lg_u32 s29, 14
 ; GFX942-NEXT:    v_mov_b32_e32 v9, s28
 ; GFX942-NEXT:    s_cselect_b64 s[14:15], -1, 0
-; GFX942-NEXT:    s_cmp_eq_u32 s29, 15
+; GFX942-NEXT:    s_cmp_lg_u32 s29, 15
 ; GFX942-NEXT:    s_cselect_b64 s[16:17], -1, 0
-; GFX942-NEXT:    s_cmp_eq_u32 s29, 12
+; GFX942-NEXT:    s_cmp_lg_u32 s29, 12
 ; GFX942-NEXT:    s_cselect_b64 s[18:19], -1, 0
-; GFX942-NEXT:    s_cmp_eq_u32 s29, 13
+; GFX942-NEXT:    s_cmp_lg_u32 s29, 13
 ; GFX942-NEXT:    s_cselect_b64 s[20:21], -1, 0
-; GFX942-NEXT:    s_cmp_eq_u32 s29, 10
+; GFX942-NEXT:    s_cmp_lg_u32 s29, 10
 ; GFX942-NEXT:    s_cselect_b64 s[22:23], -1, 0
-; GFX942-NEXT:    s_cmp_eq_u32 s29, 11
+; GFX942-NEXT:    s_cmp_lg_u32 s29, 11
 ; GFX942-NEXT:    s_cselect_b64 s[24:25], -1, 0
-; GFX942-NEXT:    s_cmp_eq_u32 s29, 8
+; GFX942-NEXT:    s_cmp_lg_u32 s29, 8
 ; GFX942-NEXT:    s_cselect_b64 s[26:27], -1, 0
-; GFX942-NEXT:    s_cmp_eq_u32 s29, 9
+; GFX942-NEXT:    s_cmp_lg_u32 s29, 9
 ; GFX942-NEXT:    s_cselect_b64 s[28:29], -1, 0
 ; GFX942-NEXT:    s_waitcnt vmcnt(1)
-; GFX942-NEXT:    v_cndmask_b32_e32 v10, v3, v9, vcc
+; GFX942-NEXT:    v_cndmask_b32_e32 v10, v9, v3, vcc
 ; GFX942-NEXT:    s_mov_b64 vcc, s[0:1]
-; GFX942-NEXT:    v_cndmask_b32_sdwa v3, v3, v9, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX942-NEXT:    v_cndmask_b32_sdwa v3, v9, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
 ; GFX942-NEXT:    s_mov_b64 vcc, s[4:5]
-; GFX942-NEXT:    v_cndmask_b32_e64 v11, v2, v9, s[2:3]
-; GFX942-NEXT:    v_cndmask_b32_sdwa v2, v2, v9, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX942-NEXT:    v_cndmask_b32_e64 v11, v9, v2, s[2:3]
+; GFX942-NEXT:    v_cndmask_b32_sdwa v2, v9, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
 ; GFX942-NEXT:    s_mov_b64 vcc, s[8:9]
-; GFX942-NEXT:    v_cndmask_b32_e64 v12, v1, v9, s[6:7]
-; GFX942-NEXT:    v_cndmask_b32_sdwa v1, v1, v9, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX942-NEXT:    v_cndmask_b32_e64 v12, v9, v1, s[6:7]
+; GFX942-NEXT:    v_cndmask_b32_sdwa v1, v9, v1, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
 ; GFX942-NEXT:    s_mov_b64 vcc, s[12:13]
-; GFX942-NEXT:    v_cndmask_b32_e64 v13, v0, v9, s[10:11]
-; GFX942-NEXT:    v_cndmask_b32_sdwa v0, v0, v9, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX942-NEXT:    v_cndmask_b32_e64 v13, v9, v0, s[10:11]
+; GFX942-NEXT:    v_cndmask_b32_sdwa v0, v9, v0, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
 ; GFX942-NEXT:    s_mov_b64 vcc, s[16:17]
 ; GFX942-NEXT:    s_waitcnt vmcnt(0)
-; GFX942-NEXT:    v_cndmask_b32_e64 v14, v7, v9, s[14:15]
-; GFX942-NEXT:    v_cndmask_b32_sdwa v7, v7, v9, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX942-NEXT:    v_cndmask_b32_e64 v14, v9, v7, s[14:15]
+; GFX942-NEXT:    v_cndmask_b32_sdwa v7, v9, v7, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
 ; GFX942-NEXT:    s_mov_b64 vcc, s[20:21]
-; GFX942-NEXT:    v_cndmask_b32_e64 v15, v6, v9, s[18:19]
-; GFX942-NEXT:    v_cndmask_b32_sdwa v6, v6, v9, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX942-NEXT:    v_cndmask_b32_e64 v15, v9, v6, s[18:19]
+; GFX942-NEXT:    v_cndmask_b32_sdwa v6, v9, v6, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
 ; GFX942-NEXT:    s_mov_b64 vcc, s[24:25]
-; GFX942-NEXT:    v_cndmask_b32_e64 v16, v5, v9, s[22:23]
-; GFX942-NEXT:    v_cndmask_b32_sdwa v5, v5, v9, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX942-NEXT:    v_cndmask_b32_e64 v16, v9, v5, s[22:23]
+; GFX942-NEXT:    v_cndmask_b32_sdwa v5, v9, v5, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
 ; GFX942-NEXT:    s_mov_b64 vcc, s[28:29]
-; GFX942-NEXT:    v_cndmask_b32_e64 v17, v4, v9, s[26:27]
-; GFX942-NEXT:    v_cndmask_b32_sdwa v4, v4, v9, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX942-NEXT:    v_cndmask_b32_e64 v17, v9, v4, s[26:27]
+; GFX942-NEXT:    v_cndmask_b32_sdwa v4, v9, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
 ; GFX942-NEXT:    v_perm_b32 v7, v7, v14, s30
 ; GFX942-NEXT:    v_perm_b32 v6, v6, v15, s30
 ; GFX942-NEXT:    v_perm_b32 v5, v5, v16, s30
diff --git a/llvm/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll b/llvm/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll
index e0dacb7a59a42..f104cbe9f9e41 100644
--- a/llvm/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll
@@ -2700,36 +2700,36 @@ define amdgpu_kernel void @v_insertelement_v8f16_dynamic(ptr addrspace(1) %out,
 ; GFX9-NEXT:    s_mov_b32 s14, 0x5040100
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    global_load_dwordx4 v[0:3], v4, s[18:19]
-; GFX9-NEXT:    s_cmp_eq_u32 s13, 6
+; GFX9-NEXT:    s_cmp_lg_u32 s13, 6
 ; GFX9-NEXT:    s_cselect_b64 vcc, -1, 0
-; GFX9-NEXT:    s_cmp_eq_u32 s13, 7
+; GFX9-NEXT:    s_cmp_lg_u32 s13, 7
 ; GFX9-NEXT:    s_cselect_b64 s[0:1], -1, 0
-; GFX9-NEXT:    s_cmp_eq_u32 s13, 4
+; GFX9-NEXT:    s_cmp_lg_u32 s13, 4
 ; GFX9-NEXT:    s_cselect_b64 s[2:3], -1, 0
-; GFX9-NEXT:    s_cmp_eq_u32 s13, 5
+; GFX9-NEXT:    s_cmp_lg_u32 s13, 5
 ; GFX9-NEXT:    s_cselect_b64 s[4:5], -1, 0
-; GFX9-NEXT:    s_cmp_eq_u32 s13, 2
+; GFX9-NEXT:    s_cmp_lg_u32 s13, 2
 ; GFX9-NEXT:    v_mov_b32_e32 v5, s12
 ; GFX9-NEXT:    s_cselect_b64 s[6:7], -1, 0
-; GFX9-NEXT:    s_cmp_eq_u32 s13, 3
+; GFX9-NEXT:    s_cmp_lg_u32 s13, 3
 ; GFX9-NEXT:    s_cselect_b64 s[8:9], -1, 0
-; GFX9-NEXT:    s_cmp_eq_u32 s13, 0
+; GFX9-NEXT:    s_cmp_lg_u32 s13, 0
 ; GFX9-NEXT:    s_cselect_b64 s[10:11], -1, 0
-; GFX9-NEXT:    s_cmp_eq_u32 s13, 1
+; GFX9-NEXT:    s_cmp_lg_u32 s13, 1
 ; GFX9-NEXT:    s_cselect_b64 s[12:13], -1, 0
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    v_cndmask_b32_e32 v6, v3, v5, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v6, v5, v3, vcc
 ; GFX9-NEXT:    s_mov_b64 vcc, s[0:1]
-; GFX9-NEXT:    v_cndmask_b32_sdwa v3, v3, v5, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX9-NEXT:    v_cndmask_b32_sdwa v3, v5, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
 ; GFX9-NEXT:    s_mov_b64 vcc, s[4:5]
-; GFX9-NEXT:    v_cndmask_b32_e64 v7, v2, v5, s[2:3]
-; GFX9-NEXT:    v_cndmask_b32_sdwa v2, v2, v5, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX9-NEXT:    v_cndmask_b32_e64 v7, v5, v2, s[2:3]
+; GFX9-NEXT:    v_cndmask_b32_sdwa v2, v5, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
 ; GFX9-NEXT:    s_mov_b64 vcc, s[8:9]
-; GFX9-NEXT:    v_cndmask_b32_e64 v8, v1, v5, s[6:7]
-; GFX9-NEXT:    v_cndmask_b32_sdwa v1, v1, v5, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX9-NEXT:    v_cndmask_b32_e64 v8, v5, v1, s[6:7]
+; GFX9-NEXT:    v_cndmask_b32_sdwa v1, v5, v1, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
 ; GFX9-NEXT:    s_mov_b64 vcc, s[12:13]
-; GFX9-NEXT:    v_cndmask_b32_e64 v9, v0, v5, s[10:11]
-; GFX9-NEXT:    v_cndmask_b32_sdwa v0, v0, v5, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX9-NEXT:    v_cndmask_b32_e64 v9, v5, v0, s[10:11]
+; GFX9-NEXT:    v_cndmask_b32_sdwa v0, v5, v0, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
 ; GFX9-NEXT:    v_perm_b32 v3, v3, v6, s14
 ; GFX9-NEXT:    v_perm_b32 v2, v2, v7, s14
 ; GFX9-NEXT:    v_perm_b32 v1, v1, v8, s14
@@ -2752,41 +2752,41 @@ define amdgpu_kernel void @v_insertelement_v8f16_dynamic(ptr addrspace(1) %out,
 ; VI-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
 ; VI-NEXT:    v_mov_b32_e32 v5, s1
 ; VI-NEXT:    v_add_u32_e32 v4, vcc, s0, v4
-; VI-NEXT:    s_cmp_eq_u32 s5, 6
+; VI-NEXT:    s_cmp_lg_u32 s5, 6
 ; VI-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
 ; VI-NEXT:    v_mov_b32_e32 v6, s4
 ; VI-NEXT:    s_cselect_b64 vcc, -1, 0
-; VI-NEXT:    s_cmp_eq_u32 s5, 7
+; VI-NEXT:    s_cmp_lg_u32 s5, 7
 ; VI-NEXT:    s_waitcnt vmcnt(0)
-; VI-NEXT:    v_cndmask_b32_e32 v7, v3, v6, vcc
+; VI-NEXT:    v_cndmask_b32_e32 v7, v6, v3, vcc
 ; VI-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
 ; VI-NEXT:    s_cselect_b64 vcc, -1, 0
-; VI-NEXT:    s_cmp_eq_u32 s5, 4
-; VI-NEXT:    v_cndmask_b32_e32 v3, v3, v6, vcc
+; VI-NEXT:    s_cmp_lg_u32 s5, 4
+; VI-NEXT:    v_cndmask_b32_e32 v3, v6, v3, vcc
 ; VI-NEXT:    s_cselect_b64 vcc, -1, 0
-; VI-NEXT:    s_cmp_eq_u32 s5, 5
+; VI-NEXT:    s_cmp_lg_u32 s5, 5
 ; VI-NEXT:    v_lshrrev_b32_e32 v8, 16, v2
-; VI-NEXT:    v_cndmask_b32_e32 v2, v2, v6, vcc
+; VI-NEXT:    v_cndmask_b32_e32 v2, v6, v2, vcc
 ; VI-NEXT:    s_cselect_b64 vcc, -1, 0
-; VI-NEXT:    s_cmp_eq_u32 s5, 2
+; VI-NEXT:    s_cmp_lg_u32 s5, 2
 ; VI-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
-; VI-NEXT:    v_cndmask_b32_e32 v8, v8, v6, vcc
+; VI-NEXT:    v_cndmask_b32_e32 v8, v6, v8, vcc
 ; VI-NEXT:    s_cselect_b64 vcc, -1, 0
-; VI-NEXT:    s_cmp_eq_u32 s5, 3
+; VI-NEXT:    s_cmp_lg_u32 s5, 3
 ; VI-NEXT:    v_lshrrev_b32_e32 v9, 16, v1
 ; VI-NEXT:    v_or_b32_sdwa v3, v7, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
 ; VI-NEXT:    v_lshlrev_b32_e32 v7, 16, v8
-; VI-NEXT:    v_cndmask_b32_e32 v1, v1, v6, vcc
+; VI-NEXT:    v_cndmask_b32_e32 v1, v6, v1, vcc
 ; VI-NEXT:    s_cselect_b64 vcc, -1, 0
-; VI-NEXT:    s_cmp_eq_u32 s5, 0
+; VI-NEXT:    s_cmp_lg_u32 s5, 0
 ; VI-NEXT:    v_or_b32_sdwa v2, v2, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; VI-NEXT:    v_cndmask_b32_e32 v7, v9, v6, vcc
+; VI-NEXT:    v_cndmask_b32_e32 v7, v6, v9, vcc
 ; VI-NEXT:    s_cselect_b64 vcc, -1, 0
-; VI-NEXT:    s_cmp_eq_u32 s5, 1
-; VI-NEXT:    v_cndmask_b32_e32 v8, v0, v6, vcc
+; VI-NEXT:    s_cmp_lg_u32 s5, 1
+; VI-NEXT:    v_cndmask_b32_e32 v8, v6, v0, vcc
 ; VI-NEXT:    s_cselect_b64 vcc, -1, 0
 ; VI-NEXT:    v_lshlrev_b32_e32 v7, 16, v7
-; VI-NEXT:    v_cndmask_b32_sdwa v0, v0, v6, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; VI-NEXT:    v_cndmask_b32_sdwa v0, v6, v0, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
 ; VI-NEXT:    v_or_b32_sdwa v1, v1, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
 ; VI-NEXT:    v_or_b32_sdwa v0, v8, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
 ; VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
@@ -2808,14 +2808,14 @@ define amdgpu_kernel void @v_insertelement_v8f16_dynamic(ptr addrspace(1) %out,
 ; CI-NEXT:    v_mov_b32_e32 v5, s1
 ; CI-NEXT:    v_add_i32_e32 v4, vcc, s0, v4
 ; CI-NEXT:    v_cvt_f32_f16_e32 v6, s4
-; CI-NEXT:    s_cmp_eq_u32 s5, 7
+; CI-NEXT:    s_cmp_lg_u32 s5, 7
 ; CI-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
 ; CI-NEXT:    s_cselect_b64 vcc, -1, 0
-; CI-NEXT:    s_cmp_eq_u32 s5, 6
+; CI-NEXT:    s_cmp_lg_u32 s5, 6
 ; CI-NEXT:    s_cselect_b64 s[0:1], -1, 0
-; CI-NEXT:    s_cmp_eq_u32 s5, 5
+; CI-NEXT:    s_cmp_lg_u32 s5, 5
 ; CI-NEXT:    s_cselect_b64 s[2:3], -1, 0
-; CI-NEXT:    s_cmp_eq_u32 s5, 4
+; CI-NEXT:    s_cmp_lg_u32 s5, 4
 ; CI-NEXT:    s_waitcnt vmcnt(0)
 ; CI-NEXT:    v_lshrrev_b32_e32 v7, 16, v3
 ; CI-NEXT:    v_cvt_f32_f16_e32 v3, v3
@@ -2827,30 +2827,30 @@ define amdgpu_kernel void @v_insertelement_v8f16_dynamic(ptr addrspace(1) %out,
 ; CI-NEXT:    v_lshrrev_b32_e32 v10, 16, v0
 ; CI-NEXT:    v_cvt_f32_f16_e32 v8, v8
 ; CI-NEXT:    v_cvt_f32_f16_e32 v10, v10
-; CI-NEXT:    v_cndmask_b32_e64 v3, v3, v6, s[0:1]
+; CI-NEXT:    v_cndmask_b32_e64 v3, v6, v3, s[0:1]
 ; CI-NEXT:    s_cselect_b64 s[0:1], -1, 0
-; CI-NEXT:    s_cmp_eq_u32 s5, 3
+; CI-NEXT:    s_cmp_lg_u32 s5, 3
 ; CI-NEXT:    v_cvt_f32_f16_e32 v2, v2
 ; CI-NEXT:    v_cvt_f32_f16_e32 v0, v0
-; CI-NEXT:    v_cndmask_b32_e32 v7, v7, v6, vcc
+; CI-NEXT:    v_cndmask_b32_e32 v7, v6, v7, vcc
 ; CI-NEXT:    s_cselect_b64 vcc, -1, 0
-; CI-NEXT:    s_cmp_eq_u32 s5, 2
-; CI-NEXT:    v_cndmask_b32_e32 v9, v9, v6, vcc
+; CI-NEXT:    s_cmp_lg_u32 s5, 2
+; CI-NEXT:    v_cndmask_b32_e32 v9, v6, v9, vcc
 ; CI-NEXT:    s_cselect_b64 vcc, -1, 0
-; CI-NEXT:    s_cmp_eq_u32 s5, 1
-; CI-NEXT:    v_cndmask_b32_e32 v1, v1, v6, vcc
+; CI-NEXT:    s_cmp_lg_u32 s5, 1
+; CI-NEXT:    v_cndmask_b32_e32 v1, v6, v1, vcc
 ; CI-NEXT:    s_cselect_b64 vcc, -1, 0
-; CI-NEXT:    s_cmp_eq_u32 s5, 0
-; CI-NEXT:    v_cndmask_b32_e64 v8, v8, v6, s[2:3]
+; CI-NEXT:    s_cmp_lg_u32 s5, 0
+; CI-NEXT:    v_cndmask_b32_e64 v8, v6, v8, s[2:3]
 ; CI-NEXT:    v_cvt_f16_f32_e32 v7, v7
-; CI-NEXT:    v_cndmask_b32_e32 v10, v10, v6, vcc
+; CI-NEXT:    v_cndmask_b32_e32 v10, v6, v10, vcc
 ; CI-NEXT:    s_cselect_b64 vcc, -1, 0
-; CI-NEXT:    v_cndmask_b32_e64 v2, v2, v6, s[0:1]
+; CI-NEXT:    v_cndmask_b32_e64 v2, v6, v2, s[0:1]
 ; CI-NEXT:    v_cvt_f16_f32_e32 v3, v3
 ; CI-NEXT:    v_cvt_f16_f32_e32 v8, v8
 ; CI-NEXT:    v_cvt_f16_f32_e32 v9, v9
 ; CI-NEXT:    v_cvt_f16_f32_e32 v10, v10
-; CI-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
+; CI-NEXT:    v_cndmask_b32_e32 v0, v6, v0, vcc
 ; CI-NEXT:    v_cvt_f16_f32_e32 v2, v2
 ; CI-NEXT:    v_cvt_f16_f32_e32 v1, v1
 ; CI-NEXT:    v_cvt_f16_f32_e32 v0, v0
@@ -2874,78 +2874,79 @@ define amdgpu_kernel void @v_insertelement_v8f16_dynamic(ptr addrspace(1) %out,
 ; GFX11-TRUE16-NEXT:    v_lshlrev_b32_e32 v4, 4, v0
 ; GFX11-TRUE16-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX11-TRUE16-NEXT:    global_load_b128 v[0:3], v4, s[2:3]
-; GFX11-TRUE16-NEXT:    s_cmp_eq_u32 s5, 6
+; GFX11-TRUE16-NEXT:    s_cmp_lg_u32 s5, 6
 ; GFX11-TRUE16-NEXT:    s_cselect_b32 s2, -1, 0
-; GFX11-TRUE16-NEXT:    s_cmp_eq_u32 s5, 7
+; GFX11-TRUE16-NEXT:    s_cmp_lg_u32 s5, 7
 ; GFX11-TRUE16-NEXT:    s_cselect_b32 s3, -1, 0
-; GFX11-TRUE16-NEXT:    s_cmp_eq_u32 s5, 4
+; GFX11-TRUE16-NEXT:    s_cmp_lg_u32 s5, 4
 ; GFX11-TRUE16-NEXT:    s_cselect_b32 s6, -1, 0
-; GFX11-TRUE16-NEXT:    s_cmp_eq_u32 s5, 5
+; GFX11-TRUE16-NEXT:    s_cmp_lg_u32 s5, 5
 ; GFX11-TRUE16-NEXT:    s_cselect_b32 s7, -1, 0
-; GFX11-TRUE16-NEXT:    s_cmp_eq_u32 s5, 2
+; GFX11-TRUE16-NEXT:    s_cmp_lg_u32 s5, 2
 ; GFX11-TRUE16-NEXT:    s_cselect_b32 s8, -1, 0
-; GFX11-TRUE16-NEXT:    s_cmp_eq_u32 s5, 3
+; GFX11-TRUE16-NEXT:    s_cmp_lg_u32 s5, 3
 ; GFX11-TRUE16-NEXT:    s_cselect_b32 s9, -1, 0
-; GFX11-TRUE16-NEXT:    s_cmp_eq_u32 s5, 0
+; GFX11-TRUE16-NEXT:    s_cmp_lg_u32 s5, 0
 ; GFX11-TRUE16-NEXT:    s_cselect_b32 s10, -1, 0
-; GFX11-TRUE16-NEXT:    s_cmp_eq_u32 s5, 1
+; GFX11-TRUE16-NEXT:    s_cmp_lg_u32 s5, 1
 ; GFX11-TRUE16-NEXT:    s_cselect_b32 s5, -1, 0
 ; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v3.l, v3.l, s4, s2
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v3.h, v3.h, s4, s3
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v2.l, v2.l, s4, s6
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v2.h, v2.h, s4, s7
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v1.l, v1.l, s4, s8
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v1.h, v1.h, s4, s9
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, v0.l, s4, s10
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.h, v0.h, s4, s5
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v3.l, s4, v3.l, s2
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v3.h, s4, v3.h, s3
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v2.l, s4, v2.l, s6
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v2.h, s4, v2.h, s7
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v1.l, s4, v1.l, s8
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v1.h, s4, v1.h, s9
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, s4, v0.l, s10
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.h, s4, v0.h, s5
 ; GFX11-TRUE16-NEXT:    global_store_b128 v4, v[0:3], s[0:1]
 ; GFX11-TRUE16-NEXT:    s_endpgm
 ;
 ; GFX11-FAKE16-LABEL: v_insertelement_v8f16_dynamic:
 ; GFX11-FAKE16:       ; %bb.0:
-; GFX11-FAKE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x0
+; GFX11-FAKE16-NEXT:    s_clause 0x1
+; GFX11-FAKE16-NEXT:    s_load_b128 s[8:11], s[4:5], 0x0
+; GFX11-FAKE16-NEXT:    s_load_b64 s[2:3], s[4:5], 0x10
 ; GFX11-FAKE16-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
-; GFX11-FAKE16-NEXT:    s_load_b64 s[4:5], s[4:5], 0x10
+; GFX11-FAKE16-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    s_cmp_lg_u32 s3, 6
+; GFX11-FAKE16-NEXT:    s_cselect_b32 vcc_lo, -1, 0
 ; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX11-FAKE16-NEXT:    v_lshlrev_b32_e32 v4, 4, v0
-; GFX11-FAKE16-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11-FAKE16-NEXT:    global_load_b128 v[0:3], v4, s[2:3]
-; GFX11-FAKE16-NEXT:    s_cmp_eq_u32 s5, 6
-; GFX11-FAKE16-NEXT:    s_cselect_b32 s2, -1, 0
-; GFX11-FAKE16-NEXT:    s_cmp_eq_u32 s5, 7
+; GFX11-FAKE16-NEXT:    s_cmp_lg_u32 s3, 7
+; GFX11-FAKE16-NEXT:    global_load_b128 v[0:3], v4, s[10:11]
 ; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e64 v5, v3, s4, s2
-; GFX11-FAKE16-NEXT:    s_cselect_b32 s2, -1, 0
-; GFX11-FAKE16-NEXT:    s_cmp_eq_u32 s5, 4
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v5, s2, v3, vcc_lo
+; GFX11-FAKE16-NEXT:    s_cselect_b32 vcc_lo, -1, 0
+; GFX11-FAKE16-NEXT:    s_cmp_lg_u32 s3, 4
 ; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
-; GFX11-FAKE16-NEXT:    s_cselect_b32 s3, -1, 0
-; GFX11-FAKE16-NEXT:    s_cmp_eq_u32 s5, 5
+; GFX11-FAKE16-NEXT:    s_cselect_b32 s0, -1, 0
+; GFX11-FAKE16-NEXT:    s_cmp_lg_u32 s3, 5
 ; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v2
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e64 v2, v2, s4, s3
-; GFX11-FAKE16-NEXT:    s_cselect_b32 s3, -1, 0
-; GFX11-FAKE16-NEXT:    s_cmp_eq_u32 s5, 2
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e64 v2, s2, v2, s0
+; GFX11-FAKE16-NEXT:    s_cselect_b32 s0, -1, 0
+; GFX11-FAKE16-NEXT:    s_cmp_lg_u32 s3, 2
 ; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v7, 16, v1
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e64 v3, v3, s4, s2
-; GFX11-FAKE16-NEXT:    s_cselect_b32 s2, -1, 0
-; GFX11-FAKE16-NEXT:    s_cmp_eq_u32 s5, 3
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e64 v1, v1, s4, s2
-; GFX11-FAKE16-NEXT:    s_cselect_b32 s2, -1, 0
-; GFX11-FAKE16-NEXT:    s_cmp_eq_u32 s5, 0
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v3, s2, v3, vcc_lo
+; GFX11-FAKE16-NEXT:    s_cselect_b32 vcc_lo, -1, 0
+; GFX11-FAKE16-NEXT:    s_cmp_lg_u32 s3, 3
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v1, s2, v1, vcc_lo
+; GFX11-FAKE16-NEXT:    s_cselect_b32 vcc_lo, -1, 0
+; GFX11-FAKE16-NEXT:    s_cmp_lg_u32 s3, 0
 ; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v8, 16, v0
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e64 v7, v7, s4, s2
-; GFX11-FAKE16-NEXT:    s_cselect_b32 s2, -1, 0
-; GFX11-FAKE16-NEXT:    s_cmp_eq_u32 s5, 1
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e64 v0, v0, s4, s2
-; GFX11-FAKE16-NEXT:    s_cselect_b32 s2, -1, 0
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e64 v6, v6, s4, s3
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e64 v8, v8, s4, s2
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v7, s2, v7, vcc_lo
+; GFX11-FAKE16-NEXT:    s_cselect_b32 vcc_lo, -1, 0
+; GFX11-FAKE16-NEXT:    s_cmp_lg_u32 s3, 1
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, s2, v0, vcc_lo
+; GFX11-FAKE16-NEXT:    s_cselect_b32 vcc_lo, -1, 0
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e64 v6, s2, v6, s0
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v8, s2, v8, vcc_lo
 ; GFX11-FAKE16-NEXT:    v_perm_b32 v3, v3, v5, 0x5040100
 ; GFX11-FAKE16-NEXT:    v_perm_b32 v1, v7, v1, 0x5040100
 ; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
 ; GFX11-FAKE16-NEXT:    v_perm_b32 v2, v6, v2, 0x5040100
 ; GFX11-FAKE16-NEXT:    v_perm_b32 v0, v8, v0, 0x5040100
-; GFX11-FAKE16-NEXT:    global_store_b128 v4, v[0:3], s[0:1]
+; GFX11-FAKE16-NEXT:    global_store_b128 v4, v[0:3], s[8:9]
 ; GFX11-FAKE16-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
   %tid.ext = sext i32 %tid to i64
@@ -3201,67 +3202,67 @@ define amdgpu_kernel void @v_insertelement_v16f16_dynamic(ptr addrspace(1) %out,
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    global_load_dwordx4 v[0:3], v8, s[38:39]
 ; GFX9-NEXT:    global_load_dwordx4 v[4:7], v8, s[38:39] offset:16
-; GFX9-NEXT:    s_cmp_eq_u32 s29, 6
+; GFX9-NEXT:    s_cmp_lg_u32 s29, 6
 ; GFX9-NEXT:    s_cselect_b64 vcc, -1, 0
-; GFX9-NEXT:    s_cmp_eq_u32 s29, 7
+; GFX9-NEXT:    s_cmp_lg_u32 s29, 7
 ; GFX9-NEXT:    s_cselect_b64 s[0:1], -1, 0
-; GFX9-NEXT:    s_cmp_eq_u32 s29, 4
+; GFX9-NEXT:    s_cmp_lg_u32 s29, 4
 ; GFX9-NEXT:    s_cselect_b64 s[2:3], -1, 0
-; GFX9-NEXT:    s_cmp_eq_u32 s29, 5
+; GFX9-NEXT:    s_cmp_lg_u32 s29, 5
 ; GFX9-NEXT:    s_cselect_b64 s[4:5], -1, 0
-; GFX9-NEXT:    s_cmp_eq_u32 s29, 2
+; GFX9-NEXT:    s_cmp_lg_u32 s29, 2
 ; GFX9-NEXT:    s_cselect_b64 s[6:7], -1, 0
-; GFX9-NEXT:    s_cmp_eq_u32 s29, 3
+; GFX9-NEXT:    s_cmp_lg_u32 s29, 3
 ; GFX9-NEXT:    s_cselect_b64 s[8:9], -1, 0
-; GFX9-NEXT:    s_cmp_eq_u32 s29, 0
+; GFX9-NEXT:    s_cmp_lg_u32 s29, 0
 ; GFX9-NEXT:    s_cselect_b64 s[10:11], -1, 0
-; GFX9-NEXT:    s_cmp_eq_u32 s29, 1
+; GFX9-NEXT:    s_cmp_lg_u32 s29, 1
 ; GFX9-NEXT:    s_cselect_b64 s[12:13], -1, 0
-; GFX9-NEXT:    s_cmp_eq_u32 s29, 14
+; GFX9-NEXT:    s_cmp_lg_u32 s29, 14
 ; GFX9-NEXT:    v_mov_b32_e32 v9, s28
 ; GFX9-NEXT:    s_cselect_b64 s[14:15], -1, 0
-; GFX9-NEXT:    s_cmp_eq_u32 s29, 15
+; GFX9-NEXT:    s_cmp_lg_u32 s29, 15
 ; GFX9-NEXT:    s_cselect_b64 s[16:17], -1, 0
-; GFX9-NEXT:    s_cmp_eq_u32 s29, 12
+; GFX9-NEXT:    s_cmp_lg_u32 s29, 12
 ; GFX9-NEXT:    s_cselect_b64 s[18:19], -1, 0
-; GFX9-NEXT:    s_cmp_eq_u32 s29, 13
+; GFX9-NEXT:    s_cmp_lg_u32 s29, 13
 ; GFX9-NEXT:    s_cselect_b64 s[20:21], -1, 0
-; GFX9-NEXT:    s_cmp_eq_u32 s29, 10
+; GFX9-NEXT:    s_cmp_lg_u32 s29, 10
 ; GFX9-NEXT:    s_cselect_b64 s[22:23], -1, 0
-; GFX9-NEXT:    s_cmp_eq_u32 s29, 11
+; GFX9-NEXT:    s_cmp_lg_u32 s29, 11
 ; GFX9-NEXT:    s_cselect_b64 s[24:25], -1, 0
-; GFX9-NEXT:    s_cmp_eq_u32 s29, 8
+; GFX9-NEXT:    s_cmp_lg_u32 s29, 8
 ; GFX9-NEXT:    s_cselect_b64 s[26:27], -1, 0
-; GFX9-NEXT:    s_cmp_eq_u32 s29, 9
+; GFX9-NEXT:    s_cmp_lg_u32 s29, 9
 ; GFX9-NEXT:    s_cselect_b64 s[28:29], -1, 0
 ; GFX9-NEXT:    s_waitcnt vmcnt(1)
-; GFX9-NEXT:    v_cndmask_b32_e32 v10, v3, v9, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v10, v9, v3, vcc
 ; GFX9-NEXT:    s_mov_b64 vcc, s[0:1]
-; GFX9-NEXT:    v_cndmask_b32_sdwa v3, v3, v9, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX9-NEXT:    v_cndmask_b32_sdwa v3, v9, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
 ; GFX9-NEXT:    s_mov_b64 vcc, s[4:5]
-; GFX9-NEXT:    v_cndmask_b32_e64 v11, v2, v9, s[2:3]
-; GFX9-NEXT:    v_cndmask_b32_sdwa v2, v2, v9, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX9-NEXT:    v_cndmask_b32_e64 v11, v9, v2, s[2:3]
+; GFX9-NEXT:    v_cndmask_b32_sdwa v2, v9, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
 ; GFX9-NEXT:    s_mov_b64 vcc, s[8:9]
-; GFX9-NEXT:    v_cndmask_b32_e64 v12, v1, v9, s[6:7]
-; GFX9-NEXT:    v_cndmask_b32_sdwa v1, v1, v9, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX9-NEXT:    v_cndmask_b32_e64 v12, v9, v1, s[6:7]
+; GFX9-NEXT:    v_cndmask_b32_sdwa v1, v9, v1, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
 ; GFX9-NEXT:    s_mov_b64 vcc, s[12:13]
-; GFX9-NEXT:    v_cndmask_b32_e64 v13, v0, v9, s[10:11]
-; GFX9-NEXT:    v_cndmask_b32_sdwa v0, v0, v9, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX9-NEXT:    v_cndmask_b32_e64 v13, v9, v0, s[10:11]
+; GFX9-NEXT:    v_cndmask_b32_sdwa v0, v9, v0, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
 ; GFX9-NEXT:    s_mov_b64 vcc, s[16:17]
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    v_cndmask_b32_e64 v14, v7, v9, s[14:15]
-; GFX9-NEXT:    v_cndmask_b32_sdwa v7, v7, v9, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX9-NEXT:    v_cndmask_b32_e64 v14, v9, v7, s[14:15]
+; GFX9-NEXT:    v_cndmask_b32_sdwa v7, v9, v7, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
 ; GFX9-NEXT:    s_mov_b64 vcc, s[20:21]
-; GFX9-NEXT:    v_cndmask_b32_e64 v15, v6, v9, s[18:19]
-; GFX9-NEXT:    v_cndmask_b32_sdwa v6, v6, v9, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX9-NEXT:    v_cndmask_b32_e64 v15, v9, v6, s[18:19]
+; GFX9-NEXT:    v_cndmask_b32_sdwa v6, v9, v6, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
 ; GFX9-NEXT:    s_mov_b64 vcc, s[24:25]
 ; GFX9-NEXT:    v_perm_b32 v3, v3, v10, s30
-; GFX9-NEXT:    v_cndmask_b32_e64 v10, v5, v9, s[22:23]
-; GFX9-NEXT:    v_cndmask_b32_sdwa v5, v5, v9, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX9-NEXT:    v_cndmask_b32_e64 v10, v9, v5, s[22:23]
+; GFX9-NEXT:    v_cndmask_b32_sdwa v5, v9, v5, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
 ; GFX9-NEXT:    s_mov_b64 vcc, s[28:29]
 ; GFX9-NEXT:    v_perm_b32 v2, v2, v11, s30
-; GFX9-NEXT:    v_cndmask_b32_e64 v11, v4, v9, s[26:27]
-; GFX9-NEXT:    v_cndmask_b32_sdwa v4, v4, v9, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX9-NEXT:    v_cndmask_b32_e64 v11, v9, v4, s[26:27]
+; GFX9-NEXT:    v_cndmask_b32_sdwa v4, v9, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
 ; GFX9-NEXT:    v_perm_b32 v7, v7, v14, s30
 ; GFX9-NEXT:    v_perm_b32 v6, v6, v15, s30
 ; GFX9-NEXT:    v_perm_b32 v5, v5, v10, s30
@@ -3292,76 +3293,76 @@ define amdgpu_kernel void @v_insertelement_v16f16_dynamic(ptr addrspace(1) %out,
 ; VI-NEXT:    v_add_u32_e32 v8, vcc, s0, v8
 ; VI-NEXT:    v_addc_u32_e32 v9, vcc, 0, v9, vcc
 ; VI-NEXT:    v_add_u32_e32 v10, vcc, 16, v8
-; VI-NEXT:    s_cmp_eq_u32 s7, 14
+; VI-NEXT:    s_cmp_lg_u32 s7, 14
 ; VI-NEXT:    v_addc_u32_e32 v11, vcc, 0, v9, vcc
 ; VI-NEXT:    v_mov_b32_e32 v12, s6
 ; VI-NEXT:    s_cselect_b64 vcc, -1, 0
-; VI-NEXT:    s_cmp_eq_u32 s7, 15
+; VI-NEXT:    s_cmp_lg_u32 s7, 15
 ; VI-NEXT:    s_waitcnt vmcnt(1)
-; VI-NEXT:    v_cndmask_b32_e32 v13, v3, v12, vcc
+; VI-NEXT:    v_cndmask_b32_e32 v13, v12, v3, vcc
 ; VI-NEXT:    s_cselect_b64 vcc, -1, 0
-; VI-NEXT:    s_cmp_eq_u32 s7, 12
+; VI-NEXT:    s_cmp_lg_u32 s7, 12
 ; VI-NEXT:    s_cselect_b64 s[0:1], -1, 0
-; VI-NEXT:    s_cmp_eq_u32 s7, 13
+; VI-NEXT:    s_cmp_lg_u32 s7, 13
 ; VI-NEXT:    v_lshrrev_b32_e32 v14, 16, v2
-; VI-NEXT:    v_cndmask_b32_e64 v2, v2, v12, s[0:1]
+; VI-NEXT:    v_cndmask_b32_e64 v2, v12, v2, s[0:1]
 ; VI-NEXT:    s_cselect_b64 s[0:1], -1, 0
-; VI-NEXT:    s_cmp_eq_u32 s7, 10
+; VI-NEXT:    s_cmp_lg_u32 s7, 10
 ; VI-NEXT:    s_cselect_b64 s[2:3], -1, 0
-; VI-NEXT:    s_cmp_eq_u32 s7, 11
+; VI-NEXT:    s_cmp_lg_u32 s7, 11
 ; VI-NEXT:    v_lshrrev_b32_e32 v15, 16, v1
-; VI-NEXT:    v_cndmask_b32_e64 v1, v1, v12, s[2:3]
+; VI-NEXT:    v_cndmask_b32_e64 v1, v12, v1, s[2:3]
 ; VI-NEXT:    s_cselect_b64 s[2:3], -1, 0
-; VI-NEXT:    s_cmp_eq_u32 s7, 8
+; VI-NEXT:    s_cmp_lg_u32 s7, 8
 ; VI-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
 ; VI-NEXT:    s_cselect_b64 s[4:5], -1, 0
-; VI-NEXT:    v_cndmask_b32_e64 v15, v15, v12, s[2:3]
-; VI-NEXT:    s_cmp_eq_u32 s7, 9
+; VI-NEXT:    v_cndmask_b32_e64 v15, v12, v15, s[2:3]
+; VI-NEXT:    s_cmp_lg_u32 s7, 9
 ; VI-NEXT:    v_lshrrev_b32_e32 v16, 16, v0
-; VI-NEXT:    v_cndmask_b32_e32 v3, v3, v12, vcc
+; VI-NEXT:    v_cndmask_b32_e32 v3, v12, v3, vcc
 ; VI-NEXT:    v_lshlrev_b32_e32 v15, 16, v15
 ; VI-NEXT:    s_cselect_b64 vcc, -1, 0
-; VI-NEXT:    s_cmp_eq_u32 s7, 6
+; VI-NEXT:    s_cmp_lg_u32 s7, 6
 ; VI-NEXT:    v_or_b32_sdwa v1, v1, v15 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; VI-NEXT:    v_cndmask_b32_e32 v15, v16, v12, vcc
+; VI-NEXT:    v_cndmask_b32_e32 v15, v12, v16, vcc
 ; VI-NEXT:    s_cselect_b64 vcc, -1, 0
-; VI-NEXT:    s_cmp_eq_u32 s7, 7
-; VI-NEXT:    v_cndmask_b32_e64 v0, v0, v12, s[4:5]
+; VI-NEXT:    s_cmp_lg_u32 s7, 7
+; VI-NEXT:    v_cndmask_b32_e64 v0, v12, v0, s[4:5]
 ; VI-NEXT:    s_waitcnt vmcnt(0)
 ; VI-NEXT:    v_lshrrev_b32_e32 v17, 16, v7
 ; VI-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
-; VI-NEXT:    v_cndmask_b32_e64 v14, v14, v12, s[0:1]
+; VI-NEXT:    v_cndmask_b32_e64 v14, v12, v14, s[0:1]
 ; VI-NEXT:    v_lshlrev_b32_e32 v15, 16, v15
-; VI-NEXT:    v_cndmask_b32_e32 v7, v7, v12, vcc
+; VI-NEXT:    v_cndmask_b32_e32 v7, v12, v7, vcc
 ; VI-NEXT:    s_cselect_b64 vcc, -1, 0
-; VI-NEXT:    s_cmp_eq_u32 s7, 4
+; VI-NEXT:    s_cmp_lg_u32 s7, 4
 ; VI-NEXT:    v_or_b32_sdwa v3, v13, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
 ; VI-NEXT:    v_lshlrev_b32_e32 v13, 16, v14
 ; VI-NEXT:    v_or_b32_sdwa v0, v0, v15 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; VI-NEXT:    v_cndmask_b32_e32 v15, v17, v12, vcc
+; VI-NEXT:    v_cndmask_b32_e32 v15, v12, v17, vcc
 ; VI-NEXT:    s_cselect_b64 vcc, -1, 0
-; VI-NEXT:    s_cmp_eq_u32 s7, 5
+; VI-NEXT:    s_cmp_lg_u32 s7, 5
 ; VI-NEXT:    v_or_b32_sdwa v2, v2, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
 ; VI-NEXT:    v_lshrrev_b32_e32 v13, 16, v6
-; VI-NEXT:    v_cndmask_b32_e32 v6, v6, v12, vcc
+; VI-NEXT:    v_cndmask_b32_e32 v6, v12, v6, vcc
 ; VI-NEXT:    s_cselect_b64 vcc, -1, 0
-; VI-NEXT:    s_cmp_eq_u32 s7, 2
-; VI-NEXT:    v_cndmask_b32_e32 v13, v13, v12, vcc
+; VI-NEXT:    s_cmp_lg_u32 s7, 2
+; VI-NEXT:    v_cndmask_b32_e32 v13, v12, v13, vcc
 ; VI-NEXT:    s_cselect_b64 vcc, -1, 0
-; VI-NEXT:    s_cmp_eq_u32 s7, 3
+; VI-NEXT:    s_cmp_lg_u32 s7, 3
 ; VI-NEXT:    v_lshrrev_b32_e32 v14, 16, v5
-; VI-NEXT:    v_cndmask_b32_e32 v5, v5, v12, vcc
+; VI-NEXT:    v_cndmask_b32_e32 v5, v12, v5, vcc
 ; VI-NEXT:    s_cselect_b64 vcc, -1, 0
-; VI-NEXT:    s_cmp_eq_u32 s7, 0
+; VI-NEXT:    s_cmp_lg_u32 s7, 0
 ; VI-NEXT:    v_lshlrev_b32_e32 v13, 16, v13
-; VI-NEXT:    v_cndmask_b32_e32 v14, v14, v12, vcc
+; VI-NEXT:    v_cndmask_b32_e32 v14, v12, v14, vcc
 ; VI-NEXT:    s_cselect_b64 vcc, -1, 0
-; VI-NEXT:    s_cmp_eq_u32 s7, 1
+; VI-NEXT:    s_cmp_lg_u32 s7, 1
 ; VI-NEXT:    v_or_b32_sdwa v6, v6, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
 ; VI-NEXT:    v_lshrrev_b32_e32 v13, 16, v4
-; VI-NEXT:    v_cndmask_b32_e32 v4, v4, v12, vcc
+; VI-NEXT:    v_cndmask_b32_e32 v4, v12, v4, vcc
 ; VI-NEXT:    s_cselect_b64 vcc, -1, 0
-; VI-NEXT:    v_cndmask_b32_e32 v12, v13, v12, vcc
+; VI-NEXT:    v_cndmask_b32_e32 v12, v12, v13, vcc
 ; VI-NEXT:    v_lshlrev_b32_e32 v15, 16, v15
 ; VI-NEXT:    v_lshlrev_b32_e32 v14, 16, v14
 ; VI-NEXT:    v_lshlrev_b32_e32 v12, 16, v12
@@ -3391,14 +3392,14 @@ define amdgpu_kernel void @v_insertelement_v16f16_dynamic(ptr addrspace(1) %out,
 ; CI-NEXT:    v_mov_b32_e32 v5, s1
 ; CI-NEXT:    v_add_i32_e32 v4, vcc, s0, v4
 ; CI-NEXT:    v_cvt_f32_f16_e32 v6, s4
-; CI-NEXT:    s_cmp_eq_u32 s5, 15
+; CI-NEXT:    s_cmp_lg_u32 s5, 15
 ; CI-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
 ; CI-NEXT:    s_cselect_b64 vcc, -1, 0
-; CI-NEXT:    s_cmp_eq_u32 s5, 14
+; CI-NEXT:    s_cmp_lg_u32 s5, 14
 ; CI-NEXT:    s_cselect_b64 s[0:1], -1, 0
-; CI-NEXT:    s_cmp_eq_u32 s5, 13
+; CI-NEXT:    s_cmp_lg_u32 s5, 13
 ; CI-NEXT:    s_cselect_b64 s[2:3], -1, 0
-; CI-NEXT:    s_cmp_eq_u32 s5, 12
+; CI-NEXT:    s_cmp_lg_u32 s5, 12
 ; CI-NEXT:    s_waitcnt vmcnt(1)
 ; CI-NEXT:    v_lshrrev_b32_e32 v11, 16, v10
 ; CI-NEXT:    v_cvt_f32_f16_e32 v10, v10
@@ -3409,20 +3410,20 @@ define amdgpu_kernel void @v_insertelement_v16f16_dynamic(ptr addrspace(1) %out,
 ; CI-NEXT:    v_cvt_f32_f16_e32 v9, v9
 ; CI-NEXT:    v_cvt_f32_f16_e32 v13, v13
 ; CI-NEXT:    v_cvt_f32_f16_e32 v8, v8
-; CI-NEXT:    v_cndmask_b32_e64 v10, v10, v6, s[0:1]
+; CI-NEXT:    v_cndmask_b32_e64 v10, v6, v10, s[0:1]
 ; CI-NEXT:    s_cselect_b64 s[0:1], -1, 0
-; CI-NEXT:    s_cmp_eq_u32 s5, 11
-; CI-NEXT:    v_cndmask_b32_e32 v11, v11, v6, vcc
-; CI-NEXT:    v_cndmask_b32_e64 v12, v12, v6, s[2:3]
+; CI-NEXT:    s_cmp_lg_u32 s5, 11
+; CI-NEXT:    v_cndmask_b32_e32 v11, v6, v11, vcc
+; CI-NEXT:    v_cndmask_b32_e64 v12, v6, v12, s[2:3]
 ; CI-NEXT:    s_cselect_b64 vcc, -1, 0
-; CI-NEXT:    s_cmp_eq_u32 s5, 10
-; CI-NEXT:    v_cndmask_b32_e64 v9, v9, v6, s[0:1]
-; CI-NEXT:    v_cndmask_b32_e32 v13, v13, v6, vcc
+; CI-NEXT:    s_cmp_lg_u32 s5, 10
+; CI-NEXT:    v_cndmask_b32_e64 v9, v6, v9, s[0:1]
+; CI-NEXT:    v_cndmask_b32_e32 v13, v6, v13, vcc
 ; CI-NEXT:    v_cvt_f16_f32_e32 v12, v12
 ; CI-NEXT:    s_cselect_b64 vcc, -1, 0
 ; CI-NEXT:    v_cvt_f16_f32_e32 v9, v9
 ; CI-NEXT:    v_cvt_f16_f32_e32 v13, v13
-; CI-NEXT:    v_cndmask_b32_e32 v8, v8, v6, vcc
+; CI-NEXT:    v_cndmask_b32_e32 v8, v6, v8, vcc
 ; CI-NEXT:    v_cvt_f16_f32_e32 v8, v8
 ; CI-NEXT:    v_lshlrev_b32_e32 v12, 16, v12
 ; CI-NEXT:    v_lshrrev_b32_e32 v14, 16, v7
@@ -3434,35 +3435,35 @@ define amdgpu_kernel void @v_insertelement_v16f16_dynamic(ptr addrspace(1) %out,
 ; CI-NEXT:    s_waitcnt vmcnt(0)
 ; CI-NEXT:    v_lshrrev_b32_e32 v15, 16, v3
 ; CI-NEXT:    v_cvt_f32_f16_e32 v13, v15
-; CI-NEXT:    s_cmp_eq_u32 s5, 9
+; CI-NEXT:    s_cmp_lg_u32 s5, 9
 ; CI-NEXT:    v_cvt_f32_f16_e32 v3, v3
 ; CI-NEXT:    v_lshrrev_b32_e32 v16, 16, v2
 ; CI-NEXT:    s_cselect_b64 vcc, -1, 0
-; CI-NEXT:    s_cmp_eq_u32 s5, 8
+; CI-NEXT:    s_cmp_lg_u32 s5, 8
 ; CI-NEXT:    v_cvt_f32_f16_e32 v14, v16
-; CI-NEXT:    v_cndmask_b32_e32 v12, v12, v6, vcc
+; CI-NEXT:    v_cndmask_b32_e32 v12, v6, v12, vcc
 ; CI-NEXT:    s_cselect_b64 vcc, -1, 0
-; CI-NEXT:    s_cmp_eq_u32 s5, 7
+; CI-NEXT:    s_cmp_lg_u32 s5, 7
 ; CI-NEXT:    v_cvt_f32_f16_e32 v2, v2
-; CI-NEXT:    v_cndmask_b32_e32 v7, v7, v6, vcc
+; CI-NEXT:    v_cndmask_b32_e32 v7, v6, v7, vcc
 ; CI-NEXT:    s_cselect_b64 vcc, -1, 0
-; CI-NEXT:    s_cmp_eq_u32 s5, 6
-; CI-NEXT:    v_cndmask_b32_e32 v13, v13, v6, vcc
+; CI-NEXT:    s_cmp_lg_u32 s5, 6
+; CI-NEXT:    v_cndmask_b32_e32 v13, v6, v13, vcc
 ; CI-NEXT:    s_cselect_b64 vcc, -1, 0
-; CI-NEXT:    s_cmp_eq_u32 s5, 5
+; CI-NEXT:    s_cmp_lg_u32 s5, 5
 ; CI-NEXT:    v_cvt_f16_f32_e32 v11, v11
 ; CI-NEXT:    v_cvt_f16_f32_e32 v12, v12
-; CI-NEXT:    v_cndmask_b32_e32 v3, v3, v6, vcc
+; CI-NEXT:    v_cndmask_b32_e32 v3, v6, v3, vcc
 ; CI-NEXT:    s_cselect_b64 vcc, -1, 0
-; CI-NEXT:    s_cmp_eq_u32 s5, 4
+; CI-NEXT:    s_cmp_lg_u32 s5, 4
 ; CI-NEXT:    v_cvt_f16_f32_e32 v10, v10
 ; CI-NEXT:    v_cvt_f16_f32_e32 v7, v7
 ; CI-NEXT:    v_cvt_f16_f32_e32 v13, v13
-; CI-NEXT:    v_cndmask_b32_e32 v14, v14, v6, vcc
+; CI-NEXT:    v_cndmask_b32_e32 v14, v6, v14, vcc
 ; CI-NEXT:    s_cselect_b64 vcc, -1, 0
 ; CI-NEXT:    v_cvt_f16_f32_e32 v3, v3
 ; CI-NEXT:    v_cvt_f16_f32_e32 v14, v14
-; CI-NEXT:    v_cndmask_b32_e32 v2, v2, v6, vcc
+; CI-NEXT:    v_cndmask_b32_e32 v2, v6, v2, vcc
 ; CI-NEXT:    v_cvt_f16_f32_e32 v2, v2
 ; CI-NEXT:    v_lshlrev_b32_e32 v11, 16, v11
 ; CI-NEXT:    v_lshlrev_b32_e32 v12, 16, v12
@@ -3476,23 +3477,23 @@ define amdgpu_kernel void @v_insertelement_v16f16_dynamic(ptr addrspace(1) %out,
 ; CI-NEXT:    v_cvt_f32_f16_e32 v1, v1
 ; CI-NEXT:    v_or_b32_e32 v2, v2, v12
 ; CI-NEXT:    v_lshrrev_b32_e32 v12, 16, v0
-; CI-NEXT:    s_cmp_eq_u32 s5, 3
+; CI-NEXT:    s_cmp_lg_u32 s5, 3
 ; CI-NEXT:    v_cvt_f32_f16_e32 v12, v12
 ; CI-NEXT:    s_cselect_b64 vcc, -1, 0
-; CI-NEXT:    s_cmp_eq_u32 s5, 2
+; CI-NEXT:    s_cmp_lg_u32 s5, 2
 ; CI-NEXT:    v_cvt_f32_f16_e32 v0, v0
-; CI-NEXT:    v_cndmask_b32_e32 v11, v11, v6, vcc
+; CI-NEXT:    v_cndmask_b32_e32 v11, v6, v11, vcc
 ; CI-NEXT:    s_cselect_b64 vcc, -1, 0
-; CI-NEXT:    s_cmp_eq_u32 s5, 1
-; CI-NEXT:    v_cndmask_b32_e32 v1, v1, v6, vcc
+; CI-NEXT:    s_cmp_lg_u32 s5, 1
+; CI-NEXT:    v_cndmask_b32_e32 v1, v6, v1, vcc
 ; CI-NEXT:    s_cselect_b64 vcc, -1, 0
-; CI-NEXT:    s_cmp_eq_u32 s5, 0
+; CI-NEXT:    s_cmp_lg_u32 s5, 0
 ; CI-NEXT:    v_cvt_f16_f32_e32 v11, v11
-; CI-NEXT:    v_cndmask_b32_e32 v12, v12, v6, vcc
+; CI-NEXT:    v_cndmask_b32_e32 v12, v6, v12, vcc
 ; CI-NEXT:    s_cselect_b64 vcc, -1, 0
 ; CI-NEXT:    v_cvt_f16_f32_e32 v1, v1
 ; CI-NEXT:    v_cvt_f16_f32_e32 v12, v12
-; CI-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
+; CI-NEXT:    v_cndmask_b32_e32 v0, v6, v0, vcc
 ; CI-NEXT:    v_cvt_f16_f32_e32 v0, v0
 ; CI-NEXT:    v_lshlrev_b32_e32 v6, 16, v11
 ; CI-NEXT:    v_or_b32_e32 v1, v1, v6
@@ -3516,56 +3517,56 @@ define amdgpu_kernel void @v_insertelement_v16f16_dynamic(ptr addrspace(1) %out,
 ; GFX11-TRUE16-NEXT:    s_clause 0x1
 ; GFX11-TRUE16-NEXT:    global_load_b128 v[0:3], v8, s[2:3]
 ; GFX11-TRUE16-NEXT:    global_load_b128 v[4:7], v8, s[2:3] offset:16
-; GFX11-TRUE16-NEXT:    s_cmp_eq_u32 s5, 6
+; GFX11-TRUE16-NEXT:    s_cmp_lg_u32 s5, 6
 ; GFX11-TRUE16-NEXT:    s_cselect_b32 s2, -1, 0
-; GFX11-TRUE16-NEXT:    s_cmp_eq_u32 s5, 7
+; GFX11-TRUE16-NEXT:    s_cmp_lg_u32 s5, 7
 ; GFX11-TRUE16-NEXT:    s_cselect_b32 s3, -1, 0
-; GFX11-TRUE16-NEXT:    s_cmp_eq_u32 s5, 4
+; GFX11-TRUE16-NEXT:    s_cmp_lg_u32 s5, 4
 ; GFX11-TRUE16-NEXT:    s_cselect_b32 s6, -1, 0
-; GFX11-TRUE16-NEXT:    s_cmp_eq_u32 s5, 5
+; GFX11-TRUE16-NEXT:    s_cmp_lg_u32 s5, 5
 ; GFX11-TRUE16-NEXT:    s_cselect_b32 s7, -1, 0
-; GFX11-TRUE16-NEXT:    s_cmp_eq_u32 s5, 2
+; GFX11-TRUE16-NEXT:    s_cmp_lg_u32 s5, 2
 ; GFX11-TRUE16-NEXT:    s_cselect_b32 s8, -1, 0
-; GFX11-TRUE16-NEXT:    s_cmp_eq_u32 s5, 3
+; GFX11-TRUE16-NEXT:    s_cmp_lg_u32 s5, 3
 ; GFX11-TRUE16-NEXT:    s_cselect_b32 s9, -1, 0
-; GFX11-TRUE16-NEXT:    s_cmp_eq_u32 s5, 0
+; GFX11-TRUE16-NEXT:    s_cmp_lg_u32 s5, 0
 ; GFX11-TRUE16-NEXT:    s_cselect_b32 s10, -1, 0
-; GFX11-TRUE16-NEXT:    s_cmp_eq_u32 s5, 1
+; GFX11-TRUE16-NEXT:    s_cmp_lg_u32 s5, 1
 ; GFX11-TRUE16-NEXT:    s_cselect_b32 s11, -1, 0
-; GFX11-TRUE16-NEXT:    s_cmp_eq_u32 s5, 14
+; GFX11-TRUE16-NEXT:    s_cmp_lg_u32 s5, 14
 ; GFX11-TRUE16-NEXT:    s_cselect_b32 s12, -1, 0
-; GFX11-TRUE16-NEXT:    s_cmp_eq_u32 s5, 15
+; GFX11-TRUE16-NEXT:    s_cmp_lg_u32 s5, 15
 ; GFX11-TRUE16-NEXT:    s_cselect_b32 s13, -1, 0
-; GFX11-TRUE16-NEXT:    s_cmp_eq_u32 s5, 12
+; GFX11-TRUE16-NEXT:    s_cmp_lg_u32 s5, 12
 ; GFX11-TRUE16-NEXT:    s_cselect_b32 s14, -1, 0
-; GFX11-TRUE16-NEXT:    s_cmp_eq_u32 s5, 13
+; GFX11-TRUE16-NEXT:    s_cmp_lg_u32 s5, 13
 ; GFX11-TRUE16-NEXT:    s_cselect_b32 s15, -1, 0
-; GFX11-TRUE16-NEXT:    s_cmp_eq_u32 s5, 10
+; GFX11-TRUE16-NEXT:    s_cmp_lg_u32 s5, 10
 ; GFX11-TRUE16-NEXT:    s_cselect_b32 s16, -1, 0
-; GFX11-TRUE16-NEXT:    s_cmp_eq_u32 s5, 11
+; GFX11-TRUE16-NEXT:    s_cmp_lg_u32 s5, 11
 ; GFX11-TRUE16-NEXT:    s_cselect_b32 s17, -1, 0
-; GFX11-TRUE16-NEXT:    s_cmp_eq_u32 s5, 8
+; GFX11-TRUE16-NEXT:    s_cmp_lg_u32 s5, 8
 ; GFX11-TRUE16-NEXT:    s_cselect_b32 s18, -1, 0
-; GFX11-TRUE16-NEXT:    s_cmp_eq_u32 s5, 9
+; GFX11-TRUE16-NEXT:    s_cmp_lg_u32 s5, 9
 ; GFX11-TRUE16-NEXT:    s_cselect_b32 s5, -1, 0
 ; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(1)
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v3.l, v3.l, s4, s2
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v3.l, s4, v3.l, s2
 ; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v7.l, v7.l, s4, s12
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v7.h, v7.h, s4, s13
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v6.l, v6.l, s4, s14
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v6.h, v6.h, s4, s15
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v5.l, v5.l, s4, s16
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v5.h, v5.h, s4, s17
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v4.l, v4.l, s4, s18
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v4.h, v4.h, s4, s5
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v3.h, v3.h, s4, s3
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v2.l, v2.l, s4, s6
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v2.h, v2.h, s4, s7
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v1.l, v1.l, s4, s8
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v1.h, v1.h, s4, s9
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, v0.l, s4, s10
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.h, v0.h, s4, s11
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v7.l, s4, v7.l, s12
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v7.h, s4, v7.h, s13
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v6.l, s4, v6.l, s14
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v6.h, s4, v6.h, s15
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v5.l, s4, v5.l, s16
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v5.h, s4, v5.h, s17
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v4.l, s4, v4.l, s18
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v4.h, s4, v4.h, s5
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v3.h, s4, v3.h, s3
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v2.l, s4, v2.l, s6
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v2.h, s4, v2.h, s7
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v1.l, s4, v1.l, s8
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v1.h, s4, v1.h, s9
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, s4, v0.l, s10
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.h, s4, v0.h, s11
 ; GFX11-TRUE16-NEXT:    s_clause 0x1
 ; GFX11-TRUE16-NEXT:    global_store_b128 v8, v[4:7], s[0:1] offset:16
 ; GFX11-TRUE16-NEXT:    global_store_b128 v8, v[0:3], s[0:1]
@@ -3573,84 +3574,85 @@ define amdgpu_kernel void @v_insertelement_v16f16_dynamic(ptr addrspace(1) %out,
 ;
 ; GFX11-FAKE16-LABEL: v_insertelement_v16f16_dynamic:
 ; GFX11-FAKE16:       ; %bb.0:
-; GFX11-FAKE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x0
+; GFX11-FAKE16-NEXT:    s_clause 0x1
+; GFX11-FAKE16-NEXT:    s_load_b128 s[8:11], s[4:5], 0x0
+; GFX11-FAKE16-NEXT:    s_load_b64 s[2:3], s[4:5], 0x10
 ; GFX11-FAKE16-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
-; GFX11-FAKE16-NEXT:    s_load_b64 s[4:5], s[4:5], 0x10
+; GFX11-FAKE16-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    s_cmp_lg_u32 s3, 6
+; GFX11-FAKE16-NEXT:    s_cselect_b32 vcc_lo, -1, 0
 ; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX11-FAKE16-NEXT:    v_lshlrev_b32_e32 v8, 5, v0
-; GFX11-FAKE16-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    s_cmp_lg_u32 s3, 7
 ; GFX11-FAKE16-NEXT:    s_clause 0x1
-; GFX11-FAKE16-NEXT:    global_load_b128 v[0:3], v8, s[2:3]
-; GFX11-FAKE16-NEXT:    global_load_b128 v[4:7], v8, s[2:3] offset:16
-; GFX11-FAKE16-NEXT:    s_cmp_eq_u32 s5, 6
-; GFX11-FAKE16-NEXT:    s_cselect_b32 s2, -1, 0
-; GFX11-FAKE16-NEXT:    s_cmp_eq_u32 s5, 7
+; GFX11-FAKE16-NEXT:    global_load_b128 v[0:3], v8, s[10:11]
+; GFX11-FAKE16-NEXT:    global_load_b128 v[4:7], v8, s[10:11] offset:16
 ; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(1)
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e64 v9, v3, s4, s2
-; GFX11-FAKE16-NEXT:    s_cselect_b32 s2, -1, 0
-; GFX11-FAKE16-NEXT:    s_cmp_eq_u32 s5, 4
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v9, s2, v3, vcc_lo
+; GFX11-FAKE16-NEXT:    s_cselect_b32 vcc_lo, -1, 0
+; GFX11-FAKE16-NEXT:    s_cmp_lg_u32 s3, 4
 ; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
-; GFX11-FAKE16-NEXT:    s_cselect_b32 s3, -1, 0
-; GFX11-FAKE16-NEXT:    s_cmp_eq_u32 s5, 5
+; GFX11-FAKE16-NEXT:    s_cselect_b32 s0, -1, 0
+; GFX11-FAKE16-NEXT:    s_cmp_lg_u32 s3, 5
 ; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v10, 16, v2
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e64 v2, v2, s4, s3
-; GFX11-FAKE16-NEXT:    s_cselect_b32 s3, -1, 0
-; GFX11-FAKE16-NEXT:    s_cmp_eq_u32 s5, 2
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e64 v2, s2, v2, s0
+; GFX11-FAKE16-NEXT:    s_cselect_b32 s0, -1, 0
+; GFX11-FAKE16-NEXT:    s_cmp_lg_u32 s3, 2
 ; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v11, 16, v1
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e64 v3, v3, s4, s2
-; GFX11-FAKE16-NEXT:    s_cselect_b32 s2, -1, 0
-; GFX11-FAKE16-NEXT:    s_cmp_eq_u32 s5, 3
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e64 v1, v1, s4, s2
-; GFX11-FAKE16-NEXT:    s_cselect_b32 s2, -1, 0
-; GFX11-FAKE16-NEXT:    s_cmp_eq_u32 s5, 0
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v3, s2, v3, vcc_lo
+; GFX11-FAKE16-NEXT:    s_cselect_b32 vcc_lo, -1, 0
+; GFX11-FAKE16-NEXT:    s_cmp_lg_u32 s3, 3
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v1, s2, v1, vcc_lo
+; GFX11-FAKE16-NEXT:    s_cselect_b32 vcc_lo, -1, 0
+; GFX11-FAKE16-NEXT:    s_cmp_lg_u32 s3, 0
 ; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v12, 16, v0
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e64 v11, v11, s4, s2
-; GFX11-FAKE16-NEXT:    s_cselect_b32 s2, -1, 0
-; GFX11-FAKE16-NEXT:    s_cmp_eq_u32 s5, 1
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e64 v0, v0, s4, s2
-; GFX11-FAKE16-NEXT:    s_cselect_b32 s2, -1, 0
-; GFX11-FAKE16-NEXT:    s_cmp_eq_u32 s5, 14
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v11, s2, v11, vcc_lo
+; GFX11-FAKE16-NEXT:    s_cselect_b32 vcc_lo, -1, 0
+; GFX11-FAKE16-NEXT:    s_cmp_lg_u32 s3, 1
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, s2, v0, vcc_lo
+; GFX11-FAKE16-NEXT:    s_cselect_b32 vcc_lo, -1, 0
+; GFX11-FAKE16-NEXT:    s_cmp_lg_u32 s3, 14
 ; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0)
 ; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v13, 16, v7
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e64 v10, v10, s4, s3
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e64 v10, s2, v10, s0
 ; GFX11-FAKE16-NEXT:    v_perm_b32 v3, v3, v9, 0x5040100
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e64 v9, v12, s4, s2
-; GFX11-FAKE16-NEXT:    s_cselect_b32 s2, -1, 0
-; GFX11-FAKE16-NEXT:    s_cmp_eq_u32 s5, 15
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e64 v7, v7, s4, s2
-; GFX11-FAKE16-NEXT:    s_cselect_b32 s2, -1, 0
-; GFX11-FAKE16-NEXT:    s_cmp_eq_u32 s5, 12
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v9, s2, v12, vcc_lo
+; GFX11-FAKE16-NEXT:    s_cselect_b32 vcc_lo, -1, 0
+; GFX11-FAKE16-NEXT:    s_cmp_lg_u32 s3, 15
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v7, s2, v7, vcc_lo
+; GFX11-FAKE16-NEXT:    s_cselect_b32 vcc_lo, -1, 0
+; GFX11-FAKE16-NEXT:    s_cmp_lg_u32 s3, 12
 ; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v14, 16, v6
 ; GFX11-FAKE16-NEXT:    v_perm_b32 v2, v10, v2, 0x5040100
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e64 v10, v13, s4, s2
-; GFX11-FAKE16-NEXT:    s_cselect_b32 s2, -1, 0
-; GFX11-FAKE16-NEXT:    s_cmp_eq_u32 s5, 13
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e64 v6, v6, s4, s2
-; GFX11-FAKE16-NEXT:    s_cselect_b32 s2, -1, 0
-; GFX11-FAKE16-NEXT:    s_cmp_eq_u32 s5, 10
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v10, s2, v13, vcc_lo
+; GFX11-FAKE16-NEXT:    s_cselect_b32 vcc_lo, -1, 0
+; GFX11-FAKE16-NEXT:    s_cmp_lg_u32 s3, 13
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v6, s2, v6, vcc_lo
+; GFX11-FAKE16-NEXT:    s_cselect_b32 vcc_lo, -1, 0
+; GFX11-FAKE16-NEXT:    s_cmp_lg_u32 s3, 10
 ; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v15, 16, v5
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e64 v12, v14, s4, s2
-; GFX11-FAKE16-NEXT:    s_cselect_b32 s2, -1, 0
-; GFX11-FAKE16-NEXT:    s_cmp_eq_u32 s5, 11
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e64 v5, v5, s4, s2
-; GFX11-FAKE16-NEXT:    s_cselect_b32 s2, -1, 0
-; GFX11-FAKE16-NEXT:    s_cmp_eq_u32 s5, 8
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v12, s2, v14, vcc_lo
+; GFX11-FAKE16-NEXT:    s_cselect_b32 vcc_lo, -1, 0
+; GFX11-FAKE16-NEXT:    s_cmp_lg_u32 s3, 11
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v5, s2, v5, vcc_lo
+; GFX11-FAKE16-NEXT:    s_cselect_b32 vcc_lo, -1, 0
+; GFX11-FAKE16-NEXT:    s_cmp_lg_u32 s3, 8
 ; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v16, 16, v4
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e64 v13, v15, s4, s2
-; GFX11-FAKE16-NEXT:    s_cselect_b32 s2, -1, 0
-; GFX11-FAKE16-NEXT:    s_cmp_eq_u32 s5, 9
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e64 v4, v4, s4, s2
-; GFX11-FAKE16-NEXT:    s_cselect_b32 s2, -1, 0
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v13, s2, v15, vcc_lo
+; GFX11-FAKE16-NEXT:    s_cselect_b32 vcc_lo, -1, 0
+; GFX11-FAKE16-NEXT:    s_cmp_lg_u32 s3, 9
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v4, s2, v4, vcc_lo
+; GFX11-FAKE16-NEXT:    s_cselect_b32 vcc_lo, -1, 0
 ; GFX11-FAKE16-NEXT:    v_perm_b32 v7, v10, v7, 0x5040100
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e64 v14, v16, s4, s2
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v14, s2, v16, vcc_lo
 ; GFX11-FAKE16-NEXT:    v_perm_b32 v6, v12, v6, 0x5040100
 ; GFX11-FAKE16-NEXT:    v_perm_b32 v5, v13, v5, 0x5040100
 ; GFX11-FAKE16-NEXT:    v_perm_b32 v1, v11, v1, 0x5040100
 ; GFX11-FAKE16-NEXT:    v_perm_b32 v0, v9, v0, 0x5040100
 ; GFX11-FAKE16-NEXT:    v_perm_b32 v4, v14, v4, 0x5040100
 ; GFX11-FAKE16-NEXT:    s_clause 0x1
-; GFX11-FAKE16-NEXT:    global_store_b128 v8, v[4:7], s[0:1] offset:16
-; GFX11-FAKE16-NEXT:    global_store_b128 v8, v[0:3], s[0:1]
+; GFX11-FAKE16-NEXT:    global_store_b128 v8, v[4:7], s[8:9] offset:16
+; GFX11-FAKE16-NEXT:    global_store_b128 v8, v[0:3], s[8:9]
 ; GFX11-FAKE16-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
   %tid.ext = sext i32 %tid to i64
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.iglp.opt.exp.simple.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.iglp.opt.exp.simple.ll
index 80f295b939709..3a0fdef651616 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.iglp.opt.exp.simple.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.iglp.opt.exp.simple.ll
@@ -29,14 +29,14 @@ define amdgpu_kernel void @MFMAExpInterleave(ptr addrspace(1) %out0, ptr addrspa
 ; GCN-NEXT:    ; iglp_opt mask(0x00000003)
 ; GCN-NEXT:    v_ldexp_f32 v1, v1, v2
 ; GCN-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
-; GCN-NEXT:    v_cmp_nlt_f32_e32 vcc, s6, v2
+; GCN-NEXT:    v_cmp_lt_f32_e32 vcc, s6, v2
 ; GCN-NEXT:    v_mov_b32_e32 v2, 0x42b17218
 ; GCN-NEXT:    s_nop 0
-; GCN-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
-; GCN-NEXT:    v_cmp_ngt_f32_e32 vcc, s6, v2
+; GCN-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
+; GCN-NEXT:    v_cmp_gt_f32_e32 vcc, s6, v2
 ; GCN-NEXT:    v_mov_b32_e32 v2, 0x7f800000
 ; GCN-NEXT:    s_mov_b32 s6, 0xc2ce8ed0
-; GCN-NEXT:    v_cndmask_b32_e32 v1, v2, v1, vcc
+; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
 ; GCN-NEXT:    v_mul_f32_e32 v3, 0x3fb8aa3b, v1
 ; GCN-NEXT:    v_fma_f32 v4, v1, s0, -v3
 ; GCN-NEXT:    v_rndne_f32_e32 v5, v3
@@ -45,13 +45,13 @@ define amdgpu_kernel void @MFMAExpInterleave(ptr addrspace(1) %out0, ptr addrspa
 ; GCN-NEXT:    v_add_f32_e32 v3, v3, v4
 ; GCN-NEXT:    v_exp_f32_e32 v3, v3
 ; GCN-NEXT:    v_cvt_i32_f32_e32 v4, v5
-; GCN-NEXT:    v_cmp_ngt_f32_e32 vcc, s6, v1
+; GCN-NEXT:    v_cmp_gt_f32_e32 vcc, s6, v1
 ; GCN-NEXT:    v_mfma_f32_4x4x1_16b_f32 a[0:3], v0, v0, a[0:3]
 ; GCN-NEXT:    v_ldexp_f32 v3, v3, v4
-; GCN-NEXT:    v_cndmask_b32_e32 v3, 0, v3, vcc
-; GCN-NEXT:    v_cmp_nlt_f32_e32 vcc, s7, v1
+; GCN-NEXT:    v_cndmask_b32_e64 v3, v3, 0, vcc
+; GCN-NEXT:    v_cmp_lt_f32_e32 vcc, s7, v1
 ; GCN-NEXT:    s_nop 1
-; GCN-NEXT:    v_cndmask_b32_e32 v1, v2, v3, vcc
+; GCN-NEXT:    v_cndmask_b32_e32 v1, v3, v2, vcc
 ; GCN-NEXT:    v_mul_f32_e32 v3, 0x3fb8aa3b, v1
 ; GCN-NEXT:    v_fma_f32 v4, v1, s0, -v3
 ; GCN-NEXT:    v_rndne_f32_e32 v5, v3
@@ -60,13 +60,13 @@ define amdgpu_kernel void @MFMAExpInterleave(ptr addrspace(1) %out0, ptr addrspa
 ; GCN-NEXT:    v_add_f32_e32 v3, v3, v4
 ; GCN-NEXT:    v_exp_f32_e32 v3, v3
 ; GCN-NEXT:    v_cvt_i32_f32_e32 v4, v5
-; GCN-NEXT:    v_cmp_ngt_f32_e32 vcc, s6, v1
+; GCN-NEXT:    v_cmp_gt_f32_e32 vcc, s6, v1
 ; GCN-NEXT:    v_mfma_f32_4x4x1_16b_f32 a[0:3], v0, v0, a[0:3]
 ; GCN-NEXT:    v_ldexp_f32 v3, v3, v4
-; GCN-NEXT:    v_cndmask_b32_e32 v3, 0, v3, vcc
-; GCN-NEXT:    v_cmp_nlt_f32_e32 vcc, s7, v1
+; GCN-NEXT:    v_cndmask_b32_e64 v3, v3, 0, vcc
+; GCN-NEXT:    v_cmp_lt_f32_e32 vcc, s7, v1
 ; GCN-NEXT:    s_nop 1
-; GCN-NEXT:    v_cndmask_b32_e32 v1, v2, v3, vcc
+; GCN-NEXT:    v_cndmask_b32_e32 v1, v3, v2, vcc
 ; GCN-NEXT:    v_mul_f32_e32 v3, 0x3fb8aa3b, v1
 ; GCN-NEXT:    v_fma_f32 v4, v1, s0, -v3
 ; GCN-NEXT:    v_rndne_f32_e32 v5, v3
@@ -75,13 +75,13 @@ define amdgpu_kernel void @MFMAExpInterleave(ptr addrspace(1) %out0, ptr addrspa
 ; GCN-NEXT:    v_add_f32_e32 v3, v3, v4
 ; GCN-NEXT:    v_exp_f32_e32 v3, v3
 ; GCN-NEXT:    v_cvt_i32_f32_e32 v4, v5
-; GCN-NEXT:    v_cmp_ngt_f32_e32 vcc, s6, v1
+; GCN-NEXT:    v_cmp_gt_f32_e32 vcc, s6, v1
 ; GCN-NEXT:    v_mfma_f32_4x4x1_16b_f32 a[0:3], v0, v0, a[0:3]
 ; GCN-NEXT:    v_ldexp_f32 v3, v3, v4
-; GCN-NEXT:    v_cndmask_b32_e32 v3, 0, v3, vcc
-; GCN-NEXT:    v_cmp_nlt_f32_e32 vcc, s7, v1
+; GCN-NEXT:    v_cndmask_b32_e64 v3, v3, 0, vcc
+; GCN-NEXT:    v_cmp_lt_f32_e32 vcc, s7, v1
 ; GCN-NEXT:    s_nop 1
-; GCN-NEXT:    v_cndmask_b32_e32 v1, v2, v3, vcc
+; GCN-NEXT:    v_cndmask_b32_e32 v1, v3, v2, vcc
 ; GCN-NEXT:    v_mul_f32_e32 v3, 0x3fb8aa3b, v1
 ; GCN-NEXT:    v_fma_f32 v4, v1, s0, -v3
 ; GCN-NEXT:    v_rndne_f32_e32 v5, v3
@@ -90,13 +90,13 @@ define amdgpu_kernel void @MFMAExpInterleave(ptr addrspace(1) %out0, ptr addrspa
 ; GCN-NEXT:    v_add_f32_e32 v3, v3, v4
 ; GCN-NEXT:    v_exp_f32_e32 v3, v3
 ; GCN-NEXT:    v_cvt_i32_f32_e32 v4, v5
-; GCN-NEXT:    v_cmp_ngt_f32_e32 vcc, s6, v1
+; GCN-NEXT:    v_cmp_gt_f32_e32 vcc, s6, v1
 ; GCN-NEXT:    v_mfma_f32_4x4x1_16b_f32 a[0:3], v0, v0, a[0:3]
 ; GCN-NEXT:    v_ldexp_f32 v3, v3, v4
-; GCN-NEXT:    v_cndmask_b32_e32 v3, 0, v3, vcc
-; GCN-NEXT:    v_cmp_nlt_f32_e32 vcc, s7, v1
+; GCN-NEXT:    v_cndmask_b32_e64 v3, v3, 0, vcc
+; GCN-NEXT:    v_cmp_lt_f32_e32 vcc, s7, v1
 ; GCN-NEXT:    s_nop 1
-; GCN-NEXT:    v_cndmask_b32_e32 v1, v2, v3, vcc
+; GCN-NEXT:    v_cndmask_b32_e32 v1, v3, v2, vcc
 ; GCN-NEXT:    v_mul_f32_e32 v3, 0x3fb8aa3b, v1
 ; GCN-NEXT:    v_fma_f32 v4, v1, s0, -v3
 ; GCN-NEXT:    v_rndne_f32_e32 v5, v3
@@ -105,13 +105,13 @@ define amdgpu_kernel void @MFMAExpInterleave(ptr addrspace(1) %out0, ptr addrspa
 ; GCN-NEXT:    v_add_f32_e32 v3, v3, v4
 ; GCN-NEXT:    v_exp_f32_e32 v3, v3
 ; GCN-NEXT:    v_cvt_i32_f32_e32 v4, v5
-; GCN-NEXT:    v_cmp_ngt_f32_e32 vcc, s6, v1
+; GCN-NEXT:    v_cmp_gt_f32_e32 vcc, s6, v1
 ; GCN-NEXT:    v_mfma_f32_4x4x1_16b_f32 a[0:3], v0, v0, a[0:3]
 ; GCN-NEXT:    v_ldexp_f32 v3, v3, v4
-; GCN-NEXT:    v_cndmask_b32_e32 v3, 0, v3, vcc
-; GCN-NEXT:    v_cmp_nlt_f32_e32 vcc, s7, v1
+; GCN-NEXT:    v_cndmask_b32_e64 v3, v3, 0, vcc
+; GCN-NEXT:    v_cmp_lt_f32_e32 vcc, s7, v1
 ; GCN-NEXT:    s_nop 1
-; GCN-NEXT:    v_cndmask_b32_e32 v1, v2, v3, vcc
+; GCN-NEXT:    v_cndmask_b32_e32 v1, v3, v2, vcc
 ; GCN-NEXT:    v_mul_f32_e32 v3, 0x3fb8aa3b, v1
 ; GCN-NEXT:    v_fma_f32 v4, v1, s0, -v3
 ; GCN-NEXT:    v_rndne_f32_e32 v5, v3
@@ -121,13 +121,13 @@ define amdgpu_kernel void @MFMAExpInterleave(ptr addrspace(1) %out0, ptr addrspa
 ; GCN-NEXT:    v_exp_f32_e32 v3, v3
 ; GCN-NEXT:    v_cvt_i32_f32_e32 v4, v5
 ; GCN-NEXT:    v_mfma_f32_4x4x1_16b_f32 a[0:3], v0, v0, a[0:3]
-; GCN-NEXT:    v_cmp_ngt_f32_e32 vcc, s6, v1
+; GCN-NEXT:    v_cmp_gt_f32_e32 vcc, s6, v1
 ; GCN-NEXT:    v_ldexp_f32 v0, v3, v4
 ; GCN-NEXT:    s_nop 0
-; GCN-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
-; GCN-NEXT:    v_cmp_nlt_f32_e32 vcc, s7, v1
+; GCN-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
+; GCN-NEXT:    v_cmp_lt_f32_e32 vcc, s7, v1
 ; GCN-NEXT:    s_nop 1
-; GCN-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
+; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; GCN-NEXT:    v_mul_f32_e32 v1, 0x3fb8aa3b, v0
 ; GCN-NEXT:    v_fma_f32 v3, v0, s0, -v1
 ; GCN-NEXT:    v_rndne_f32_e32 v4, v1
@@ -137,14 +137,14 @@ define amdgpu_kernel void @MFMAExpInterleave(ptr addrspace(1) %out0, ptr addrspa
 ; GCN-NEXT:    v_exp_f32_e32 v1, v1
 ; GCN-NEXT:    v_cvt_i32_f32_e32 v3, v4
 ; GCN-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
-; GCN-NEXT:    v_cmp_ngt_f32_e32 vcc, s6, v0
+; GCN-NEXT:    v_cmp_gt_f32_e32 vcc, s6, v0
 ; GCN-NEXT:    v_mov_b32_e32 v4, 0
 ; GCN-NEXT:    v_ldexp_f32 v1, v1, v3
-; GCN-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
-; GCN-NEXT:    v_cmp_nlt_f32_e32 vcc, s7, v0
+; GCN-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
+; GCN-NEXT:    v_cmp_lt_f32_e32 vcc, s7, v0
 ; GCN-NEXT:    s_waitcnt lgkmcnt(0)
 ; GCN-NEXT:    global_store_dwordx4 v4, a[0:3], s[0:1]
-; GCN-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
+; GCN-NEXT:    v_cndmask_b32_e32 v0, v1, v2, vcc
 ; GCN-NEXT:    global_store_dword v4, v0, s[2:3]
 ; GCN-NEXT:    s_endpgm
   %mai0 = tail call <4 x float> @llvm.amdgcn.mfma.f32.4x4x1f32(float 1.0, float 1.0, <4 x float> %in1, i32 0, i32 0, i32 0)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sched.group.barrier.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sched.group.barrier.ll
index 73586b1243376..2d87a8a14cafd 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sched.group.barrier.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sched.group.barrier.ll
@@ -1226,15 +1226,15 @@ define amdgpu_kernel void @test_sched_group_barrier_pipeline_interleave_EXP_MFMA
 ; GCN-NEXT:    v_ldexp_f32 v4, v4, v5
 ; GCN-NEXT:    v_mov_b32_e32 v5, 0xc2ce8ed0
 ; GCN-NEXT:    v_mul_f32_e32 v10, s1, v3
-; GCN-NEXT:    v_cmp_nlt_f32_e32 vcc, s0, v5
+; GCN-NEXT:    v_cmp_lt_f32_e32 vcc, s0, v5
 ; GCN-NEXT:    v_mov_b32_e32 v6, 0x42b17218
 ; GCN-NEXT:    v_rndne_f32_e32 v11, v10
-; GCN-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
-; GCN-NEXT:    v_cmp_ngt_f32_e32 vcc, s0, v6
+; GCN-NEXT:    v_cndmask_b32_e64 v4, v4, 0, vcc
+; GCN-NEXT:    v_cmp_gt_f32_e32 vcc, s0, v6
 ; GCN-NEXT:    v_mov_b32_e32 v8, 0x7f800000
 ; GCN-NEXT:    v_sub_f32_e32 v12, v10, v11
 ; GCN-NEXT:    v_fma_f32 v10, s1, v3, -v10
-; GCN-NEXT:    v_cndmask_b32_e32 v4, v8, v4, vcc
+; GCN-NEXT:    v_cndmask_b32_e32 v4, v4, v8, vcc
 ; GCN-NEXT:    v_fmac_f32_e32 v10, s1, v7
 ; GCN-NEXT:    ds_read_b128 a[28:31], v1 offset:8304
 ; GCN-NEXT:    s_waitcnt lgkmcnt(1)
@@ -1250,10 +1250,10 @@ define amdgpu_kernel void @test_sched_group_barrier_pipeline_interleave_EXP_MFMA
 ; GCN-NEXT:    ds_read_b128 a[4:7], v1 offset:8208
 ; GCN-NEXT:    ds_read_b128 a[0:3], v1 offset:8192
 ; GCN-NEXT:    v_ldexp_f32 v4, v4, v10
-; GCN-NEXT:    v_cmp_nlt_f32_e32 vcc, s1, v5
-; GCN-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
-; GCN-NEXT:    v_cmp_ngt_f32_e32 vcc, s1, v6
-; GCN-NEXT:    v_cndmask_b32_e32 v4, v8, v4, vcc
+; GCN-NEXT:    v_cmp_lt_f32_e32 vcc, s1, v5
+; GCN-NEXT:    v_cndmask_b32_e64 v4, v4, 0, vcc
+; GCN-NEXT:    v_cmp_gt_f32_e32 vcc, s1, v6
+; GCN-NEXT:    v_cndmask_b32_e32 v4, v4, v8, vcc
 ; GCN-NEXT:    v_mul_f32_e32 v10, s2, v3
 ; GCN-NEXT:    v_rndne_f32_e32 v11, v10
 ; GCN-NEXT:    s_waitcnt lgkmcnt(0)
@@ -1282,11 +1282,11 @@ define amdgpu_kernel void @test_sched_group_barrier_pipeline_interleave_EXP_MFMA
 ; GCN-NEXT:    ds_read_b128 a[36:39], v1 offset:49168
 ; GCN-NEXT:    ds_read_b128 a[32:35], v1 offset:49152
 ; GCN-NEXT:    v_ldexp_f32 v1, v4, v10
-; GCN-NEXT:    v_cmp_nlt_f32_e32 vcc, s2, v5
-; GCN-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
-; GCN-NEXT:    v_cmp_ngt_f32_e32 vcc, s2, v6
+; GCN-NEXT:    v_cmp_lt_f32_e32 vcc, s2, v5
+; GCN-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
+; GCN-NEXT:    v_cmp_gt_f32_e32 vcc, s2, v6
 ; GCN-NEXT:    v_mul_f32_e32 v4, s3, v3
-; GCN-NEXT:    v_cndmask_b32_e32 v1, v8, v1, vcc
+; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v8, vcc
 ; GCN-NEXT:    v_rndne_f32_e32 v10, v4
 ; GCN-NEXT:    s_load_dword s8, s[4:5], 0x54
 ; GCN-NEXT:    s_waitcnt lgkmcnt(0)
@@ -1297,13 +1297,13 @@ define amdgpu_kernel void @test_sched_group_barrier_pipeline_interleave_EXP_MFMA
 ; GCN-NEXT:    v_add_f32_e32 v1, v1, v4
 ; GCN-NEXT:    v_exp_f32_e32 v1, v1
 ; GCN-NEXT:    v_cvt_i32_f32_e32 v4, v10
-; GCN-NEXT:    v_cmp_nlt_f32_e32 vcc, s3, v5
+; GCN-NEXT:    v_cmp_lt_f32_e32 vcc, s3, v5
 ; GCN-NEXT:    ds_read_b128 a[156:159], v2 offset:57456
 ; GCN-NEXT:    ds_read_b128 a[152:155], v2 offset:57440
 ; GCN-NEXT:    v_ldexp_f32 v1, v1, v4
-; GCN-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
-; GCN-NEXT:    v_cmp_ngt_f32_e32 vcc, s3, v6
-; GCN-NEXT:    v_cndmask_b32_e32 v1, v8, v1, vcc
+; GCN-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
+; GCN-NEXT:    v_cmp_gt_f32_e32 vcc, s3, v6
+; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v8, vcc
 ; GCN-NEXT:    v_mul_f32_e32 v4, s8, v3
 ; GCN-NEXT:    v_fma_f32 v3, s8, v3, -v4
 ; GCN-NEXT:    v_mfma_f32_32x32x1f32 a[32:63], v9, v1, a[32:63]
@@ -1320,10 +1320,10 @@ define amdgpu_kernel void @test_sched_group_barrier_pipeline_interleave_EXP_MFMA
 ; GCN-NEXT:    ds_read_b128 a[136:139], v2 offset:57376
 ; GCN-NEXT:    ds_read_b128 a[140:143], v2 offset:57392
 ; GCN-NEXT:    v_ldexp_f32 v1, v3, v1
-; GCN-NEXT:    v_cmp_nlt_f32_e32 vcc, s8, v5
-; GCN-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
-; GCN-NEXT:    v_cmp_ngt_f32_e32 vcc, s8, v6
-; GCN-NEXT:    v_cndmask_b32_e32 v1, v8, v1, vcc
+; GCN-NEXT:    v_cmp_lt_f32_e32 vcc, s8, v5
+; GCN-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
+; GCN-NEXT:    v_cmp_gt_f32_e32 vcc, s8, v6
+; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v8, vcc
 ; GCN-NEXT:    v_add_u32_e32 v0, s7, v0
 ; GCN-NEXT:    ds_write_b128 v0, a[124:127] offset:112
 ; GCN-NEXT:    s_waitcnt lgkmcnt(1)
@@ -1411,15 +1411,15 @@ define amdgpu_kernel void @test_sched_group_barrier_pipeline_interleave_EXP_MFMA
 ; EXACTCUTOFF-NEXT:    v_ldexp_f32 v4, v4, v5
 ; EXACTCUTOFF-NEXT:    v_mov_b32_e32 v5, 0xc2ce8ed0
 ; EXACTCUTOFF-NEXT:    v_mul_f32_e32 v10, s1, v3
-; EXACTCUTOFF-NEXT:    v_cmp_nlt_f32_e32 vcc, s0, v5
+; EXACTCUTOFF-NEXT:    v_cmp_lt_f32_e32 vcc, s0, v5
 ; EXACTCUTOFF-NEXT:    v_mov_b32_e32 v6, 0x42b17218
 ; EXACTCUTOFF-NEXT:    v_rndne_f32_e32 v11, v10
-; EXACTCUTOFF-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
-; EXACTCUTOFF-NEXT:    v_cmp_ngt_f32_e32 vcc, s0, v6
+; EXACTCUTOFF-NEXT:    v_cndmask_b32_e64 v4, v4, 0, vcc
+; EXACTCUTOFF-NEXT:    v_cmp_gt_f32_e32 vcc, s0, v6
 ; EXACTCUTOFF-NEXT:    v_mov_b32_e32 v8, 0x7f800000
 ; EXACTCUTOFF-NEXT:    v_sub_f32_e32 v12, v10, v11
 ; EXACTCUTOFF-NEXT:    v_fma_f32 v10, s1, v3, -v10
-; EXACTCUTOFF-NEXT:    v_cndmask_b32_e32 v4, v8, v4, vcc
+; EXACTCUTOFF-NEXT:    v_cndmask_b32_e32 v4, v4, v8, vcc
 ; EXACTCUTOFF-NEXT:    v_fmac_f32_e32 v10, s1, v7
 ; EXACTCUTOFF-NEXT:    ds_read_b128 a[28:31], v1 offset:8304
 ; EXACTCUTOFF-NEXT:    s_waitcnt lgkmcnt(1)
@@ -1435,10 +1435,10 @@ define amdgpu_kernel void @test_sched_group_barrier_pipeline_interleave_EXP_MFMA
 ; EXACTCUTOFF-NEXT:    ds_read_b128 a[4:7], v1 offset:8208
 ; EXACTCUTOFF-NEXT:    ds_read_b128 a[0:3], v1 offset:8192
 ; EXACTCUTOFF-NEXT:    v_ldexp_f32 v4, v4, v10
-; EXACTCUTOFF-NEXT:    v_cmp_nlt_f32_e32 vcc, s1, v5
-; EXACTCUTOFF-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
-; EXACTCUTOFF-NEXT:    v_cmp_ngt_f32_e32 vcc, s1, v6
-; EXACTCUTOFF-NEXT:    v_cndmask_b32_e32 v4, v8, v4, vcc
+; EXACTCUTOFF-NEXT:    v_cmp_lt_f32_e32 vcc, s1, v5
+; EXACTCUTOFF-NEXT:    v_cndmask_b32_e64 v4, v4, 0, vcc
+; EXACTCUTOFF-NEXT:    v_cmp_gt_f32_e32 vcc, s1, v6
+; EXACTCUTOFF-NEXT:    v_cndmask_b32_e32 v4, v4, v8, vcc
 ; EXACTCUTOFF-NEXT:    v_mul_f32_e32 v10, s2, v3
 ; EXACTCUTOFF-NEXT:    v_rndne_f32_e32 v11, v10
 ; EXACTCUTOFF-NEXT:    s_waitcnt lgkmcnt(0)
@@ -1467,11 +1467,11 @@ define amdgpu_kernel void @test_sched_group_barrier_pipeline_interleave_EXP_MFMA
 ; EXACTCUTOFF-NEXT:    ds_read_b128 a[36:39], v1 offset:49168
 ; EXACTCUTOFF-NEXT:    ds_read_b128 a[32:35], v1 offset:49152
 ; EXACTCUTOFF-NEXT:    v_ldexp_f32 v1, v4, v10
-; EXACTCUTOFF-NEXT:    v_cmp_nlt_f32_e32 vcc, s2, v5
-; EXACTCUTOFF-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
-; EXACTCUTOFF-NEXT:    v_cmp_ngt_f32_e32 vcc, s2, v6
+; EXACTCUTOFF-NEXT:    v_cmp_lt_f32_e32 vcc, s2, v5
+; EXACTCUTOFF-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
+; EXACTCUTOFF-NEXT:    v_cmp_gt_f32_e32 vcc, s2, v6
 ; EXACTCUTOFF-NEXT:    v_mul_f32_e32 v4, s3, v3
-; EXACTCUTOFF-NEXT:    v_cndmask_b32_e32 v1, v8, v1, vcc
+; EXACTCUTOFF-NEXT:    v_cndmask_b32_e32 v1, v1, v8, vcc
 ; EXACTCUTOFF-NEXT:    v_rndne_f32_e32 v10, v4
 ; EXACTCUTOFF-NEXT:    s_load_dword s8, s[4:5], 0x54
 ; EXACTCUTOFF-NEXT:    s_waitcnt lgkmcnt(0)
@@ -1482,13 +1482,13 @@ define amdgpu_kernel void @test_sched_group_barrier_pipeline_interleave_EXP_MFMA
 ; EXACTCUTOFF-NEXT:    v_add_f32_e32 v1, v1, v4
 ; EXACTCUTOFF-NEXT:    v_exp_f32_e32 v1, v1
 ; EXACTCUTOFF-NEXT:    v_cvt_i32_f32_e32 v4, v10
-; EXACTCUTOFF-NEXT:    v_cmp_nlt_f32_e32 vcc, s3, v5
+; EXACTCUTOFF-NEXT:    v_cmp_lt_f32_e32 vcc, s3, v5
 ; EXACTCUTOFF-NEXT:    ds_read_b128 a[156:159], v2 offset:57456
 ; EXACTCUTOFF-NEXT:    ds_read_b128 a[152:155], v2 offset:57440
 ; EXACTCUTOFF-NEXT:    v_ldexp_f32 v1, v1, v4
-; EXACTCUTOFF-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
-; EXACTCUTOFF-NEXT:    v_cmp_ngt_f32_e32 vcc, s3, v6
-; EXACTCUTOFF-NEXT:    v_cndmask_b32_e32 v1, v8, v1, vcc
+; EXACTCUTOFF-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
+; EXACTCUTOFF-NEXT:    v_cmp_gt_f32_e32 vcc, s3, v6
+; EXACTCUTOFF-NEXT:    v_cndmask_b32_e32 v1, v1, v8, vcc
 ; EXACTCUTOFF-NEXT:    v_mul_f32_e32 v4, s8, v3
 ; EXACTCUTOFF-NEXT:    v_fma_f32 v3, s8, v3, -v4
 ; EXACTCUTOFF-NEXT:    v_mfma_f32_32x32x1f32 a[32:63], v9, v1, a[32:63]
@@ -1505,10 +1505,10 @@ define amdgpu_kernel void @test_sched_group_barrier_pipeline_interleave_EXP_MFMA
 ; EXACTCUTOFF-NEXT:    ds_read_b128 a[136:139], v2 offset:57376
 ; EXACTCUTOFF-NEXT:    ds_read_b128 a[140:143], v2 offset:57392
 ; EXACTCUTOFF-NEXT:    v_ldexp_f32 v1, v3, v1
-; EXACTCUTOFF-NEXT:    v_cmp_nlt_f32_e32 vcc, s8, v5
-; EXACTCUTOFF-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
-; EXACTCUTOFF-NEXT:    v_cmp_ngt_f32_e32 vcc, s8, v6
-; EXACTCUTOFF-NEXT:    v_cndmask_b32_e32 v1, v8, v1, vcc
+; EXACTCUTOFF-NEXT:    v_cmp_lt_f32_e32 vcc, s8, v5
+; EXACTCUTOFF-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
+; EXACTCUTOFF-NEXT:    v_cmp_gt_f32_e32 vcc, s8, v6
+; EXACTCUTOFF-NEXT:    v_cndmask_b32_e32 v1, v1, v8, vcc
 ; EXACTCUTOFF-NEXT:    v_add_u32_e32 v0, s7, v0
 ; EXACTCUTOFF-NEXT:    ds_write_b128 v0, a[124:127] offset:112
 ; EXACTCUTOFF-NEXT:    s_waitcnt lgkmcnt(1)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.exp.ll b/llvm/test/CodeGen/AMDGPU/llvm.exp.ll
index 978f223aafb94..55be46b62c4c2 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.exp.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.exp.ll
@@ -34,11 +34,11 @@ define amdgpu_kernel void @s_exp_f32(ptr addrspace(1) %out, float %in) {
 ; VI-SDAG-NEXT:    v_mov_b32_e32 v2, 0x7f800000
 ; VI-SDAG-NEXT:    v_ldexp_f32 v0, v0, v1
 ; VI-SDAG-NEXT:    v_mov_b32_e32 v1, 0xc2ce8ed0
-; VI-SDAG-NEXT:    v_cmp_nlt_f32_e32 vcc, s2, v1
+; VI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s2, v1
 ; VI-SDAG-NEXT:    v_mov_b32_e32 v1, 0x42b17218
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
-; VI-SDAG-NEXT:    v_cmp_ngt_f32_e32 vcc, s2, v1
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; VI-SDAG-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
+; VI-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s2, v1
+; VI-SDAG-NEXT:    v_cndmask_b32_e32 v2, v0, v2, vcc
 ; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
 ; VI-SDAG-NEXT:    v_mov_b32_e32 v0, s0
 ; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s1
@@ -99,11 +99,11 @@ define amdgpu_kernel void @s_exp_f32(ptr addrspace(1) %out, float %in) {
 ; GFX900-SDAG-NEXT:    v_mov_b32_e32 v2, 0
 ; GFX900-SDAG-NEXT:    v_ldexp_f32 v0, v0, v1
 ; GFX900-SDAG-NEXT:    v_mov_b32_e32 v1, 0xc2ce8ed0
-; GFX900-SDAG-NEXT:    v_cmp_nlt_f32_e32 vcc, s2, v1
+; GFX900-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s2, v1
 ; GFX900-SDAG-NEXT:    v_mov_b32_e32 v1, 0x42b17218
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
-; GFX900-SDAG-NEXT:    v_cmp_ngt_f32_e32 vcc, s2, v1
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v0, v3, v0, vcc
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
+; GFX900-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s2, v1
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
 ; GFX900-SDAG-NEXT:    global_store_dword v2, v0, s[0:1]
 ; GFX900-SDAG-NEXT:    s_endpgm
 ;
@@ -154,11 +154,11 @@ define amdgpu_kernel void @s_exp_f32(ptr addrspace(1) %out, float %in) {
 ; SI-SDAG-NEXT:    s_mov_b32 s2, -1
 ; SI-SDAG-NEXT:    v_ldexp_f32_e32 v0, v0, v1
 ; SI-SDAG-NEXT:    v_mov_b32_e32 v1, 0xc2ce8ed0
-; SI-SDAG-NEXT:    v_cmp_nlt_f32_e32 vcc, s6, v1
+; SI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s6, v1
 ; SI-SDAG-NEXT:    v_mov_b32_e32 v1, 0x42b17218
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
-; SI-SDAG-NEXT:    v_cmp_ngt_f32_e32 vcc, s6, v1
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
+; SI-SDAG-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
+; SI-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s6, v1
+; SI-SDAG-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; SI-SDAG-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; SI-SDAG-NEXT:    s_endpgm
 ;
@@ -372,18 +372,18 @@ define amdgpu_kernel void @s_exp_v2f32(ptr addrspace(1) %out, <2 x float> %in) {
 ; VI-SDAG-NEXT:    v_cvt_i32_f32_e32 v4, v5
 ; VI-SDAG-NEXT:    v_ldexp_f32 v1, v1, v2
 ; VI-SDAG-NEXT:    v_mov_b32_e32 v2, 0xc2ce8ed0
-; VI-SDAG-NEXT:    v_cmp_nlt_f32_e32 vcc, s3, v2
+; VI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s3, v2
 ; VI-SDAG-NEXT:    v_mov_b32_e32 v3, 0x42b17218
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-SDAG-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
 ; VI-SDAG-NEXT:    v_mov_b32_e32 v5, 0x7f800000
-; VI-SDAG-NEXT:    v_cmp_ngt_f32_e32 vcc, s3, v3
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
+; VI-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s3, v3
+; VI-SDAG-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
 ; VI-SDAG-NEXT:    v_ldexp_f32 v0, v0, v4
-; VI-SDAG-NEXT:    v_cmp_nlt_f32_e32 vcc, s2, v2
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
-; VI-SDAG-NEXT:    v_cmp_ngt_f32_e32 vcc, s2, v3
+; VI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s2, v2
+; VI-SDAG-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
+; VI-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s2, v3
 ; VI-SDAG-NEXT:    v_mov_b32_e32 v3, s1
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v0, v5, v0, vcc
+; VI-SDAG-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
 ; VI-SDAG-NEXT:    v_mov_b32_e32 v2, s0
 ; VI-SDAG-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
 ; VI-SDAG-NEXT:    s_endpgm
@@ -464,18 +464,18 @@ define amdgpu_kernel void @s_exp_v2f32(ptr addrspace(1) %out, <2 x float> %in) {
 ; GFX900-SDAG-NEXT:    v_exp_f32_e32 v0, v0
 ; GFX900-SDAG-NEXT:    v_cvt_i32_f32_e32 v6, v7
 ; GFX900-SDAG-NEXT:    v_ldexp_f32 v2, v2, v3
-; GFX900-SDAG-NEXT:    v_cmp_nlt_f32_e32 vcc, s3, v5
+; GFX900-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s3, v5
 ; GFX900-SDAG-NEXT:    v_mov_b32_e32 v3, 0x42b17218
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v2, v2, 0, vcc
 ; GFX900-SDAG-NEXT:    v_mov_b32_e32 v7, 0x7f800000
-; GFX900-SDAG-NEXT:    v_cmp_ngt_f32_e32 vcc, s3, v3
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v1, v7, v2, vcc
+; GFX900-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s3, v3
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v1, v2, v7, vcc
 ; GFX900-SDAG-NEXT:    v_ldexp_f32 v0, v0, v6
-; GFX900-SDAG-NEXT:    v_cmp_nlt_f32_e32 vcc, s2, v5
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
-; GFX900-SDAG-NEXT:    v_cmp_ngt_f32_e32 vcc, s2, v3
+; GFX900-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s2, v5
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
+; GFX900-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s2, v3
 ; GFX900-SDAG-NEXT:    v_mov_b32_e32 v4, 0
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v0, v7, v0, vcc
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc
 ; GFX900-SDAG-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1]
 ; GFX900-SDAG-NEXT:    s_endpgm
 ;
@@ -544,19 +544,19 @@ define amdgpu_kernel void @s_exp_v2f32(ptr addrspace(1) %out, <2 x float> %in) {
 ; SI-SDAG-NEXT:    v_cvt_i32_f32_e32 v5, v6
 ; SI-SDAG-NEXT:    v_ldexp_f32_e32 v2, v2, v3
 ; SI-SDAG-NEXT:    v_mov_b32_e32 v3, 0xc2ce8ed0
-; SI-SDAG-NEXT:    v_cmp_nlt_f32_e32 vcc, s7, v3
+; SI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s7, v3
 ; SI-SDAG-NEXT:    v_mov_b32_e32 v4, 0x42b17218
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
+; SI-SDAG-NEXT:    v_cndmask_b32_e64 v2, v2, 0, vcc
 ; SI-SDAG-NEXT:    v_mov_b32_e32 v6, 0x7f800000
-; SI-SDAG-NEXT:    v_cmp_ngt_f32_e32 vcc, s7, v4
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v1, v6, v2, vcc
+; SI-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s7, v4
+; SI-SDAG-NEXT:    v_cndmask_b32_e32 v1, v2, v6, vcc
 ; SI-SDAG-NEXT:    v_ldexp_f32_e32 v0, v0, v5
-; SI-SDAG-NEXT:    v_cmp_nlt_f32_e32 vcc, s6, v3
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
-; SI-SDAG-NEXT:    v_cmp_ngt_f32_e32 vcc, s6, v4
+; SI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s6, v3
+; SI-SDAG-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
+; SI-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s6, v4
 ; SI-SDAG-NEXT:    s_mov_b32 s0, s4
 ; SI-SDAG-NEXT:    s_mov_b32 s1, s5
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v0, v6, v0, vcc
+; SI-SDAG-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
 ; SI-SDAG-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
 ; SI-SDAG-NEXT:    s_endpgm
 ;
@@ -888,13 +888,13 @@ define amdgpu_kernel void @s_exp_v3f32(ptr addrspace(1) %out, <3 x float> %in) {
 ; VI-SDAG-NEXT:    v_exp_f32_e32 v7, v2
 ; VI-SDAG-NEXT:    v_cvt_i32_f32_e32 v6, v6
 ; VI-SDAG-NEXT:    v_mov_b32_e32 v3, 0xc2ce8ed0
-; VI-SDAG-NEXT:    v_cmp_nlt_f32_e32 vcc, s2, v3
+; VI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s2, v3
 ; VI-SDAG-NEXT:    v_mov_b32_e32 v5, 0x42b17218
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-SDAG-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
 ; VI-SDAG-NEXT:    v_mov_b32_e32 v8, 0x7f800000
-; VI-SDAG-NEXT:    v_cmp_ngt_f32_e32 vcc, s2, v5
+; VI-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s2, v5
 ; VI-SDAG-NEXT:    s_and_b32 s2, s0, 0xfffff000
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v2, v8, v1, vcc
+; VI-SDAG-NEXT:    v_cndmask_b32_e32 v2, v1, v8, vcc
 ; VI-SDAG-NEXT:    v_ldexp_f32 v1, v7, v6
 ; VI-SDAG-NEXT:    v_mov_b32_e32 v7, s2
 ; VI-SDAG-NEXT:    v_sub_f32_e32 v7, s0, v7
@@ -909,16 +909,16 @@ define amdgpu_kernel void @s_exp_v3f32(ptr addrspace(1) %out, <3 x float> %in) {
 ; VI-SDAG-NEXT:    v_add_f32_e32 v0, v0, v4
 ; VI-SDAG-NEXT:    v_exp_f32_e32 v0, v0
 ; VI-SDAG-NEXT:    v_cvt_i32_f32_e32 v4, v6
-; VI-SDAG-NEXT:    v_cmp_nlt_f32_e32 vcc, s1, v3
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
-; VI-SDAG-NEXT:    v_cmp_ngt_f32_e32 vcc, s1, v5
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v1, v8, v1, vcc
+; VI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s1, v3
+; VI-SDAG-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
+; VI-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s1, v5
+; VI-SDAG-NEXT:    v_cndmask_b32_e32 v1, v1, v8, vcc
 ; VI-SDAG-NEXT:    v_ldexp_f32 v0, v0, v4
-; VI-SDAG-NEXT:    v_cmp_nlt_f32_e32 vcc, s0, v3
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
-; VI-SDAG-NEXT:    v_cmp_ngt_f32_e32 vcc, s0, v5
+; VI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s0, v3
+; VI-SDAG-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
+; VI-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s0, v5
 ; VI-SDAG-NEXT:    v_mov_b32_e32 v3, s4
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v0, v8, v0, vcc
+; VI-SDAG-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc
 ; VI-SDAG-NEXT:    v_mov_b32_e32 v4, s5
 ; VI-SDAG-NEXT:    flat_store_dwordx3 v[3:4], v[0:2]
 ; VI-SDAG-NEXT:    s_endpgm
@@ -1028,23 +1028,23 @@ define amdgpu_kernel void @s_exp_v3f32(ptr addrspace(1) %out, <3 x float> %in) {
 ; GFX900-SDAG-NEXT:    v_ldexp_f32 v2, v2, v3
 ; GFX900-SDAG-NEXT:    v_mov_b32_e32 v3, 0xc2ce8ed0
 ; GFX900-SDAG-NEXT:    v_add_f32_e32 v0, v10, v0
-; GFX900-SDAG-NEXT:    v_cmp_nlt_f32_e32 vcc, s2, v3
+; GFX900-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s2, v3
 ; GFX900-SDAG-NEXT:    v_exp_f32_e32 v0, v0
 ; GFX900-SDAG-NEXT:    v_cvt_i32_f32_e32 v7, v9
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v2, v2, 0, vcc
 ; GFX900-SDAG-NEXT:    v_mov_b32_e32 v8, 0x7f800000
-; GFX900-SDAG-NEXT:    v_cmp_ngt_f32_e32 vcc, s2, v5
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v2, v8, v2, vcc
-; GFX900-SDAG-NEXT:    v_cmp_nlt_f32_e32 vcc, s1, v3
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v1, 0, v6, vcc
-; GFX900-SDAG-NEXT:    v_cmp_ngt_f32_e32 vcc, s1, v5
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v1, v8, v1, vcc
+; GFX900-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s2, v5
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v2, v2, v8, vcc
+; GFX900-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s1, v3
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v1, v6, 0, vcc
+; GFX900-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s1, v5
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v1, v1, v8, vcc
 ; GFX900-SDAG-NEXT:    v_ldexp_f32 v0, v0, v7
-; GFX900-SDAG-NEXT:    v_cmp_nlt_f32_e32 vcc, s0, v3
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
-; GFX900-SDAG-NEXT:    v_cmp_ngt_f32_e32 vcc, s0, v5
+; GFX900-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s0, v3
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
+; GFX900-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s0, v5
 ; GFX900-SDAG-NEXT:    v_mov_b32_e32 v4, 0
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v0, v8, v0, vcc
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc
 ; GFX900-SDAG-NEXT:    global_store_dwordx3 v4, v[0:2], s[6:7]
 ; GFX900-SDAG-NEXT:    s_endpgm
 ;
@@ -1134,24 +1134,24 @@ define amdgpu_kernel void @s_exp_v3f32(ptr addrspace(1) %out, <3 x float> %in) {
 ; SI-SDAG-NEXT:    v_ldexp_f32_e32 v1, v1, v3
 ; SI-SDAG-NEXT:    v_mov_b32_e32 v3, 0xc2ce8ed0
 ; SI-SDAG-NEXT:    v_add_f32_e32 v0, v9, v0
-; SI-SDAG-NEXT:    v_cmp_nlt_f32_e32 vcc, s1, v3
+; SI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s1, v3
 ; SI-SDAG-NEXT:    v_mov_b32_e32 v4, 0x42b17218
 ; SI-SDAG-NEXT:    v_exp_f32_e32 v2, v0
 ; SI-SDAG-NEXT:    v_cvt_i32_f32_e32 v6, v8
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-SDAG-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
 ; SI-SDAG-NEXT:    v_mov_b32_e32 v7, 0x7f800000
-; SI-SDAG-NEXT:    v_cmp_ngt_f32_e32 vcc, s1, v4
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v1, v7, v1, vcc
-; SI-SDAG-NEXT:    v_cmp_nlt_f32_e32 vcc, s0, v3
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v0, 0, v5, vcc
-; SI-SDAG-NEXT:    v_cmp_ngt_f32_e32 vcc, s0, v4
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v0, v7, v0, vcc
+; SI-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s1, v4
+; SI-SDAG-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
+; SI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s0, v3
+; SI-SDAG-NEXT:    v_cndmask_b32_e64 v0, v5, 0, vcc
+; SI-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s0, v4
+; SI-SDAG-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc
 ; SI-SDAG-NEXT:    v_ldexp_f32_e32 v2, v2, v6
-; SI-SDAG-NEXT:    v_cmp_nlt_f32_e32 vcc, s2, v3
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
-; SI-SDAG-NEXT:    v_cmp_ngt_f32_e32 vcc, s2, v4
+; SI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s2, v3
+; SI-SDAG-NEXT:    v_cndmask_b32_e64 v2, v2, 0, vcc
+; SI-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s2, v4
 ; SI-SDAG-NEXT:    s_mov_b32 s6, -1
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v2, v7, v2, vcc
+; SI-SDAG-NEXT:    v_cndmask_b32_e32 v2, v2, v7, vcc
 ; SI-SDAG-NEXT:    buffer_store_dword v2, off, s[4:7], 0 offset:8
 ; SI-SDAG-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
 ; SI-SDAG-NEXT:    s_endpgm
@@ -1626,16 +1626,16 @@ define amdgpu_kernel void @s_exp_v4f32(ptr addrspace(1) %out, <4 x float> %in) {
 ; VI-SDAG-NEXT:    v_add_f32_e32 v7, v8, v7
 ; VI-SDAG-NEXT:    v_mov_b32_e32 v5, 0xc2ce8ed0
 ; VI-SDAG-NEXT:    v_add_f32_e32 v2, v2, v7
-; VI-SDAG-NEXT:    v_cmp_nlt_f32_e32 vcc, s3, v5
+; VI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s3, v5
 ; VI-SDAG-NEXT:    v_exp_f32_e32 v2, v2
 ; VI-SDAG-NEXT:    v_cvt_i32_f32_e32 v7, v3
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
-; VI-SDAG-NEXT:    v_cmp_ngt_f32_e32 vcc, s3, v6
+; VI-SDAG-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
+; VI-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s3, v6
 ; VI-SDAG-NEXT:    s_and_b32 s3, s1, 0xfffff000
 ; VI-SDAG-NEXT:    v_mov_b32_e32 v9, s3
 ; VI-SDAG-NEXT:    v_mov_b32_e32 v8, 0x7f800000
 ; VI-SDAG-NEXT:    v_sub_f32_e32 v9, s1, v9
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v3, v8, v1, vcc
+; VI-SDAG-NEXT:    v_cndmask_b32_e32 v3, v1, v8, vcc
 ; VI-SDAG-NEXT:    v_ldexp_f32 v1, v2, v7
 ; VI-SDAG-NEXT:    v_mul_f32_e32 v2, s3, v0
 ; VI-SDAG-NEXT:    v_mul_f32_e32 v10, 0x39a3b295, v9
@@ -1648,11 +1648,11 @@ define amdgpu_kernel void @s_exp_v4f32(ptr addrspace(1) %out, <4 x float> %in) {
 ; VI-SDAG-NEXT:    v_add_f32_e32 v2, v2, v9
 ; VI-SDAG-NEXT:    v_exp_f32_e32 v9, v2
 ; VI-SDAG-NEXT:    v_cvt_i32_f32_e32 v7, v7
-; VI-SDAG-NEXT:    v_cmp_nlt_f32_e32 vcc, s2, v5
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
-; VI-SDAG-NEXT:    v_cmp_ngt_f32_e32 vcc, s2, v6
+; VI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s2, v5
+; VI-SDAG-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
+; VI-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s2, v6
 ; VI-SDAG-NEXT:    s_and_b32 s2, s0, 0xfffff000
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v2, v8, v1, vcc
+; VI-SDAG-NEXT:    v_cndmask_b32_e32 v2, v1, v8, vcc
 ; VI-SDAG-NEXT:    v_ldexp_f32 v1, v9, v7
 ; VI-SDAG-NEXT:    v_mov_b32_e32 v9, s2
 ; VI-SDAG-NEXT:    v_sub_f32_e32 v9, s0, v9
@@ -1667,16 +1667,16 @@ define amdgpu_kernel void @s_exp_v4f32(ptr addrspace(1) %out, <4 x float> %in) {
 ; VI-SDAG-NEXT:    v_add_f32_e32 v0, v0, v4
 ; VI-SDAG-NEXT:    v_exp_f32_e32 v0, v0
 ; VI-SDAG-NEXT:    v_cvt_i32_f32_e32 v4, v7
-; VI-SDAG-NEXT:    v_cmp_nlt_f32_e32 vcc, s1, v5
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
-; VI-SDAG-NEXT:    v_cmp_ngt_f32_e32 vcc, s1, v6
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v1, v8, v1, vcc
+; VI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s1, v5
+; VI-SDAG-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
+; VI-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s1, v6
+; VI-SDAG-NEXT:    v_cndmask_b32_e32 v1, v1, v8, vcc
 ; VI-SDAG-NEXT:    v_ldexp_f32 v0, v0, v4
-; VI-SDAG-NEXT:    v_cmp_nlt_f32_e32 vcc, s0, v5
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
-; VI-SDAG-NEXT:    v_cmp_ngt_f32_e32 vcc, s0, v6
+; VI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s0, v5
+; VI-SDAG-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
+; VI-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s0, v6
 ; VI-SDAG-NEXT:    v_mov_b32_e32 v4, s4
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v0, v8, v0, vcc
+; VI-SDAG-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc
 ; VI-SDAG-NEXT:    v_mov_b32_e32 v5, s5
 ; VI-SDAG-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
 ; VI-SDAG-NEXT:    s_endpgm
@@ -1788,7 +1788,7 @@ define amdgpu_kernel void @s_exp_v4f32(ptr addrspace(1) %out, <4 x float> %in) {
 ; GFX900-SDAG-NEXT:    v_add_f32_e32 v2, v2, v4
 ; GFX900-SDAG-NEXT:    v_cvt_i32_f32_e32 v3, v3
 ; GFX900-SDAG-NEXT:    v_exp_f32_e32 v2, v2
-; GFX900-SDAG-NEXT:    v_cmp_nlt_f32_e32 vcc, s3, v5
+; GFX900-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s3, v5
 ; GFX900-SDAG-NEXT:    v_mov_b32_e32 v6, 0x42b17218
 ; GFX900-SDAG-NEXT:    v_mov_b32_e32 v9, 0x7f800000
 ; GFX900-SDAG-NEXT:    v_ldexp_f32 v2, v2, v3
@@ -1800,9 +1800,9 @@ define amdgpu_kernel void @s_exp_v4f32(ptr addrspace(1) %out, <4 x float> %in) {
 ; GFX900-SDAG-NEXT:    v_add_f32_e32 v3, v8, v3
 ; GFX900-SDAG-NEXT:    v_exp_f32_e32 v8, v3
 ; GFX900-SDAG-NEXT:    v_cvt_i32_f32_e32 v7, v7
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
-; GFX900-SDAG-NEXT:    v_cmp_ngt_f32_e32 vcc, s3, v6
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v3, v9, v2, vcc
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v2, v2, 0, vcc
+; GFX900-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s3, v6
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v3, v2, v9, vcc
 ; GFX900-SDAG-NEXT:    v_ldexp_f32 v2, v8, v7
 ; GFX900-SDAG-NEXT:    v_mul_f32_e32 v7, s1, v0
 ; GFX900-SDAG-NEXT:    v_rndne_f32_e32 v8, v7
@@ -1812,9 +1812,9 @@ define amdgpu_kernel void @s_exp_v4f32(ptr addrspace(1) %out, <4 x float> %in) {
 ; GFX900-SDAG-NEXT:    v_add_f32_e32 v7, v10, v7
 ; GFX900-SDAG-NEXT:    v_exp_f32_e32 v7, v7
 ; GFX900-SDAG-NEXT:    v_cvt_i32_f32_e32 v8, v8
-; GFX900-SDAG-NEXT:    v_cmp_nlt_f32_e32 vcc, s2, v5
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
-; GFX900-SDAG-NEXT:    v_cmp_ngt_f32_e32 vcc, s2, v6
+; GFX900-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s2, v5
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v2, v2, 0, vcc
+; GFX900-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s2, v6
 ; GFX900-SDAG-NEXT:    v_ldexp_f32 v7, v7, v8
 ; GFX900-SDAG-NEXT:    v_mul_f32_e32 v8, s0, v0
 ; GFX900-SDAG-NEXT:    v_rndne_f32_e32 v10, v8
@@ -1824,17 +1824,17 @@ define amdgpu_kernel void @s_exp_v4f32(ptr addrspace(1) %out, <4 x float> %in) {
 ; GFX900-SDAG-NEXT:    v_add_f32_e32 v0, v11, v0
 ; GFX900-SDAG-NEXT:    v_exp_f32_e32 v0, v0
 ; GFX900-SDAG-NEXT:    v_cvt_i32_f32_e32 v8, v10
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v2, v9, v2, vcc
-; GFX900-SDAG-NEXT:    v_cmp_nlt_f32_e32 vcc, s1, v5
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v1, 0, v7, vcc
-; GFX900-SDAG-NEXT:    v_cmp_ngt_f32_e32 vcc, s1, v6
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v1, v9, v1, vcc
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v2, v2, v9, vcc
+; GFX900-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s1, v5
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v1, v7, 0, vcc
+; GFX900-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s1, v6
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc
 ; GFX900-SDAG-NEXT:    v_ldexp_f32 v0, v0, v8
-; GFX900-SDAG-NEXT:    v_cmp_nlt_f32_e32 vcc, s0, v5
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
-; GFX900-SDAG-NEXT:    v_cmp_ngt_f32_e32 vcc, s0, v6
+; GFX900-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s0, v5
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
+; GFX900-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s0, v6
 ; GFX900-SDAG-NEXT:    v_mov_b32_e32 v4, 0
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v0, v9, v0, vcc
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v0, v0, v9, vcc
 ; GFX900-SDAG-NEXT:    global_store_dwordx4 v4, v[0:3], s[6:7]
 ; GFX900-SDAG-NEXT:    s_endpgm
 ;
@@ -1921,7 +1921,7 @@ define amdgpu_kernel void @s_exp_v4f32(ptr addrspace(1) %out, <4 x float> %in) {
 ; SI-SDAG-NEXT:    v_exp_f32_e32 v2, v2
 ; SI-SDAG-NEXT:    v_cvt_i32_f32_e32 v3, v3
 ; SI-SDAG-NEXT:    v_mov_b32_e32 v4, 0xc2ce8ed0
-; SI-SDAG-NEXT:    v_cmp_nlt_f32_e32 vcc, s3, v4
+; SI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s3, v4
 ; SI-SDAG-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x9
 ; SI-SDAG-NEXT:    v_ldexp_f32_e32 v2, v2, v3
 ; SI-SDAG-NEXT:    v_mul_f32_e32 v3, s2, v0
@@ -1932,9 +1932,9 @@ define amdgpu_kernel void @s_exp_v4f32(ptr addrspace(1) %out, <4 x float> %in) {
 ; SI-SDAG-NEXT:    v_add_f32_e32 v3, v7, v3
 ; SI-SDAG-NEXT:    v_exp_f32_e32 v7, v3
 ; SI-SDAG-NEXT:    v_cvt_i32_f32_e32 v6, v6
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
-; SI-SDAG-NEXT:    v_cmp_ngt_f32_e32 vcc, s3, v5
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v3, v8, v2, vcc
+; SI-SDAG-NEXT:    v_cndmask_b32_e64 v2, v2, 0, vcc
+; SI-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s3, v5
+; SI-SDAG-NEXT:    v_cndmask_b32_e32 v3, v2, v8, vcc
 ; SI-SDAG-NEXT:    v_ldexp_f32_e32 v2, v7, v6
 ; SI-SDAG-NEXT:    v_mul_f32_e32 v6, s1, v0
 ; SI-SDAG-NEXT:    v_rndne_f32_e32 v7, v6
@@ -1944,9 +1944,9 @@ define amdgpu_kernel void @s_exp_v4f32(ptr addrspace(1) %out, <4 x float> %in) {
 ; SI-SDAG-NEXT:    v_add_f32_e32 v6, v9, v6
 ; SI-SDAG-NEXT:    v_exp_f32_e32 v6, v6
 ; SI-SDAG-NEXT:    v_cvt_i32_f32_e32 v7, v7
-; SI-SDAG-NEXT:    v_cmp_nlt_f32_e32 vcc, s2, v4
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
-; SI-SDAG-NEXT:    v_cmp_ngt_f32_e32 vcc, s2, v5
+; SI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s2, v4
+; SI-SDAG-NEXT:    v_cndmask_b32_e64 v2, v2, 0, vcc
+; SI-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s2, v5
 ; SI-SDAG-NEXT:    v_ldexp_f32_e32 v6, v6, v7
 ; SI-SDAG-NEXT:    v_mul_f32_e32 v7, s0, v0
 ; SI-SDAG-NEXT:    v_rndne_f32_e32 v9, v7
@@ -1956,18 +1956,18 @@ define amdgpu_kernel void @s_exp_v4f32(ptr addrspace(1) %out, <4 x float> %in) {
 ; SI-SDAG-NEXT:    v_add_f32_e32 v0, v10, v0
 ; SI-SDAG-NEXT:    v_exp_f32_e32 v0, v0
 ; SI-SDAG-NEXT:    v_cvt_i32_f32_e32 v7, v9
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v2, v8, v2, vcc
-; SI-SDAG-NEXT:    v_cmp_nlt_f32_e32 vcc, s1, v4
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v1, 0, v6, vcc
-; SI-SDAG-NEXT:    v_cmp_ngt_f32_e32 vcc, s1, v5
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v1, v8, v1, vcc
+; SI-SDAG-NEXT:    v_cndmask_b32_e32 v2, v2, v8, vcc
+; SI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s1, v4
+; SI-SDAG-NEXT:    v_cndmask_b32_e64 v1, v6, 0, vcc
+; SI-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s1, v5
+; SI-SDAG-NEXT:    v_cndmask_b32_e32 v1, v1, v8, vcc
 ; SI-SDAG-NEXT:    v_ldexp_f32_e32 v0, v0, v7
-; SI-SDAG-NEXT:    v_cmp_nlt_f32_e32 vcc, s0, v4
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
-; SI-SDAG-NEXT:    v_cmp_ngt_f32_e32 vcc, s0, v5
+; SI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s0, v4
+; SI-SDAG-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
+; SI-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s0, v5
 ; SI-SDAG-NEXT:    s_mov_b32 s7, 0xf000
 ; SI-SDAG-NEXT:    s_mov_b32 s6, -1
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v0, v8, v0, vcc
+; SI-SDAG-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc
 ; SI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
 ; SI-SDAG-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
 ; SI-SDAG-NEXT:    s_endpgm
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.exp10.ll b/llvm/test/CodeGen/AMDGPU/llvm.exp10.ll
index 70c3787bac9a1..0be78a93ecda4 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.exp10.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.exp10.ll
@@ -36,11 +36,11 @@ define amdgpu_kernel void @s_exp10_f32(ptr addrspace(1) %out, float %in) {
 ; VI-SDAG-NEXT:    v_mov_b32_e32 v2, 0x7f800000
 ; VI-SDAG-NEXT:    v_ldexp_f32 v0, v0, v1
 ; VI-SDAG-NEXT:    v_mov_b32_e32 v1, 0xc23369f4
-; VI-SDAG-NEXT:    v_cmp_nlt_f32_e32 vcc, s2, v1
+; VI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s2, v1
 ; VI-SDAG-NEXT:    v_mov_b32_e32 v1, 0x421a209b
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
-; VI-SDAG-NEXT:    v_cmp_ngt_f32_e32 vcc, s2, v1
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
+; VI-SDAG-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
+; VI-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s2, v1
+; VI-SDAG-NEXT:    v_cndmask_b32_e32 v2, v0, v2, vcc
 ; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
 ; VI-SDAG-NEXT:    v_mov_b32_e32 v0, s0
 ; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s1
@@ -101,11 +101,11 @@ define amdgpu_kernel void @s_exp10_f32(ptr addrspace(1) %out, float %in) {
 ; GFX900-SDAG-NEXT:    v_mov_b32_e32 v2, 0
 ; GFX900-SDAG-NEXT:    v_ldexp_f32 v0, v0, v1
 ; GFX900-SDAG-NEXT:    v_mov_b32_e32 v1, 0xc23369f4
-; GFX900-SDAG-NEXT:    v_cmp_nlt_f32_e32 vcc, s2, v1
+; GFX900-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s2, v1
 ; GFX900-SDAG-NEXT:    v_mov_b32_e32 v1, 0x421a209b
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
-; GFX900-SDAG-NEXT:    v_cmp_ngt_f32_e32 vcc, s2, v1
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v0, v3, v0, vcc
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
+; GFX900-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s2, v1
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
 ; GFX900-SDAG-NEXT:    global_store_dword v2, v0, s[0:1]
 ; GFX900-SDAG-NEXT:    s_endpgm
 ;
@@ -156,11 +156,11 @@ define amdgpu_kernel void @s_exp10_f32(ptr addrspace(1) %out, float %in) {
 ; SI-SDAG-NEXT:    s_mov_b32 s2, -1
 ; SI-SDAG-NEXT:    v_ldexp_f32_e32 v0, v0, v1
 ; SI-SDAG-NEXT:    v_mov_b32_e32 v1, 0xc23369f4
-; SI-SDAG-NEXT:    v_cmp_nlt_f32_e32 vcc, s6, v1
+; SI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s6, v1
 ; SI-SDAG-NEXT:    v_mov_b32_e32 v1, 0x421a209b
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
-; SI-SDAG-NEXT:    v_cmp_ngt_f32_e32 vcc, s6, v1
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
+; SI-SDAG-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
+; SI-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s6, v1
+; SI-SDAG-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; SI-SDAG-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; SI-SDAG-NEXT:    s_endpgm
 ;
@@ -374,18 +374,18 @@ define amdgpu_kernel void @s_exp10_v2f32(ptr addrspace(1) %out, <2 x float> %in)
 ; VI-SDAG-NEXT:    v_cvt_i32_f32_e32 v4, v5
 ; VI-SDAG-NEXT:    v_ldexp_f32 v1, v1, v2
 ; VI-SDAG-NEXT:    v_mov_b32_e32 v2, 0xc23369f4
-; VI-SDAG-NEXT:    v_cmp_nlt_f32_e32 vcc, s3, v2
+; VI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s3, v2
 ; VI-SDAG-NEXT:    v_mov_b32_e32 v3, 0x421a209b
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-SDAG-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
 ; VI-SDAG-NEXT:    v_mov_b32_e32 v5, 0x7f800000
-; VI-SDAG-NEXT:    v_cmp_ngt_f32_e32 vcc, s3, v3
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
+; VI-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s3, v3
+; VI-SDAG-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
 ; VI-SDAG-NEXT:    v_ldexp_f32 v0, v0, v4
-; VI-SDAG-NEXT:    v_cmp_nlt_f32_e32 vcc, s2, v2
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
-; VI-SDAG-NEXT:    v_cmp_ngt_f32_e32 vcc, s2, v3
+; VI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s2, v2
+; VI-SDAG-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
+; VI-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s2, v3
 ; VI-SDAG-NEXT:    v_mov_b32_e32 v3, s1
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v0, v5, v0, vcc
+; VI-SDAG-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
 ; VI-SDAG-NEXT:    v_mov_b32_e32 v2, s0
 ; VI-SDAG-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
 ; VI-SDAG-NEXT:    s_endpgm
@@ -466,18 +466,18 @@ define amdgpu_kernel void @s_exp10_v2f32(ptr addrspace(1) %out, <2 x float> %in)
 ; GFX900-SDAG-NEXT:    v_exp_f32_e32 v0, v0
 ; GFX900-SDAG-NEXT:    v_cvt_i32_f32_e32 v6, v7
 ; GFX900-SDAG-NEXT:    v_ldexp_f32 v2, v2, v3
-; GFX900-SDAG-NEXT:    v_cmp_nlt_f32_e32 vcc, s3, v5
+; GFX900-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s3, v5
 ; GFX900-SDAG-NEXT:    v_mov_b32_e32 v3, 0x421a209b
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v2, v2, 0, vcc
 ; GFX900-SDAG-NEXT:    v_mov_b32_e32 v7, 0x7f800000
-; GFX900-SDAG-NEXT:    v_cmp_ngt_f32_e32 vcc, s3, v3
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v1, v7, v2, vcc
+; GFX900-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s3, v3
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v1, v2, v7, vcc
 ; GFX900-SDAG-NEXT:    v_ldexp_f32 v0, v0, v6
-; GFX900-SDAG-NEXT:    v_cmp_nlt_f32_e32 vcc, s2, v5
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
-; GFX900-SDAG-NEXT:    v_cmp_ngt_f32_e32 vcc, s2, v3
+; GFX900-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s2, v5
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
+; GFX900-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s2, v3
 ; GFX900-SDAG-NEXT:    v_mov_b32_e32 v4, 0
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v0, v7, v0, vcc
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc
 ; GFX900-SDAG-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1]
 ; GFX900-SDAG-NEXT:    s_endpgm
 ;
@@ -546,19 +546,19 @@ define amdgpu_kernel void @s_exp10_v2f32(ptr addrspace(1) %out, <2 x float> %in)
 ; SI-SDAG-NEXT:    v_cvt_i32_f32_e32 v5, v6
 ; SI-SDAG-NEXT:    v_ldexp_f32_e32 v2, v2, v3
 ; SI-SDAG-NEXT:    v_mov_b32_e32 v3, 0xc23369f4
-; SI-SDAG-NEXT:    v_cmp_nlt_f32_e32 vcc, s7, v3
+; SI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s7, v3
 ; SI-SDAG-NEXT:    v_mov_b32_e32 v4, 0x421a209b
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
+; SI-SDAG-NEXT:    v_cndmask_b32_e64 v2, v2, 0, vcc
 ; SI-SDAG-NEXT:    v_mov_b32_e32 v6, 0x7f800000
-; SI-SDAG-NEXT:    v_cmp_ngt_f32_e32 vcc, s7, v4
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v1, v6, v2, vcc
+; SI-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s7, v4
+; SI-SDAG-NEXT:    v_cndmask_b32_e32 v1, v2, v6, vcc
 ; SI-SDAG-NEXT:    v_ldexp_f32_e32 v0, v0, v5
-; SI-SDAG-NEXT:    v_cmp_nlt_f32_e32 vcc, s6, v3
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
-; SI-SDAG-NEXT:    v_cmp_ngt_f32_e32 vcc, s6, v4
+; SI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s6, v3
+; SI-SDAG-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
+; SI-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s6, v4
 ; SI-SDAG-NEXT:    s_mov_b32 s0, s4
 ; SI-SDAG-NEXT:    s_mov_b32 s1, s5
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v0, v6, v0, vcc
+; SI-SDAG-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
 ; SI-SDAG-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
 ; SI-SDAG-NEXT:    s_endpgm
 ;
@@ -890,13 +890,13 @@ define amdgpu_kernel void @s_exp10_v3f32(ptr addrspace(1) %out, <3 x float> %in)
 ; VI-SDAG-NEXT:    v_exp_f32_e32 v7, v2
 ; VI-SDAG-NEXT:    v_cvt_i32_f32_e32 v6, v6
 ; VI-SDAG-NEXT:    v_mov_b32_e32 v3, 0xc23369f4
-; VI-SDAG-NEXT:    v_cmp_nlt_f32_e32 vcc, s2, v3
+; VI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s2, v3
 ; VI-SDAG-NEXT:    v_mov_b32_e32 v5, 0x421a209b
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-SDAG-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
 ; VI-SDAG-NEXT:    v_mov_b32_e32 v8, 0x7f800000
-; VI-SDAG-NEXT:    v_cmp_ngt_f32_e32 vcc, s2, v5
+; VI-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s2, v5
 ; VI-SDAG-NEXT:    s_and_b32 s2, s0, 0xfffff000
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v2, v8, v1, vcc
+; VI-SDAG-NEXT:    v_cndmask_b32_e32 v2, v1, v8, vcc
 ; VI-SDAG-NEXT:    v_ldexp_f32 v1, v7, v6
 ; VI-SDAG-NEXT:    v_mov_b32_e32 v7, s2
 ; VI-SDAG-NEXT:    v_sub_f32_e32 v7, s0, v7
@@ -911,16 +911,16 @@ define amdgpu_kernel void @s_exp10_v3f32(ptr addrspace(1) %out, <3 x float> %in)
 ; VI-SDAG-NEXT:    v_add_f32_e32 v0, v0, v4
 ; VI-SDAG-NEXT:    v_exp_f32_e32 v0, v0
 ; VI-SDAG-NEXT:    v_cvt_i32_f32_e32 v4, v6
-; VI-SDAG-NEXT:    v_cmp_nlt_f32_e32 vcc, s1, v3
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
-; VI-SDAG-NEXT:    v_cmp_ngt_f32_e32 vcc, s1, v5
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v1, v8, v1, vcc
+; VI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s1, v3
+; VI-SDAG-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
+; VI-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s1, v5
+; VI-SDAG-NEXT:    v_cndmask_b32_e32 v1, v1, v8, vcc
 ; VI-SDAG-NEXT:    v_ldexp_f32 v0, v0, v4
-; VI-SDAG-NEXT:    v_cmp_nlt_f32_e32 vcc, s0, v3
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
-; VI-SDAG-NEXT:    v_cmp_ngt_f32_e32 vcc, s0, v5
+; VI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s0, v3
+; VI-SDAG-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
+; VI-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s0, v5
 ; VI-SDAG-NEXT:    v_mov_b32_e32 v3, s4
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v0, v8, v0, vcc
+; VI-SDAG-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc
 ; VI-SDAG-NEXT:    v_mov_b32_e32 v4, s5
 ; VI-SDAG-NEXT:    flat_store_dwordx3 v[3:4], v[0:2]
 ; VI-SDAG-NEXT:    s_endpgm
@@ -1030,23 +1030,23 @@ define amdgpu_kernel void @s_exp10_v3f32(ptr addrspace(1) %out, <3 x float> %in)
 ; GFX900-SDAG-NEXT:    v_ldexp_f32 v2, v2, v3
 ; GFX900-SDAG-NEXT:    v_mov_b32_e32 v3, 0xc23369f4
 ; GFX900-SDAG-NEXT:    v_add_f32_e32 v0, v10, v0
-; GFX900-SDAG-NEXT:    v_cmp_nlt_f32_e32 vcc, s2, v3
+; GFX900-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s2, v3
 ; GFX900-SDAG-NEXT:    v_exp_f32_e32 v0, v0
 ; GFX900-SDAG-NEXT:    v_cvt_i32_f32_e32 v7, v9
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v2, v2, 0, vcc
 ; GFX900-SDAG-NEXT:    v_mov_b32_e32 v8, 0x7f800000
-; GFX900-SDAG-NEXT:    v_cmp_ngt_f32_e32 vcc, s2, v5
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v2, v8, v2, vcc
-; GFX900-SDAG-NEXT:    v_cmp_nlt_f32_e32 vcc, s1, v3
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v1, 0, v6, vcc
-; GFX900-SDAG-NEXT:    v_cmp_ngt_f32_e32 vcc, s1, v5
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v1, v8, v1, vcc
+; GFX900-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s2, v5
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v2, v2, v8, vcc
+; GFX900-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s1, v3
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v1, v6, 0, vcc
+; GFX900-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s1, v5
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v1, v1, v8, vcc
 ; GFX900-SDAG-NEXT:    v_ldexp_f32 v0, v0, v7
-; GFX900-SDAG-NEXT:    v_cmp_nlt_f32_e32 vcc, s0, v3
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
-; GFX900-SDAG-NEXT:    v_cmp_ngt_f32_e32 vcc, s0, v5
+; GFX900-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s0, v3
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
+; GFX900-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s0, v5
 ; GFX900-SDAG-NEXT:    v_mov_b32_e32 v4, 0
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v0, v8, v0, vcc
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc
 ; GFX900-SDAG-NEXT:    global_store_dwordx3 v4, v[0:2], s[6:7]
 ; GFX900-SDAG-NEXT:    s_endpgm
 ;
@@ -1136,24 +1136,24 @@ define amdgpu_kernel void @s_exp10_v3f32(ptr addrspace(1) %out, <3 x float> %in)
 ; SI-SDAG-NEXT:    v_ldexp_f32_e32 v1, v1, v3
 ; SI-SDAG-NEXT:    v_mov_b32_e32 v3, 0xc23369f4
 ; SI-SDAG-NEXT:    v_add_f32_e32 v0, v9, v0
-; SI-SDAG-NEXT:    v_cmp_nlt_f32_e32 vcc, s1, v3
+; SI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s1, v3
 ; SI-SDAG-NEXT:    v_mov_b32_e32 v4, 0x421a209b
 ; SI-SDAG-NEXT:    v_exp_f32_e32 v2, v0
 ; SI-SDAG-NEXT:    v_cvt_i32_f32_e32 v6, v8
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-SDAG-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
 ; SI-SDAG-NEXT:    v_mov_b32_e32 v7, 0x7f800000
-; SI-SDAG-NEXT:    v_cmp_ngt_f32_e32 vcc, s1, v4
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v1, v7, v1, vcc
-; SI-SDAG-NEXT:    v_cmp_nlt_f32_e32 vcc, s0, v3
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v0, 0, v5, vcc
-; SI-SDAG-NEXT:    v_cmp_ngt_f32_e32 vcc, s0, v4
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v0, v7, v0, vcc
+; SI-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s1, v4
+; SI-SDAG-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
+; SI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s0, v3
+; SI-SDAG-NEXT:    v_cndmask_b32_e64 v0, v5, 0, vcc
+; SI-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s0, v4
+; SI-SDAG-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc
 ; SI-SDAG-NEXT:    v_ldexp_f32_e32 v2, v2, v6
-; SI-SDAG-NEXT:    v_cmp_nlt_f32_e32 vcc, s2, v3
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
-; SI-SDAG-NEXT:    v_cmp_ngt_f32_e32 vcc, s2, v4
+; SI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s2, v3
+; SI-SDAG-NEXT:    v_cndmask_b32_e64 v2, v2, 0, vcc
+; SI-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s2, v4
 ; SI-SDAG-NEXT:    s_mov_b32 s6, -1
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v2, v7, v2, vcc
+; SI-SDAG-NEXT:    v_cndmask_b32_e32 v2, v2, v7, vcc
 ; SI-SDAG-NEXT:    buffer_store_dword v2, off, s[4:7], 0 offset:8
 ; SI-SDAG-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
 ; SI-SDAG-NEXT:    s_endpgm
@@ -1628,16 +1628,16 @@ define amdgpu_kernel void @s_exp10_v4f32(ptr addrspace(1) %out, <4 x float> %in)
 ; VI-SDAG-NEXT:    v_add_f32_e32 v7, v8, v7
 ; VI-SDAG-NEXT:    v_mov_b32_e32 v5, 0xc23369f4
 ; VI-SDAG-NEXT:    v_add_f32_e32 v2, v2, v7
-; VI-SDAG-NEXT:    v_cmp_nlt_f32_e32 vcc, s3, v5
+; VI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s3, v5
 ; VI-SDAG-NEXT:    v_exp_f32_e32 v2, v2
 ; VI-SDAG-NEXT:    v_cvt_i32_f32_e32 v7, v3
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
-; VI-SDAG-NEXT:    v_cmp_ngt_f32_e32 vcc, s3, v6
+; VI-SDAG-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
+; VI-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s3, v6
 ; VI-SDAG-NEXT:    s_and_b32 s3, s1, 0xfffff000
 ; VI-SDAG-NEXT:    v_mov_b32_e32 v9, s3
 ; VI-SDAG-NEXT:    v_mov_b32_e32 v8, 0x7f800000
 ; VI-SDAG-NEXT:    v_sub_f32_e32 v9, s1, v9
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v3, v8, v1, vcc
+; VI-SDAG-NEXT:    v_cndmask_b32_e32 v3, v1, v8, vcc
 ; VI-SDAG-NEXT:    v_ldexp_f32 v1, v2, v7
 ; VI-SDAG-NEXT:    v_mul_f32_e32 v2, s3, v0
 ; VI-SDAG-NEXT:    v_mul_f32_e32 v10, 0x3a2784bc, v9
@@ -1650,11 +1650,11 @@ define amdgpu_kernel void @s_exp10_v4f32(ptr addrspace(1) %out, <4 x float> %in)
 ; VI-SDAG-NEXT:    v_add_f32_e32 v2, v2, v9
 ; VI-SDAG-NEXT:    v_exp_f32_e32 v9, v2
 ; VI-SDAG-NEXT:    v_cvt_i32_f32_e32 v7, v7
-; VI-SDAG-NEXT:    v_cmp_nlt_f32_e32 vcc, s2, v5
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
-; VI-SDAG-NEXT:    v_cmp_ngt_f32_e32 vcc, s2, v6
+; VI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s2, v5
+; VI-SDAG-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
+; VI-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s2, v6
 ; VI-SDAG-NEXT:    s_and_b32 s2, s0, 0xfffff000
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v2, v8, v1, vcc
+; VI-SDAG-NEXT:    v_cndmask_b32_e32 v2, v1, v8, vcc
 ; VI-SDAG-NEXT:    v_ldexp_f32 v1, v9, v7
 ; VI-SDAG-NEXT:    v_mov_b32_e32 v9, s2
 ; VI-SDAG-NEXT:    v_sub_f32_e32 v9, s0, v9
@@ -1669,16 +1669,16 @@ define amdgpu_kernel void @s_exp10_v4f32(ptr addrspace(1) %out, <4 x float> %in)
 ; VI-SDAG-NEXT:    v_add_f32_e32 v0, v0, v4
 ; VI-SDAG-NEXT:    v_exp_f32_e32 v0, v0
 ; VI-SDAG-NEXT:    v_cvt_i32_f32_e32 v4, v7
-; VI-SDAG-NEXT:    v_cmp_nlt_f32_e32 vcc, s1, v5
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
-; VI-SDAG-NEXT:    v_cmp_ngt_f32_e32 vcc, s1, v6
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v1, v8, v1, vcc
+; VI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s1, v5
+; VI-SDAG-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
+; VI-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s1, v6
+; VI-SDAG-NEXT:    v_cndmask_b32_e32 v1, v1, v8, vcc
 ; VI-SDAG-NEXT:    v_ldexp_f32 v0, v0, v4
-; VI-SDAG-NEXT:    v_cmp_nlt_f32_e32 vcc, s0, v5
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
-; VI-SDAG-NEXT:    v_cmp_ngt_f32_e32 vcc, s0, v6
+; VI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s0, v5
+; VI-SDAG-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
+; VI-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s0, v6
 ; VI-SDAG-NEXT:    v_mov_b32_e32 v4, s4
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v0, v8, v0, vcc
+; VI-SDAG-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc
 ; VI-SDAG-NEXT:    v_mov_b32_e32 v5, s5
 ; VI-SDAG-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
 ; VI-SDAG-NEXT:    s_endpgm
@@ -1790,7 +1790,7 @@ define amdgpu_kernel void @s_exp10_v4f32(ptr addrspace(1) %out, <4 x float> %in)
 ; GFX900-SDAG-NEXT:    v_add_f32_e32 v2, v2, v4
 ; GFX900-SDAG-NEXT:    v_cvt_i32_f32_e32 v3, v3
 ; GFX900-SDAG-NEXT:    v_exp_f32_e32 v2, v2
-; GFX900-SDAG-NEXT:    v_cmp_nlt_f32_e32 vcc, s3, v5
+; GFX900-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s3, v5
 ; GFX900-SDAG-NEXT:    v_mov_b32_e32 v6, 0x421a209b
 ; GFX900-SDAG-NEXT:    v_mov_b32_e32 v9, 0x7f800000
 ; GFX900-SDAG-NEXT:    v_ldexp_f32 v2, v2, v3
@@ -1802,9 +1802,9 @@ define amdgpu_kernel void @s_exp10_v4f32(ptr addrspace(1) %out, <4 x float> %in)
 ; GFX900-SDAG-NEXT:    v_add_f32_e32 v3, v8, v3
 ; GFX900-SDAG-NEXT:    v_exp_f32_e32 v8, v3
 ; GFX900-SDAG-NEXT:    v_cvt_i32_f32_e32 v7, v7
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
-; GFX900-SDAG-NEXT:    v_cmp_ngt_f32_e32 vcc, s3, v6
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v3, v9, v2, vcc
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v2, v2, 0, vcc
+; GFX900-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s3, v6
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v3, v2, v9, vcc
 ; GFX900-SDAG-NEXT:    v_ldexp_f32 v2, v8, v7
 ; GFX900-SDAG-NEXT:    v_mul_f32_e32 v7, s1, v0
 ; GFX900-SDAG-NEXT:    v_rndne_f32_e32 v8, v7
@@ -1814,9 +1814,9 @@ define amdgpu_kernel void @s_exp10_v4f32(ptr addrspace(1) %out, <4 x float> %in)
 ; GFX900-SDAG-NEXT:    v_add_f32_e32 v7, v10, v7
 ; GFX900-SDAG-NEXT:    v_exp_f32_e32 v7, v7
 ; GFX900-SDAG-NEXT:    v_cvt_i32_f32_e32 v8, v8
-; GFX900-SDAG-NEXT:    v_cmp_nlt_f32_e32 vcc, s2, v5
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
-; GFX900-SDAG-NEXT:    v_cmp_ngt_f32_e32 vcc, s2, v6
+; GFX900-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s2, v5
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v2, v2, 0, vcc
+; GFX900-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s2, v6
 ; GFX900-SDAG-NEXT:    v_ldexp_f32 v7, v7, v8
 ; GFX900-SDAG-NEXT:    v_mul_f32_e32 v8, s0, v0
 ; GFX900-SDAG-NEXT:    v_rndne_f32_e32 v10, v8
@@ -1826,17 +1826,17 @@ define amdgpu_kernel void @s_exp10_v4f32(ptr addrspace(1) %out, <4 x float> %in)
 ; GFX900-SDAG-NEXT:    v_add_f32_e32 v0, v11, v0
 ; GFX900-SDAG-NEXT:    v_exp_f32_e32 v0, v0
 ; GFX900-SDAG-NEXT:    v_cvt_i32_f32_e32 v8, v10
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v2, v9, v2, vcc
-; GFX900-SDAG-NEXT:    v_cmp_nlt_f32_e32 vcc, s1, v5
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v1, 0, v7, vcc
-; GFX900-SDAG-NEXT:    v_cmp_ngt_f32_e32 vcc, s1, v6
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v1, v9, v1, vcc
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v2, v2, v9, vcc
+; GFX900-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s1, v5
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v1, v7, 0, vcc
+; GFX900-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s1, v6
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc
 ; GFX900-SDAG-NEXT:    v_ldexp_f32 v0, v0, v8
-; GFX900-SDAG-NEXT:    v_cmp_nlt_f32_e32 vcc, s0, v5
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
-; GFX900-SDAG-NEXT:    v_cmp_ngt_f32_e32 vcc, s0, v6
+; GFX900-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s0, v5
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
+; GFX900-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s0, v6
 ; GFX900-SDAG-NEXT:    v_mov_b32_e32 v4, 0
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v0, v9, v0, vcc
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v0, v0, v9, vcc
 ; GFX900-SDAG-NEXT:    global_store_dwordx4 v4, v[0:3], s[6:7]
 ; GFX900-SDAG-NEXT:    s_endpgm
 ;
@@ -1923,7 +1923,7 @@ define amdgpu_kernel void @s_exp10_v4f32(ptr addrspace(1) %out, <4 x float> %in)
 ; SI-SDAG-NEXT:    v_exp_f32_e32 v2, v2
 ; SI-SDAG-NEXT:    v_cvt_i32_f32_e32 v3, v3
 ; SI-SDAG-NEXT:    v_mov_b32_e32 v4, 0xc23369f4
-; SI-SDAG-NEXT:    v_cmp_nlt_f32_e32 vcc, s3, v4
+; SI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s3, v4
 ; SI-SDAG-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x9
 ; SI-SDAG-NEXT:    v_ldexp_f32_e32 v2, v2, v3
 ; SI-SDAG-NEXT:    v_mul_f32_e32 v3, s2, v0
@@ -1934,9 +1934,9 @@ define amdgpu_kernel void @s_exp10_v4f32(ptr addrspace(1) %out, <4 x float> %in)
 ; SI-SDAG-NEXT:    v_add_f32_e32 v3, v7, v3
 ; SI-SDAG-NEXT:    v_exp_f32_e32 v7, v3
 ; SI-SDAG-NEXT:    v_cvt_i32_f32_e32 v6, v6
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
-; SI-SDAG-NEXT:    v_cmp_ngt_f32_e32 vcc, s3, v5
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v3, v8, v2, vcc
+; SI-SDAG-NEXT:    v_cndmask_b32_e64 v2, v2, 0, vcc
+; SI-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s3, v5
+; SI-SDAG-NEXT:    v_cndmask_b32_e32 v3, v2, v8, vcc
 ; SI-SDAG-NEXT:    v_ldexp_f32_e32 v2, v7, v6
 ; SI-SDAG-NEXT:    v_mul_f32_e32 v6, s1, v0
 ; SI-SDAG-NEXT:    v_rndne_f32_e32 v7, v6
@@ -1946,9 +1946,9 @@ define amdgpu_kernel void @s_exp10_v4f32(ptr addrspace(1) %out, <4 x float> %in)
 ; SI-SDAG-NEXT:    v_add_f32_e32 v6, v9, v6
 ; SI-SDAG-NEXT:    v_exp_f32_e32 v6, v6
 ; SI-SDAG-NEXT:    v_cvt_i32_f32_e32 v7, v7
-; SI-SDAG-NEXT:    v_cmp_nlt_f32_e32 vcc, s2, v4
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
-; SI-SDAG-NEXT:    v_cmp_ngt_f32_e32 vcc, s2, v5
+; SI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s2, v4
+; SI-SDAG-NEXT:    v_cndmask_b32_e64 v2, v2, 0, vcc
+; SI-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s2, v5
 ; SI-SDAG-NEXT:    v_ldexp_f32_e32 v6, v6, v7
 ; SI-SDAG-NEXT:    v_mul_f32_e32 v7, s0, v0
 ; SI-SDAG-NEXT:    v_rndne_f32_e32 v9, v7
@@ -1958,18 +1958,18 @@ define amdgpu_kernel void @s_exp10_v4f32(ptr addrspace(1) %out, <4 x float> %in)
 ; SI-SDAG-NEXT:    v_add_f32_e32 v0, v10, v0
 ; SI-SDAG-NEXT:    v_exp_f32_e32 v0, v0
 ; SI-SDAG-NEXT:    v_cvt_i32_f32_e32 v7, v9
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v2, v8, v2, vcc
-; SI-SDAG-NEXT:    v_cmp_nlt_f32_e32 vcc, s1, v4
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v1, 0, v6, vcc
-; SI-SDAG-NEXT:    v_cmp_ngt_f32_e32 vcc, s1, v5
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v1, v8, v1, vcc
+; SI-SDAG-NEXT:    v_cndmask_b32_e32 v2, v2, v8, vcc
+; SI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s1, v4
+; SI-SDAG-NEXT:    v_cndmask_b32_e64 v1, v6, 0, vcc
+; SI-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s1, v5
+; SI-SDAG-NEXT:    v_cndmask_b32_e32 v1, v1, v8, vcc
 ; SI-SDAG-NEXT:    v_ldexp_f32_e32 v0, v0, v7
-; SI-SDAG-NEXT:    v_cmp_nlt_f32_e32 vcc, s0, v4
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
-; SI-SDAG-NEXT:    v_cmp_ngt_f32_e32 vcc, s0, v5
+; SI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s0, v4
+; SI-SDAG-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
+; SI-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s0, v5
 ; SI-SDAG-NEXT:    s_mov_b32 s7, 0xf000
 ; SI-SDAG-NEXT:    s_mov_b32 s6, -1
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v0, v8, v0, vcc
+; SI-SDAG-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc
 ; SI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
 ; SI-SDAG-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
 ; SI-SDAG-NEXT:    s_endpgm
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.maximum.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.maximum.f16.ll
index a56c92785d487..8b85aca53bbea 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.maximum.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.maximum.f16.ll
@@ -531,8 +531,8 @@ define void @s_maximum_f16(half inreg %src0, half inreg %src1) {
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v0, v0
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v1, v1
 ; GFX7-NEXT:    v_max_f32_e32 v3, v1, v0
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v1, v0
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v2, v3, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v1, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
 ; GFX7-NEXT:    v_cvt_f16_f32_e32 v0, v0
 ; GFX7-NEXT:    ;;#ASMSTART
 ; GFX7-NEXT:    ; use v0
@@ -545,8 +545,8 @@ define void @s_maximum_f16(half inreg %src0, half inreg %src1) {
 ; GFX8-NEXT:    v_mov_b32_e32 v0, s17
 ; GFX8-NEXT:    v_max_f16_e32 v1, s16, v0
 ; GFX8-NEXT:    v_mov_b32_e32 v2, 0x7e00
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, s16, v0
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, s16, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v1, v2, vcc
 ; GFX8-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX8-NEXT:    ;;#ASMSTART
 ; GFX8-NEXT:    ; use v0
@@ -559,8 +559,8 @@ define void @s_maximum_f16(half inreg %src0, half inreg %src1) {
 ; GFX900-NEXT:    v_mov_b32_e32 v0, s17
 ; GFX900-NEXT:    v_max_f16_e32 v1, s16, v0
 ; GFX900-NEXT:    v_mov_b32_e32 v2, 0x7e00
-; GFX900-NEXT:    v_cmp_o_f16_e32 vcc, s16, v0
-; GFX900-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, s16, v0
+; GFX900-NEXT:    v_cndmask_b32_e32 v0, v1, v2, vcc
 ; GFX900-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX900-NEXT:    ;;#ASMSTART
 ; GFX900-NEXT:    ; use v0
@@ -583,8 +583,8 @@ define void @s_maximum_f16(half inreg %src0, half inreg %src1) {
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    v_max_f16_e64 v0, s16, s17
-; GFX10-NEXT:    v_cmp_o_f16_e64 vcc_lo, s16, s17
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e64 s4, s16, s17
+; GFX10-NEXT:    v_cndmask_b32_e64 v0, v0, 0x7e00, s4
 ; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX10-NEXT:    ;;#ASMSTART
 ; GFX10-NEXT:    ; use v0
@@ -594,10 +594,10 @@ define void @s_maximum_f16(half inreg %src0, half inreg %src1) {
 ; GFX11-TRUE16-LABEL: s_maximum_f16:
 ; GFX11-TRUE16:       ; %bb.0:
 ; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s2, s0, s1
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s2, s0, s1
 ; GFX11-TRUE16-NEXT:    v_max_f16_e64 v0.l, s0, s1
 ; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x7e00, v0.l, s2
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, v0.l, 0x7e00, s2
 ; GFX11-TRUE16-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX11-TRUE16-NEXT:    ;;#ASMSTART
 ; GFX11-TRUE16-NEXT:    ; use v0
@@ -608,9 +608,9 @@ define void @s_maximum_f16(half inreg %src0, half inreg %src1) {
 ; GFX11-FAKE16:       ; %bb.0:
 ; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-FAKE16-NEXT:    v_max_f16_e64 v0, s0, s1
-; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e64 vcc_lo, s0, s1
-; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v0, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e64 s0, s0, s1
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e64 v0, v0, 0x7e00, s0
 ; GFX11-FAKE16-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX11-FAKE16-NEXT:    ;;#ASMSTART
 ; GFX11-FAKE16-NEXT:    ; use v0
@@ -990,12 +990,12 @@ define void @s_maximum_v2f16(<2 x half> inreg %src0, <2 x half> inreg %src1) {
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v3, v3
 ; GFX7-NEXT:    v_mov_b32_e32 v5, 0x7fc00000
 ; GFX7-NEXT:    v_max_f32_e32 v4, v1, v0
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v1, v0
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v5, v4, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v1, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v4, v5, vcc
 ; GFX7-NEXT:    v_max_f32_e32 v1, v3, v2
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, v3, v2
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v3, v2
 ; GFX7-NEXT:    v_cvt_f16_f32_e32 v0, v0
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
 ; GFX7-NEXT:    v_cvt_f16_f32_e32 v1, v1
 ; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
 ; GFX7-NEXT:    v_or_b32_e32 v0, v1, v0
@@ -1010,14 +1010,14 @@ define void @s_maximum_v2f16(<2 x half> inreg %src0, <2 x half> inreg %src1) {
 ; GFX8-NEXT:    s_lshr_b32 s4, s17, 16
 ; GFX8-NEXT:    s_lshr_b32 s5, s16, 16
 ; GFX8-NEXT:    v_mov_b32_e32 v0, s4
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, s5, v0
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, s5, v0
 ; GFX8-NEXT:    v_max_f16_e32 v0, s5, v0
 ; GFX8-NEXT:    v_mov_b32_e32 v1, 0x7e00
 ; GFX8-NEXT:    v_mov_b32_e32 v2, s17
-; GFX8-NEXT:    v_cndmask_b32_sdwa v0, v1, v0, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-NEXT:    v_cndmask_b32_sdwa v0, v0, v1, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
 ; GFX8-NEXT:    v_max_f16_e32 v3, s16, v2
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, s16, v2
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, s16, v2
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
 ; GFX8-NEXT:    v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
 ; GFX8-NEXT:    ;;#ASMSTART
 ; GFX8-NEXT:    ; use v0
@@ -1032,12 +1032,12 @@ define void @s_maximum_v2f16(<2 x half> inreg %src0, <2 x half> inreg %src1) {
 ; GFX900-NEXT:    s_lshr_b32 s4, s17, 16
 ; GFX900-NEXT:    v_pk_max_f16 v1, s16, v1
 ; GFX900-NEXT:    v_mov_b32_e32 v2, 0x7e00
-; GFX900-NEXT:    v_cmp_o_f16_e32 vcc, s16, v0
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, s16, v0
 ; GFX900-NEXT:    s_lshr_b32 s5, s16, 16
 ; GFX900-NEXT:    v_mov_b32_e32 v3, s4
-; GFX900-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
-; GFX900-NEXT:    v_cmp_o_f16_e32 vcc, s5, v3
-; GFX900-NEXT:    v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX900-NEXT:    v_cndmask_b32_e32 v0, v1, v2, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, s5, v3
+; GFX900-NEXT:    v_cndmask_b32_sdwa v1, v1, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
 ; GFX900-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX900-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
 ; GFX900-NEXT:    ;;#ASMSTART
@@ -1059,16 +1059,16 @@ define void @s_maximum_v2f16(<2 x half> inreg %src0, <2 x half> inreg %src1) {
 ; GFX10-LABEL: s_maximum_v2f16:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    v_pk_max_f16 v0, s16, s17
+; GFX10-NEXT:    v_cmp_u_f16_e64 s5, s16, s17
 ; GFX10-NEXT:    s_lshr_b32 s4, s17, 16
+; GFX10-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
+; GFX10-NEXT:    v_cndmask_b32_e64 v0, v0, 0x7e00, s5
 ; GFX10-NEXT:    s_lshr_b32 s5, s16, 16
-; GFX10-NEXT:    v_pk_max_f16 v0, s16, s17
-; GFX10-NEXT:    v_cmp_o_f16_e64 vcc_lo, s5, s4
-; GFX10-NEXT:    v_cmp_o_f16_e64 s4, s16, s17
-; GFX10-NEXT:    v_mov_b32_e32 v1, 0x7e00
-; GFX10-NEXT:    v_cndmask_b32_e64 v2, 0x7e00, v0, s4
-; GFX10-NEXT:    v_cndmask_b32_sdwa v0, v1, v0, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX10-NEXT:    v_and_b32_e32 v1, 0xffff, v2
-; GFX10-NEXT:    v_lshl_or_b32 v0, v0, 16, v1
+; GFX10-NEXT:    v_cmp_u_f16_e64 s4, s5, s4
+; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, 0x7e00, s4
+; GFX10-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
 ; GFX10-NEXT:    ;;#ASMSTART
 ; GFX10-NEXT:    ; use v0
 ; GFX10-NEXT:    ;;#ASMEND
@@ -1077,16 +1077,16 @@ define void @s_maximum_v2f16(<2 x half> inreg %src0, <2 x half> inreg %src1) {
 ; GFX11-TRUE16-LABEL: s_maximum_v2f16:
 ; GFX11-TRUE16:       ; %bb.0:
 ; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s2, s0, s1
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s2, s0, s1
 ; GFX11-TRUE16-NEXT:    v_pk_max_f16 v0, s0, s1
 ; GFX11-TRUE16-NEXT:    s_lshr_b32 s1, s1, 16
 ; GFX11-TRUE16-NEXT:    s_lshr_b32 s0, s0, 16
 ; GFX11-TRUE16-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s0, s0, s1
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s0, s0, s1
 ; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x7e00, v0.l, s2
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, v0.l, 0x7e00, s2
 ; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v1.l, 0x7e00, v1.l, s0
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v1.l, v1.l, 0x7e00, s0
 ; GFX11-TRUE16-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX11-TRUE16-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
@@ -1099,16 +1099,16 @@ define void @s_maximum_v2f16(<2 x half> inreg %src0, <2 x half> inreg %src1) {
 ; GFX11-FAKE16:       ; %bb.0:
 ; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-FAKE16-NEXT:    v_pk_max_f16 v0, s0, s1
-; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e64 vcc_lo, s0, s1
 ; GFX11-FAKE16-NEXT:    s_lshr_b32 s2, s1, 16
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e64 s1, s0, s1
 ; GFX11-FAKE16-NEXT:    s_lshr_b32 s0, s0, 16
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e64 s0, s0, s2
 ; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v0, vcc_lo
-; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e64 vcc_lo, s0, s2
-; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e64 v0, v0, 0x7e00, s1
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e64 v1, v1, 0x7e00, s0
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX11-FAKE16-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0x7e00, v1, vcc_lo
-; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX11-FAKE16-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
 ; GFX11-FAKE16-NEXT:    ;;#ASMSTART
 ; GFX11-FAKE16-NEXT:    ; use v0
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.maximum.f32.ll b/llvm/test/CodeGen/AMDGPU/llvm.maximum.f32.ll
index 826bf427503ab..4f624cb271500 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.maximum.f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.maximum.f32.ll
@@ -389,8 +389,8 @@ define void @s_maximum_f32(float inreg %src0, float inreg %src1) {
 ; GFX7-NEXT:    v_mov_b32_e32 v0, s17
 ; GFX7-NEXT:    v_max_f32_e32 v1, s16, v0
 ; GFX7-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, s16, v0
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, s16, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v1, v2, vcc
 ; GFX7-NEXT:    ;;#ASMSTART
 ; GFX7-NEXT:    ; use v0
 ; GFX7-NEXT:    ;;#ASMEND
@@ -402,8 +402,8 @@ define void @s_maximum_f32(float inreg %src0, float inreg %src1) {
 ; GFX8-NEXT:    v_mov_b32_e32 v0, s17
 ; GFX8-NEXT:    v_max_f32_e32 v1, s16, v0
 ; GFX8-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, s16, v0
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, s16, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v1, v2, vcc
 ; GFX8-NEXT:    ;;#ASMSTART
 ; GFX8-NEXT:    ; use v0
 ; GFX8-NEXT:    ;;#ASMEND
@@ -415,8 +415,8 @@ define void @s_maximum_f32(float inreg %src0, float inreg %src1) {
 ; GFX900-NEXT:    v_mov_b32_e32 v0, s17
 ; GFX900-NEXT:    v_max_f32_e32 v1, s16, v0
 ; GFX900-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
-; GFX900-NEXT:    v_cmp_o_f32_e32 vcc, s16, v0
-; GFX900-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, s16, v0
+; GFX900-NEXT:    v_cndmask_b32_e32 v0, v1, v2, vcc
 ; GFX900-NEXT:    ;;#ASMSTART
 ; GFX900-NEXT:    ; use v0
 ; GFX900-NEXT:    ;;#ASMEND
@@ -436,8 +436,8 @@ define void @s_maximum_f32(float inreg %src0, float inreg %src1) {
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    v_max_f32_e64 v0, s16, s17
-; GFX10-NEXT:    v_cmp_o_f32_e64 vcc_lo, s16, s17
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e64 s4, s16, s17
+; GFX10-NEXT:    v_cndmask_b32_e64 v0, v0, 0x7fc00000, s4
 ; GFX10-NEXT:    ;;#ASMSTART
 ; GFX10-NEXT:    ; use v0
 ; GFX10-NEXT:    ;;#ASMEND
@@ -447,9 +447,9 @@ define void @s_maximum_f32(float inreg %src0, float inreg %src1) {
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NEXT:    v_max_f32_e64 v0, s0, s1
-; GFX11-NEXT:    v_cmp_o_f32_e64 vcc_lo, s0, s1
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v0, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e64 s0, s0, s1
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT:    v_cndmask_b32_e64 v0, v0, 0x7fc00000, s0
 ; GFX11-NEXT:    ;;#ASMSTART
 ; GFX11-NEXT:    ; use v0
 ; GFX11-NEXT:    ;;#ASMEND
@@ -752,12 +752,12 @@ define void @s_maximum_v2f32(<2 x float> inreg %src0, <2 x float> inreg %src1) {
 ; GFX7-NEXT:    v_mov_b32_e32 v0, s19
 ; GFX7-NEXT:    v_max_f32_e32 v1, s17, v0
 ; GFX7-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, s17, v0
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, s17, v0
 ; GFX7-NEXT:    v_mov_b32_e32 v0, s18
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v2, v1, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
 ; GFX7-NEXT:    v_max_f32_e32 v3, s16, v0
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, s16, v0
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v2, v3, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, s16, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
 ; GFX7-NEXT:    ;;#ASMSTART
 ; GFX7-NEXT:    ; use v[0:1]
 ; GFX7-NEXT:    ;;#ASMEND
@@ -769,12 +769,12 @@ define void @s_maximum_v2f32(<2 x float> inreg %src0, <2 x float> inreg %src1) {
 ; GFX8-NEXT:    v_mov_b32_e32 v0, s19
 ; GFX8-NEXT:    v_max_f32_e32 v1, s17, v0
 ; GFX8-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, s17, v0
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, s17, v0
 ; GFX8-NEXT:    v_mov_b32_e32 v0, s18
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v2, v1, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
 ; GFX8-NEXT:    v_max_f32_e32 v3, s16, v0
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, s16, v0
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v2, v3, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, s16, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
 ; GFX8-NEXT:    ;;#ASMSTART
 ; GFX8-NEXT:    ; use v[0:1]
 ; GFX8-NEXT:    ;;#ASMEND
@@ -786,12 +786,12 @@ define void @s_maximum_v2f32(<2 x float> inreg %src0, <2 x float> inreg %src1) {
 ; GFX900-NEXT:    v_mov_b32_e32 v0, s19
 ; GFX900-NEXT:    v_max_f32_e32 v1, s17, v0
 ; GFX900-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
-; GFX900-NEXT:    v_cmp_o_f32_e32 vcc, s17, v0
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, s17, v0
 ; GFX900-NEXT:    v_mov_b32_e32 v0, s18
-; GFX900-NEXT:    v_cndmask_b32_e32 v1, v2, v1, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
 ; GFX900-NEXT:    v_max_f32_e32 v3, s16, v0
-; GFX900-NEXT:    v_cmp_o_f32_e32 vcc, s16, v0
-; GFX900-NEXT:    v_cndmask_b32_e32 v0, v2, v3, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, s16, v0
+; GFX900-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
 ; GFX900-NEXT:    ;;#ASMSTART
 ; GFX900-NEXT:    ; use v[0:1]
 ; GFX900-NEXT:    ;;#ASMEND
@@ -813,11 +813,11 @@ define void @s_maximum_v2f32(<2 x float> inreg %src0, <2 x float> inreg %src1) {
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    v_max_f32_e64 v0, s17, s19
-; GFX10-NEXT:    v_cmp_o_f32_e64 vcc_lo, s17, s19
+; GFX10-NEXT:    v_cmp_u_f32_e64 s4, s17, s19
 ; GFX10-NEXT:    v_max_f32_e64 v2, s16, s18
-; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7fc00000, v0, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f32_e64 vcc_lo, s16, s18
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v2, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e64 v1, v0, 0x7fc00000, s4
+; GFX10-NEXT:    v_cmp_u_f32_e64 s4, s16, s18
+; GFX10-NEXT:    v_cndmask_b32_e64 v0, v2, 0x7fc00000, s4
 ; GFX10-NEXT:    ;;#ASMSTART
 ; GFX10-NEXT:    ; use v[0:1]
 ; GFX10-NEXT:    ;;#ASMEND
@@ -827,12 +827,12 @@ define void @s_maximum_v2f32(<2 x float> inreg %src0, <2 x float> inreg %src1) {
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NEXT:    v_max_f32_e64 v0, s1, s3
-; GFX11-NEXT:    v_cmp_o_f32_e64 vcc_lo, s1, s3
 ; GFX11-NEXT:    v_max_f32_e64 v2, s0, s2
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7fc00000, v0, vcc_lo
-; GFX11-NEXT:    v_cmp_o_f32_e64 vcc_lo, s0, s2
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v2, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e64 s1, s1, s3
+; GFX11-NEXT:    v_cmp_u_f32_e64 s0, s0, s2
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_cndmask_b32_e64 v1, v0, 0x7fc00000, s1
+; GFX11-NEXT:    v_cndmask_b32_e64 v0, v2, 0x7fc00000, s0
 ; GFX11-NEXT:    ;;#ASMSTART
 ; GFX11-NEXT:    ; use v[0:1]
 ; GFX11-NEXT:    ;;#ASMEND
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.minimum.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.minimum.f16.ll
index 3dcc70b0ea3b6..7f8aec379f8ef 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.minimum.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.minimum.f16.ll
@@ -455,8 +455,8 @@ define void @s_minimum_f16(half inreg %src0, half inreg %src1) {
 ; GFX8-NEXT:    v_mov_b32_e32 v0, s17
 ; GFX8-NEXT:    v_min_f16_e32 v1, s16, v0
 ; GFX8-NEXT:    v_mov_b32_e32 v2, 0x7e00
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, s16, v0
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, s16, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v1, v2, vcc
 ; GFX8-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX8-NEXT:    ;;#ASMSTART
 ; GFX8-NEXT:    ; use v0
@@ -469,8 +469,8 @@ define void @s_minimum_f16(half inreg %src0, half inreg %src1) {
 ; GFX900-NEXT:    v_mov_b32_e32 v0, s17
 ; GFX900-NEXT:    v_min_f16_e32 v1, s16, v0
 ; GFX900-NEXT:    v_mov_b32_e32 v2, 0x7e00
-; GFX900-NEXT:    v_cmp_o_f16_e32 vcc, s16, v0
-; GFX900-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, s16, v0
+; GFX900-NEXT:    v_cndmask_b32_e32 v0, v1, v2, vcc
 ; GFX900-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX900-NEXT:    ;;#ASMSTART
 ; GFX900-NEXT:    ; use v0
@@ -493,8 +493,8 @@ define void @s_minimum_f16(half inreg %src0, half inreg %src1) {
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    v_min_f16_e64 v0, s16, s17
-; GFX10-NEXT:    v_cmp_o_f16_e64 vcc_lo, s16, s17
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f16_e64 s4, s16, s17
+; GFX10-NEXT:    v_cndmask_b32_e64 v0, v0, 0x7e00, s4
 ; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX10-NEXT:    ;;#ASMSTART
 ; GFX10-NEXT:    ; use v0
@@ -504,10 +504,10 @@ define void @s_minimum_f16(half inreg %src0, half inreg %src1) {
 ; GFX11-TRUE16-LABEL: s_minimum_f16:
 ; GFX11-TRUE16:       ; %bb.0:
 ; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s2, s0, s1
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s2, s0, s1
 ; GFX11-TRUE16-NEXT:    v_min_f16_e64 v0.l, s0, s1
 ; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x7e00, v0.l, s2
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, v0.l, 0x7e00, s2
 ; GFX11-TRUE16-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX11-TRUE16-NEXT:    ;;#ASMSTART
 ; GFX11-TRUE16-NEXT:    ; use v0
@@ -518,9 +518,9 @@ define void @s_minimum_f16(half inreg %src0, half inreg %src1) {
 ; GFX11-FAKE16:       ; %bb.0:
 ; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-FAKE16-NEXT:    v_min_f16_e64 v0, s0, s1
-; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e64 vcc_lo, s0, s1
-; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v0, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e64 s0, s0, s1
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e64 v0, v0, 0x7e00, s0
 ; GFX11-FAKE16-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX11-FAKE16-NEXT:    ;;#ASMSTART
 ; GFX11-FAKE16-NEXT:    ; use v0
@@ -823,14 +823,14 @@ define void @s_minimum_v2f16(<2 x half> inreg %src0, <2 x half> inreg %src1) {
 ; GFX8-NEXT:    s_lshr_b32 s4, s17, 16
 ; GFX8-NEXT:    s_lshr_b32 s5, s16, 16
 ; GFX8-NEXT:    v_mov_b32_e32 v0, s4
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, s5, v0
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, s5, v0
 ; GFX8-NEXT:    v_min_f16_e32 v0, s5, v0
 ; GFX8-NEXT:    v_mov_b32_e32 v1, 0x7e00
 ; GFX8-NEXT:    v_mov_b32_e32 v2, s17
-; GFX8-NEXT:    v_cndmask_b32_sdwa v0, v1, v0, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-NEXT:    v_cndmask_b32_sdwa v0, v0, v1, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
 ; GFX8-NEXT:    v_min_f16_e32 v3, s16, v2
-; GFX8-NEXT:    v_cmp_o_f16_e32 vcc, s16, v2
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX8-NEXT:    v_cmp_u_f16_e32 vcc, s16, v2
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
 ; GFX8-NEXT:    v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
 ; GFX8-NEXT:    ;;#ASMSTART
 ; GFX8-NEXT:    ; use v0
@@ -845,12 +845,12 @@ define void @s_minimum_v2f16(<2 x half> inreg %src0, <2 x half> inreg %src1) {
 ; GFX900-NEXT:    s_lshr_b32 s4, s17, 16
 ; GFX900-NEXT:    v_pk_min_f16 v1, s16, v1
 ; GFX900-NEXT:    v_mov_b32_e32 v2, 0x7e00
-; GFX900-NEXT:    v_cmp_o_f16_e32 vcc, s16, v0
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, s16, v0
 ; GFX900-NEXT:    s_lshr_b32 s5, s16, 16
 ; GFX900-NEXT:    v_mov_b32_e32 v3, s4
-; GFX900-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
-; GFX900-NEXT:    v_cmp_o_f16_e32 vcc, s5, v3
-; GFX900-NEXT:    v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX900-NEXT:    v_cndmask_b32_e32 v0, v1, v2, vcc
+; GFX900-NEXT:    v_cmp_u_f16_e32 vcc, s5, v3
+; GFX900-NEXT:    v_cndmask_b32_sdwa v1, v1, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
 ; GFX900-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX900-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
 ; GFX900-NEXT:    ;;#ASMSTART
@@ -872,16 +872,16 @@ define void @s_minimum_v2f16(<2 x half> inreg %src0, <2 x half> inreg %src1) {
 ; GFX10-LABEL: s_minimum_v2f16:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    v_pk_min_f16 v0, s16, s17
+; GFX10-NEXT:    v_cmp_u_f16_e64 s5, s16, s17
 ; GFX10-NEXT:    s_lshr_b32 s4, s17, 16
+; GFX10-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
+; GFX10-NEXT:    v_cndmask_b32_e64 v0, v0, 0x7e00, s5
 ; GFX10-NEXT:    s_lshr_b32 s5, s16, 16
-; GFX10-NEXT:    v_pk_min_f16 v0, s16, s17
-; GFX10-NEXT:    v_cmp_o_f16_e64 vcc_lo, s5, s4
-; GFX10-NEXT:    v_cmp_o_f16_e64 s4, s16, s17
-; GFX10-NEXT:    v_mov_b32_e32 v1, 0x7e00
-; GFX10-NEXT:    v_cndmask_b32_e64 v2, 0x7e00, v0, s4
-; GFX10-NEXT:    v_cndmask_b32_sdwa v0, v1, v0, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX10-NEXT:    v_and_b32_e32 v1, 0xffff, v2
-; GFX10-NEXT:    v_lshl_or_b32 v0, v0, 16, v1
+; GFX10-NEXT:    v_cmp_u_f16_e64 s4, s5, s4
+; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, 0x7e00, s4
+; GFX10-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
 ; GFX10-NEXT:    ;;#ASMSTART
 ; GFX10-NEXT:    ; use v0
 ; GFX10-NEXT:    ;;#ASMEND
@@ -890,16 +890,16 @@ define void @s_minimum_v2f16(<2 x half> inreg %src0, <2 x half> inreg %src1) {
 ; GFX11-TRUE16-LABEL: s_minimum_v2f16:
 ; GFX11-TRUE16:       ; %bb.0:
 ; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s2, s0, s1
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s2, s0, s1
 ; GFX11-TRUE16-NEXT:    v_pk_min_f16 v0, s0, s1
 ; GFX11-TRUE16-NEXT:    s_lshr_b32 s1, s1, 16
 ; GFX11-TRUE16-NEXT:    s_lshr_b32 s0, s0, 16
 ; GFX11-TRUE16-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s0, s0, s1
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s0, s0, s1
 ; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x7e00, v0.l, s2
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, v0.l, 0x7e00, s2
 ; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v1.l, 0x7e00, v1.l, s0
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v1.l, v1.l, 0x7e00, s0
 ; GFX11-TRUE16-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX11-TRUE16-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
@@ -912,16 +912,16 @@ define void @s_minimum_v2f16(<2 x half> inreg %src0, <2 x half> inreg %src1) {
 ; GFX11-FAKE16:       ; %bb.0:
 ; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-FAKE16-NEXT:    v_pk_min_f16 v0, s0, s1
-; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e64 vcc_lo, s0, s1
 ; GFX11-FAKE16-NEXT:    s_lshr_b32 s2, s1, 16
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e64 s1, s0, s1
 ; GFX11-FAKE16-NEXT:    s_lshr_b32 s0, s0, 16
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e64 s0, s0, s2
 ; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v0, vcc_lo
-; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e64 vcc_lo, s0, s2
-; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e64 v0, v0, 0x7e00, s1
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e64 v1, v1, 0x7e00, s0
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX11-FAKE16-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0x7e00, v1, vcc_lo
-; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX11-FAKE16-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
 ; GFX11-FAKE16-NEXT:    ;;#ASMSTART
 ; GFX11-FAKE16-NEXT:    ; use v0
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.minimum.f32.ll b/llvm/test/CodeGen/AMDGPU/llvm.minimum.f32.ll
index 0215795467323..9d0c2babca0e6 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.minimum.f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.minimum.f32.ll
@@ -389,8 +389,8 @@ define void @s_minimum_f32(float inreg %src0, float inreg %src1) {
 ; GFX7-NEXT:    v_mov_b32_e32 v0, s17
 ; GFX7-NEXT:    v_min_f32_e32 v1, s16, v0
 ; GFX7-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, s16, v0
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, s16, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v1, v2, vcc
 ; GFX7-NEXT:    ;;#ASMSTART
 ; GFX7-NEXT:    ; use v0
 ; GFX7-NEXT:    ;;#ASMEND
@@ -402,8 +402,8 @@ define void @s_minimum_f32(float inreg %src0, float inreg %src1) {
 ; GFX8-NEXT:    v_mov_b32_e32 v0, s17
 ; GFX8-NEXT:    v_min_f32_e32 v1, s16, v0
 ; GFX8-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, s16, v0
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, s16, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v1, v2, vcc
 ; GFX8-NEXT:    ;;#ASMSTART
 ; GFX8-NEXT:    ; use v0
 ; GFX8-NEXT:    ;;#ASMEND
@@ -415,8 +415,8 @@ define void @s_minimum_f32(float inreg %src0, float inreg %src1) {
 ; GFX900-NEXT:    v_mov_b32_e32 v0, s17
 ; GFX900-NEXT:    v_min_f32_e32 v1, s16, v0
 ; GFX900-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
-; GFX900-NEXT:    v_cmp_o_f32_e32 vcc, s16, v0
-; GFX900-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, s16, v0
+; GFX900-NEXT:    v_cndmask_b32_e32 v0, v1, v2, vcc
 ; GFX900-NEXT:    ;;#ASMSTART
 ; GFX900-NEXT:    ; use v0
 ; GFX900-NEXT:    ;;#ASMEND
@@ -436,8 +436,8 @@ define void @s_minimum_f32(float inreg %src0, float inreg %src1) {
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    v_min_f32_e64 v0, s16, s17
-; GFX10-NEXT:    v_cmp_o_f32_e64 vcc_lo, s16, s17
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v0, vcc_lo
+; GFX10-NEXT:    v_cmp_u_f32_e64 s4, s16, s17
+; GFX10-NEXT:    v_cndmask_b32_e64 v0, v0, 0x7fc00000, s4
 ; GFX10-NEXT:    ;;#ASMSTART
 ; GFX10-NEXT:    ; use v0
 ; GFX10-NEXT:    ;;#ASMEND
@@ -447,9 +447,9 @@ define void @s_minimum_f32(float inreg %src0, float inreg %src1) {
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NEXT:    v_min_f32_e64 v0, s0, s1
-; GFX11-NEXT:    v_cmp_o_f32_e64 vcc_lo, s0, s1
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v0, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e64 s0, s0, s1
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT:    v_cndmask_b32_e64 v0, v0, 0x7fc00000, s0
 ; GFX11-NEXT:    ;;#ASMSTART
 ; GFX11-NEXT:    ; use v0
 ; GFX11-NEXT:    ;;#ASMEND
@@ -752,12 +752,12 @@ define void @s_minimum_v2f32(<2 x float> inreg %src0, <2 x float> inreg %src1) {
 ; GFX7-NEXT:    v_mov_b32_e32 v0, s19
 ; GFX7-NEXT:    v_min_f32_e32 v1, s17, v0
 ; GFX7-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, s17, v0
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, s17, v0
 ; GFX7-NEXT:    v_mov_b32_e32 v0, s18
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, v2, v1, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
 ; GFX7-NEXT:    v_min_f32_e32 v3, s16, v0
-; GFX7-NEXT:    v_cmp_o_f32_e32 vcc, s16, v0
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, v2, v3, vcc
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, s16, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
 ; GFX7-NEXT:    ;;#ASMSTART
 ; GFX7-NEXT:    ; use v[0:1]
 ; GFX7-NEXT:    ;;#ASMEND
@@ -769,12 +769,12 @@ define void @s_minimum_v2f32(<2 x float> inreg %src0, <2 x float> inreg %src1) {
 ; GFX8-NEXT:    v_mov_b32_e32 v0, s19
 ; GFX8-NEXT:    v_min_f32_e32 v1, s17, v0
 ; GFX8-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, s17, v0
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, s17, v0
 ; GFX8-NEXT:    v_mov_b32_e32 v0, s18
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v2, v1, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
 ; GFX8-NEXT:    v_min_f32_e32 v3, s16, v0
-; GFX8-NEXT:    v_cmp_o_f32_e32 vcc, s16, v0
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v2, v3, vcc
+; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, s16, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
 ; GFX8-NEXT:    ;;#ASMSTART
 ; GFX8-NEXT:    ; use v[0:1]
 ; GFX8-NEXT:    ;;#ASMEND
@@ -786,12 +786,12 @@ define void @s_minimum_v2f32(<2 x float> inreg %src0, <2 x float> inreg %src1) {
 ; GFX900-NEXT:    v_mov_b32_e32 v0, s19
 ; GFX900-NEXT:    v_min_f32_e32 v1, s17, v0
 ; GFX900-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
-; GFX900-NEXT:    v_cmp_o_f32_e32 vcc, s17, v0
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, s17, v0
 ; GFX900-NEXT:    v_mov_b32_e32 v0, s18
-; GFX900-NEXT:    v_cndmask_b32_e32 v1, v2, v1, vcc
+; GFX900-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
 ; GFX900-NEXT:    v_min_f32_e32 v3, s16, v0
-; GFX900-NEXT:    v_cmp_o_f32_e32 vcc, s16, v0
-; GFX900-NEXT:    v_cndmask_b32_e32 v0, v2, v3, vcc
+; GFX900-NEXT:    v_cmp_u_f32_e32 vcc, s16, v0
+; GFX900-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
 ; GFX900-NEXT:    ;;#ASMSTART
 ; GFX900-NEXT:    ; use v[0:1]
 ; GFX900-NEXT:    ;;#ASMEND
@@ -813,11 +813,11 @@ define void @s_minimum_v2f32(<2 x float> inreg %src0, <2 x float> inreg %src1) {
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    v_min_f32_e64 v0, s17, s19
-; GFX10-NEXT:    v_cmp_o_f32_e64 vcc_lo, s17, s19
+; GFX10-NEXT:    v_cmp_u_f32_e64 s4, s17, s19
 ; GFX10-NEXT:    v_min_f32_e64 v2, s16, s18
-; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7fc00000, v0, vcc_lo
-; GFX10-NEXT:    v_cmp_o_f32_e64 vcc_lo, s16, s18
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v2, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e64 v1, v0, 0x7fc00000, s4
+; GFX10-NEXT:    v_cmp_u_f32_e64 s4, s16, s18
+; GFX10-NEXT:    v_cndmask_b32_e64 v0, v2, 0x7fc00000, s4
 ; GFX10-NEXT:    ;;#ASMSTART
 ; GFX10-NEXT:    ; use v[0:1]
 ; GFX10-NEXT:    ;;#ASMEND
@@ -827,12 +827,12 @@ define void @s_minimum_v2f32(<2 x float> inreg %src0, <2 x float> inreg %src1) {
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NEXT:    v_min_f32_e64 v0, s1, s3
-; GFX11-NEXT:    v_cmp_o_f32_e64 vcc_lo, s1, s3
 ; GFX11-NEXT:    v_min_f32_e64 v2, s0, s2
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7fc00000, v0, vcc_lo
-; GFX11-NEXT:    v_cmp_o_f32_e64 vcc_lo, s0, s2
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v2, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e64 s1, s1, s3
+; GFX11-NEXT:    v_cmp_u_f32_e64 s0, s0, s2
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_cndmask_b32_e64 v1, v0, 0x7fc00000, s1
+; GFX11-NEXT:    v_cndmask_b32_e64 v0, v2, 0x7fc00000, s0
 ; GFX11-NEXT:    ;;#ASMSTART
 ; GFX11-NEXT:    ; use v[0:1]
 ; GFX11-NEXT:    ;;#ASMEND
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.round.f64.ll b/llvm/test/CodeGen/AMDGPU/llvm.round.f64.ll
index 696832ddc6d27..801be9a212b0c 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.round.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.round.f64.ll
@@ -87,10 +87,9 @@ define amdgpu_kernel void @v_round_f64(ptr addrspace(1) %out, ptr addrspace(1) %
 ; SI-NEXT:    v_not_b32_e32 v5, v5
 ; SI-NEXT:    v_not_b32_e32 v4, v4
 ; SI-NEXT:    v_and_b32_e32 v5, v3, v5
-; SI-NEXT:    v_cmp_gt_i32_e32 vcc, 0, v6
 ; SI-NEXT:    v_and_b32_e32 v4, v2, v4
-; SI-NEXT:    v_cndmask_b32_e32 v5, v5, v7, vcc
 ; SI-NEXT:    v_cmp_lt_i32_e32 vcc, -1, v6
+; SI-NEXT:    v_cndmask_b32_e32 v5, v7, v5, vcc
 ; SI-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
 ; SI-NEXT:    v_cmp_lt_i32_e32 vcc, 51, v6
 ; SI-NEXT:    v_cndmask_b32_e32 v5, v5, v3, vcc
diff --git a/llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.f16.ll b/llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.f16.ll
index 030c332850124..b6640cec0ecc1 100644
--- a/llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.f16.ll
@@ -595,18 +595,18 @@ define half @add_select_negk_fabs_f16(i32 %c, half %x, half %y) {
 ; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; VI-NEXT:    v_and_b32_e32 v1, 0x7fff, v1
 ; VI-NEXT:    v_mov_b32_e32 v3, 0xbc00
-; VI-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
-; VI-NEXT:    v_cndmask_b32_e32 v0, v3, v1, vcc
+; VI-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
+; VI-NEXT:    v_cndmask_b32_e32 v0, v1, v3, vcc
 ; VI-NEXT:    v_add_f16_e32 v0, v0, v2
 ; VI-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-SAFE-TRUE16-LABEL: add_select_negk_fabs_f16:
 ; GFX11-SAFE-TRUE16:       ; %bb.0:
 ; GFX11-SAFE-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
 ; GFX11-SAFE-TRUE16-NEXT:    v_and_b16 v0.l, 0x7fff, v1.l
 ; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0xbc00, v0.l, vcc_lo
+; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.l, v0.l, 0xbc00, vcc_lo
 ; GFX11-SAFE-TRUE16-NEXT:    v_add_f16_e32 v0.l, v0.l, v2.l
 ; GFX11-SAFE-TRUE16-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -614,19 +614,19 @@ define half @add_select_negk_fabs_f16(i32 %c, half %x, half %y) {
 ; GFX11-SAFE-FAKE16:       ; %bb.0:
 ; GFX11-SAFE-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-SAFE-FAKE16-NEXT:    v_and_b32_e32 v1, 0x7fff, v1
-; GFX11-SAFE-FAKE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
 ; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0xbc00, v1, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e64 v0, v1, 0xbc00, vcc_lo
 ; GFX11-SAFE-FAKE16-NEXT:    v_add_f16_e32 v0, v0, v2
 ; GFX11-SAFE-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-NSZ-TRUE16-LABEL: add_select_negk_fabs_f16:
 ; GFX11-NSZ-TRUE16:       ; %bb.0:
 ; GFX11-NSZ-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
 ; GFX11-NSZ-TRUE16-NEXT:    v_and_b16 v0.l, 0x7fff, v1.l
 ; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0xbc00, v0.l, vcc_lo
+; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.l, v0.l, 0xbc00, vcc_lo
 ; GFX11-NSZ-TRUE16-NEXT:    v_add_f16_e32 v0.l, v0.l, v2.l
 ; GFX11-NSZ-TRUE16-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -634,9 +634,9 @@ define half @add_select_negk_fabs_f16(i32 %c, half %x, half %y) {
 ; GFX11-NSZ-FAKE16:       ; %bb.0:
 ; GFX11-NSZ-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NSZ-FAKE16-NEXT:    v_and_b32_e32 v1, 0x7fff, v1
-; GFX11-NSZ-FAKE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
 ; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0xbc00, v1, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e64 v0, v1, 0xbc00, vcc_lo
 ; GFX11-NSZ-FAKE16-NEXT:    v_add_f16_e32 v0, v0, v2
 ; GFX11-NSZ-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq i32 %c, 0
@@ -665,18 +665,18 @@ define half @add_select_negliteralk_fabs_f16(i32 %c, half %x, half %y) {
 ; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; VI-NEXT:    v_and_b32_e32 v1, 0x7fff, v1
 ; VI-NEXT:    v_mov_b32_e32 v3, 0xe400
-; VI-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
-; VI-NEXT:    v_cndmask_b32_e32 v0, v3, v1, vcc
+; VI-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
+; VI-NEXT:    v_cndmask_b32_e32 v0, v1, v3, vcc
 ; VI-NEXT:    v_add_f16_e32 v0, v0, v2
 ; VI-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-SAFE-TRUE16-LABEL: add_select_negliteralk_fabs_f16:
 ; GFX11-SAFE-TRUE16:       ; %bb.0:
 ; GFX11-SAFE-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
 ; GFX11-SAFE-TRUE16-NEXT:    v_and_b16 v0.l, 0x7fff, v1.l
 ; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0xe400, v0.l, vcc_lo
+; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.l, v0.l, 0xe400, vcc_lo
 ; GFX11-SAFE-TRUE16-NEXT:    v_add_f16_e32 v0.l, v0.l, v2.l
 ; GFX11-SAFE-TRUE16-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -684,19 +684,19 @@ define half @add_select_negliteralk_fabs_f16(i32 %c, half %x, half %y) {
 ; GFX11-SAFE-FAKE16:       ; %bb.0:
 ; GFX11-SAFE-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-SAFE-FAKE16-NEXT:    v_and_b32_e32 v1, 0x7fff, v1
-; GFX11-SAFE-FAKE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
 ; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0xe400, v1, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e64 v0, v1, 0xe400, vcc_lo
 ; GFX11-SAFE-FAKE16-NEXT:    v_add_f16_e32 v0, v0, v2
 ; GFX11-SAFE-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-NSZ-TRUE16-LABEL: add_select_negliteralk_fabs_f16:
 ; GFX11-NSZ-TRUE16:       ; %bb.0:
 ; GFX11-NSZ-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
 ; GFX11-NSZ-TRUE16-NEXT:    v_and_b16 v0.l, 0x7fff, v1.l
 ; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0xe400, v0.l, vcc_lo
+; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.l, v0.l, 0xe400, vcc_lo
 ; GFX11-NSZ-TRUE16-NEXT:    v_add_f16_e32 v0.l, v0.l, v2.l
 ; GFX11-NSZ-TRUE16-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -704,9 +704,9 @@ define half @add_select_negliteralk_fabs_f16(i32 %c, half %x, half %y) {
 ; GFX11-NSZ-FAKE16:       ; %bb.0:
 ; GFX11-NSZ-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NSZ-FAKE16-NEXT:    v_and_b32_e32 v1, 0x7fff, v1
-; GFX11-NSZ-FAKE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
 ; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0xe400, v1, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e64 v0, v1, 0xe400, vcc_lo
 ; GFX11-NSZ-FAKE16-NEXT:    v_add_f16_e32 v0, v0, v2
 ; GFX11-NSZ-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq i32 %c, 0
diff --git a/llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.ll b/llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.ll
index a680ba5933418..3f6034f4392a1 100644
--- a/llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.ll
+++ b/llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.ll
@@ -166,9 +166,9 @@ define amdgpu_kernel void @add_select_posk_posk_f32(i32 %c) #0 {
 ; GCN: buffer_load_dword [[X:v[0-9]+]]
 ; GCN: buffer_load_dword [[Y:v[0-9]+]]
 
-; GCN-DAG: s_cmp_lg_u32 s{{[0-9]+}}, 0
+; GCN-DAG: s_cmp_eq_u32 s{{[0-9]+}}, 0
 ; GCN: s_cselect_b64 [[VCC:.*]], -1, 0
-; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], -1.0, |[[X]]|, [[VCC]]
+; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], |[[X]]|, -1.0, [[VCC]]
 ; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]]
 define amdgpu_kernel void @add_select_negk_fabs_f32(i32 %c) #0 {
   %x = load volatile float, ptr addrspace(1) poison
@@ -186,9 +186,9 @@ define amdgpu_kernel void @add_select_negk_fabs_f32(i32 %c) #0 {
 ; GCN-DAG: buffer_load_dword [[Y:v[0-9]+]]
 ; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0xc4800000
 
-; GCN-DAG: s_cmp_lg_u32 s{{[0-9]+}}, 0
+; GCN-DAG: s_cmp_eq_u32 s{{[0-9]+}}, 0
 ; GCN: s_cselect_b64 [[VCC:.*]], -1, 0
-; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], [[K]], |[[X]]|, [[VCC]]
+; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], |[[X]]|, [[K]], [[VCC]]
 ; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]]
 define amdgpu_kernel void @add_select_negliteralk_fabs_f32(i32 %c) #0 {
   %x = load volatile float, ptr addrspace(1) poison
@@ -223,9 +223,9 @@ define amdgpu_kernel void @add_select_fabs_posk_f32(i32 %c) #0 {
 ; GCN: buffer_load_dword [[X:v[0-9]+]]
 ; GCN: buffer_load_dword [[Y:v[0-9]+]]
 
-; GCN-DAG: s_cmp_lg_u32 s{{[0-9]+}}, 0
+; GCN-DAG: s_cmp_eq_u32 s{{[0-9]+}}, 0
 ; GCN: s_cselect_b64 [[VCC:.*]], -1, 0
-; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], 1.0, |[[X]]|, s{{\[[0-9]+:[0-9]+\]}}
+; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], |[[X]]|, 1.0, s{{\[[0-9]+:[0-9]+\]}}
 ; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]]
 define amdgpu_kernel void @add_select_posk_fabs_f32(i32 %c) #0 {
   %x = load volatile float, ptr addrspace(1) poison
@@ -458,9 +458,9 @@ define amdgpu_kernel void @add_select_fneg_negk_negk_f32(i32 %c) #0 {
 ; GCN: buffer_load_dword [[X:v[0-9]+]]
 ; GCN: buffer_load_dword [[Y:v[0-9]+]]
 
-; GCN: s_cmp_lg_u32 s{{[0-9]+}}, 0
-; GCN: s_cselect_b64 vcc, -1, 0
-; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 1.0, [[X]], vcc
+; GCN: s_cmp_eq_u32 s{{[0-9]+}}, 0
+; GCN: s_cselect_b64 s[0:1], -1, 0
+; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], [[X]], 1.0, s[0:1]
 ; GCN: v_sub_f32_e32 v{{[0-9]+}}, [[Y]], [[SELECT]]
 define amdgpu_kernel void @add_select_negk_fneg_f32(i32 %c) #0 {
   %x = load volatile float, ptr addrspace(1) poison
@@ -494,9 +494,9 @@ define amdgpu_kernel void @add_select_fneg_posk_f32(i32 %c) #0 {
 ; GCN: buffer_load_dword [[X:v[0-9]+]]
 ; GCN: buffer_load_dword [[Y:v[0-9]+]]
 
-; GCN: s_cmp_lg_u32 s{{[0-9]+}}, 0
-; GCN: s_cselect_b64 vcc, -1, 0
-; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], -1.0, [[X]], vcc
+; GCN: s_cmp_eq_u32 s{{[0-9]+}}, 0
+; GCN: s_cselect_b64 s[0:1], -1, 0
+; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], [[X]], -1.0, s[0:1]
 ; GCN: v_sub_f32_e32 v{{[0-9]+}}, [[Y]], [[SELECT]]
 define amdgpu_kernel void @add_select_posk_fneg_f32(i32 %c) #0 {
   %x = load volatile float, ptr addrspace(1) poison
@@ -657,9 +657,9 @@ define amdgpu_kernel void @mul_select_negfabs_posk_f32(i32 %c) #0 {
 ; GCN: buffer_load_dword [[X:v[0-9]+]]
 ; GCN: buffer_load_dword [[Y:v[0-9]+]]
 
-; GCN-DAG: s_cmp_lg_u32 s{{[0-9]+}}, 0
+; GCN-DAG: s_cmp_eq_u32 s{{[0-9]+}}, 0
 ; GCN: s_cselect_b64  [[VCC:.*]], -1, 0
-; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], 4.0, -|[[X]]|, [[VCC]]
+; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], -|[[X]]|, 4.0, [[VCC]]
 ; GCN: v_mul_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]]
 define amdgpu_kernel void @mul_select_posk_negfabs_f32(i32 %c) #0 {
   %x = load volatile float, ptr addrspace(1) poison
@@ -695,9 +695,9 @@ define amdgpu_kernel void @mul_select_negfabs_negk_f32(i32 %c) #0 {
 ; GCN: buffer_load_dword [[X:v[0-9]+]]
 ; GCN: buffer_load_dword [[Y:v[0-9]+]]
 
-; GCN: s_cmp_lg_u32
+; GCN: s_cmp_eq_u32
 ; GCN: s_cselect_b64 [[VCC:.*]], -1, 0
-; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], -4.0, -|[[X]]|, [[VCC]]
+; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], -|[[X]]|, -4.0, [[VCC]]
 ; GCN: v_mul_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]]
 define amdgpu_kernel void @mul_select_negk_negfabs_f32(i32 %c) #0 {
   %x = load volatile float, ptr addrspace(1) poison
@@ -847,16 +847,16 @@ define amdgpu_kernel void @mul_select_negfabs_posk_inv2pi_f32(i32 %c) #0 {
 ; GCN: buffer_load_dword [[X:v[0-9]+]]
 ; GCN: buffer_load_dword [[Y:v[0-9]+]]
 
-; GCN-DAG: s_cmp_lg_u32 s{{[0-9]+}}, 0
+; GCN-DAG: s_cmp_eq_u32 s{{[0-9]+}}, 0
 
 ; GCN-DAG: s_cselect_b64  [[VCC:.*]], -1, 0
 
 ; SI-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e22f983
-; SI: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], [[K]], -|[[X]]|, [[VCC]]
+; SI: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], -|[[X]]|, [[K]], [[VCC]]
 ; SI: v_mul_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]]
 
 
-; VI: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], 0.15915494, -|[[X]]|, [[VCC]]
+; VI: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], -|[[X]]|, 0.15915494, [[VCC]]
 ; VI: v_mul_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]]
 define amdgpu_kernel void @mul_select_posk_inv2pi_negfabs_f32(i32 %c) #0 {
   %x = load volatile float, ptr addrspace(1) poison
@@ -893,9 +893,9 @@ define amdgpu_kernel void @mul_select_negfabs_negk_inv2pi_f32(i32 %c) #0 {
 ; GCN: buffer_load_dword [[Y:v[0-9]+]]
 
 ; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0xbe22f983
-; GCN: s_cmp_lg_u32
+; GCN: s_cmp_eq_u32
 ; GCN: s_cselect_b64 s[0:1], -1, 0
-; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], [[K]], -|[[X]]|, s[0:1]
+; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], -|[[X]]|, [[K]], s[0:1]
 ; GCN: v_mul_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]]
 define amdgpu_kernel void @mul_select_negk_inv2pi_negfabs_f32(i32 %c) #0 {
   %x = load volatile float, ptr addrspace(1) poison
@@ -933,9 +933,9 @@ define amdgpu_kernel void @mul_select_negfabs_posk_0_f32(i32 %c) #0 {
 ; GCN: buffer_load_dword [[X:v[0-9]+]]
 ; GCN: buffer_load_dword [[Y:v[0-9]+]]
 
-; GCN-DAG: s_cmp_lg_u32 s{{[0-9]+}}, 0
+; GCN-DAG: s_cmp_eq_u32 s{{[0-9]+}}, 0
 ; GCN: s_cselect_b64  [[VCC:.*]], -1, 0
-; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], 0, -|[[X]]|, [[VCC]]
+; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], -|[[X]]|, 0, [[VCC]]
 ; GCN: v_mul_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]]
 define amdgpu_kernel void @mul_select_posk_0_negfabs_f32(i32 %c) #0 {
   %x = load volatile float, ptr addrspace(1) poison
@@ -973,9 +973,9 @@ define amdgpu_kernel void @mul_select_negfabs_negk_0_f32(i32 %c) #0 {
 ; GCN: buffer_load_dword [[Y:v[0-9]+]]
 
 ; GCN: v_bfrev_b32_e32 [[NEG0:v[0-9]+]], 1
-; GCN: s_cmp_lg_u32
+; GCN: s_cmp_eq_u32
 ; GCN: s_cselect_b64 s[0:1], -1, 0
-; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], [[NEG0]], -|[[X]]|, s[0:1]
+; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], -|[[X]]|, [[NEG0]], s[0:1]
 ; GCN: v_mul_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]]
 define amdgpu_kernel void @mul_select_negk_0_negfabs_f32(i32 %c) #0 {
   %x = load volatile float, ptr addrspace(1) poison
diff --git a/llvm/test/CodeGen/AMDGPU/select.f16.ll b/llvm/test/CodeGen/AMDGPU/select.f16.ll
index 51de691e0eccc..28272745ec994 100644
--- a/llvm/test/CodeGen/AMDGPU/select.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/select.f16.ll
@@ -492,8 +492,8 @@ define amdgpu_kernel void @select_f16_imm_c(
 ; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
 ; SI-NEXT:    v_cvt_f32_f16_e32 v1, v1
 ; SI-NEXT:    v_cvt_f32_f16_e32 v2, v2
-; SI-NEXT:    v_cmp_nlt_f32_e32 vcc, v0, v1
-; SI-NEXT:    v_cndmask_b32_e32 v0, 0.5, v2, vcc
+; SI-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
+; SI-NEXT:    v_cndmask_b32_e64 v0, v2, 0.5, vcc
 ; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
 ; SI-NEXT:    buffer_store_short v0, off, s[8:11], 0
 ; SI-NEXT:    s_endpgm
@@ -525,8 +525,8 @@ define amdgpu_kernel void @select_f16_imm_c(
 ; VI-NEXT:    v_mov_b32_e32 v3, 0x3800
 ; VI-NEXT:    s_mov_b32 s8, s0
 ; VI-NEXT:    s_mov_b32 s9, s1
-; VI-NEXT:    v_cmp_nlt_f16_e32 vcc, v0, v1
-; VI-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
+; VI-NEXT:    v_cmp_lt_f16_e32 vcc, v0, v1
+; VI-NEXT:    v_cndmask_b32_e32 v0, v2, v3, vcc
 ; VI-NEXT:    buffer_store_short v0, off, s[8:11], 0
 ; VI-NEXT:    s_endpgm
 ;
@@ -554,10 +554,10 @@ define amdgpu_kernel void @select_f16_imm_c(
 ; GFX11-TRUE16-NEXT:    buffer_load_u16 v2, off, s[12:15], 0 glc dlc
 ; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0)
 ; GFX11-TRUE16-NEXT:    s_mov_b32 s9, s1
-; GFX11-TRUE16-NEXT:    v_cmp_nlt_f16_e32 vcc_lo, v0.l, v1.l
+; GFX11-TRUE16-NEXT:    v_cmp_lt_f16_e32 vcc_lo, v0.l, v1.l
 ; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v0.l, v2.l
 ; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x3800, v0.l, vcc_lo
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, v0.l, 0x3800, vcc_lo
 ; GFX11-TRUE16-NEXT:    buffer_store_b16 v0, off, s[8:11], 0
 ; GFX11-TRUE16-NEXT:    s_endpgm
 ;
@@ -587,8 +587,8 @@ define amdgpu_kernel void @select_f16_imm_c(
 ; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0)
 ; GFX11-FAKE16-NEXT:    s_mov_b32 s8, s0
 ; GFX11-FAKE16-NEXT:    s_mov_b32 s9, s1
-; GFX11-FAKE16-NEXT:    v_cmp_nlt_f16_e32 vcc_lo, v0, v1
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x3800, v2, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_lt_f16_e32 vcc_lo, v0, v1
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e64 v0, v2, 0x3800, vcc_lo
 ; GFX11-FAKE16-NEXT:    buffer_store_b16 v0, off, s[8:11], 0
 ; GFX11-FAKE16-NEXT:    s_endpgm
     ptr addrspace(1) %r,
@@ -1328,8 +1328,8 @@ define amdgpu_kernel void @select_v2f16_imm_c(
 ; SI-NEXT:    s_mov_b32 s8, s0
 ; SI-NEXT:    s_mov_b32 s9, s1
 ; SI-NEXT:    s_waitcnt vmcnt(2)
-; SI-NEXT:    v_cvt_f32_f16_e32 v4, v0
-; SI-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
+; SI-NEXT:    v_lshrrev_b32_e32 v4, 16, v0
+; SI-NEXT:    v_cvt_f32_f16_e32 v4, v4
 ; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
 ; SI-NEXT:    s_waitcnt vmcnt(1)
 ; SI-NEXT:    v_lshrrev_b32_e32 v5, 16, v1
@@ -1339,14 +1339,14 @@ define amdgpu_kernel void @select_v2f16_imm_c(
 ; SI-NEXT:    v_cvt_f32_f16_e32 v6, v6
 ; SI-NEXT:    v_cvt_f32_f16_e32 v1, v1
 ; SI-NEXT:    v_cvt_f32_f16_e32 v2, v2
-; SI-NEXT:    v_cmp_nlt_f32_e32 vcc, v0, v5
-; SI-NEXT:    v_cndmask_b32_e32 v0, v3, v6, vcc
-; SI-NEXT:    v_cmp_nlt_f32_e32 vcc, v4, v1
+; SI-NEXT:    v_cmp_lt_f32_e32 vcc, v4, v5
+; SI-NEXT:    v_cndmask_b32_e32 v3, v6, v3, vcc
+; SI-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
+; SI-NEXT:    v_cvt_f16_f32_e32 v3, v3
+; SI-NEXT:    v_cndmask_b32_e64 v0, v2, 0.5, vcc
 ; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
-; SI-NEXT:    v_cndmask_b32_e32 v1, 0.5, v2, vcc
-; SI-NEXT:    v_cvt_f16_f32_e32 v1, v1
-; SI-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
-; SI-NEXT:    v_or_b32_e32 v0, v1, v0
+; SI-NEXT:    v_lshlrev_b32_e32 v1, 16, v3
+; SI-NEXT:    v_or_b32_e32 v0, v0, v1
 ; SI-NEXT:    buffer_store_dword v0, off, s[8:11], 0
 ; SI-NEXT:    s_endpgm
 ;
@@ -1378,12 +1378,12 @@ define amdgpu_kernel void @select_v2f16_imm_c(
 ; VI-NEXT:    v_lshrrev_b32_e32 v4, 16, v0
 ; VI-NEXT:    s_waitcnt vmcnt(1)
 ; VI-NEXT:    v_lshrrev_b32_e32 v5, 16, v1
-; VI-NEXT:    v_cmp_nlt_f16_e32 vcc, v1, v0
+; VI-NEXT:    v_cmp_lt_f16_e32 vcc, v1, v0
 ; VI-NEXT:    s_waitcnt vmcnt(0)
-; VI-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
-; VI-NEXT:    v_cmp_nlt_f16_e32 vcc, v5, v4
+; VI-NEXT:    v_cndmask_b32_e32 v0, v2, v3, vcc
+; VI-NEXT:    v_cmp_lt_f16_e32 vcc, v5, v4
 ; VI-NEXT:    v_mov_b32_e32 v1, 0x3900
-; VI-NEXT:    v_cndmask_b32_sdwa v1, v1, v2, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; VI-NEXT:    v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
 ; VI-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
 ; VI-NEXT:    buffer_store_dword v0, off, s[8:11], 0
 ; VI-NEXT:    s_endpgm
@@ -1414,13 +1414,13 @@ define amdgpu_kernel void @select_v2f16_imm_c(
 ; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
 ; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(1)
 ; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v1
-; GFX11-TRUE16-NEXT:    v_cmp_nlt_f16_e32 vcc_lo, v1.l, v0.l
+; GFX11-TRUE16-NEXT:    v_cmp_lt_f16_e32 vcc_lo, v1.l, v0.l
 ; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0)
 ; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v0, 16, v2
 ; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-TRUE16-NEXT:    v_cmp_nlt_f16_e64 s0, v4.l, v3.l
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v1.l, 0x3800, v2.l, vcc_lo
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x3900, v0.l, s0
+; GFX11-TRUE16-NEXT:    v_cmp_lt_f16_e64 s0, v4.l, v3.l
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v1.l, v2.l, 0x3800, vcc_lo
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, v0.l, 0x3900, s0
 ; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
 ; GFX11-TRUE16-NEXT:    v_and_b32_e32 v1, 0xffff, v1
 ; GFX11-TRUE16-NEXT:    s_mov_b32 s0, s4
@@ -1455,14 +1455,14 @@ define amdgpu_kernel void @select_v2f16_imm_c(
 ; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
 ; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(1)
 ; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v1
-; GFX11-FAKE16-NEXT:    v_cmp_nlt_f16_e32 vcc_lo, v1, v0
+; GFX11-FAKE16-NEXT:    v_cmp_lt_f16_e32 vcc_lo, v1, v0
 ; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0)
 ; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x3800, v2, vcc_lo
-; GFX11-FAKE16-NEXT:    v_cmp_nlt_f16_e32 vcc_lo, v4, v3
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e64 v0, v2, 0x3800, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_lt_f16_e32 vcc_lo, v4, v3
 ; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4)
 ; GFX11-FAKE16-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0x3900, v5, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e64 v1, v5, 0x3900, vcc_lo
 ; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX11-FAKE16-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
 ; GFX11-FAKE16-NEXT:    buffer_store_b32 v0, off, s[8:11], 0
diff --git a/llvm/test/CodeGen/AMDGPU/uniform-vgpr-to-sgpr-return.ll b/llvm/test/CodeGen/AMDGPU/uniform-vgpr-to-sgpr-return.ll
index 14b91793bd8da..c5da39b3f9d39 100644
--- a/llvm/test/CodeGen/AMDGPU/uniform-vgpr-to-sgpr-return.ll
+++ b/llvm/test/CodeGen/AMDGPU/uniform-vgpr-to-sgpr-return.ll
@@ -6,9 +6,9 @@ define amdgpu_ps i32 @uniform_v_to_s_i32(float inreg %a, float inreg %b) {
 ; GFX11-LABEL: uniform_v_to_s_i32:
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    v_max_f32_e64 v0, s0, s1
-; GFX11-NEXT:    v_cmp_o_f32_e64 vcc_lo, s0, s1
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v0, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e64 s0, s0, s1
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_cndmask_b32_e64 v0, v0, 0x7fc00000, s0
 ; GFX11-NEXT:    v_readfirstlane_b32 s0, v0
 ; GFX11-NEXT:    ; return to shader part epilog
   %max0 = call float @llvm.maximum.f32(float %a, float %b)
@@ -37,14 +37,14 @@ define amdgpu_ps <2 x i32> @uniform_v_to_s_2_i32(<2 x float> inreg %a, <2 x floa
 ; GFX11-LABEL: uniform_v_to_s_2_i32:
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    v_max_f32_e64 v0, s0, s2
-; GFX11-NEXT:    v_cmp_o_f32_e64 vcc_lo, s0, s2
+; GFX11-NEXT:    v_cmp_u_f32_e64 s0, s0, s2
 ; GFX11-NEXT:    v_max_f32_e64 v1, s1, s3
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v0, vcc_lo
-; GFX11-NEXT:    v_cmp_o_f32_e64 vcc_lo, s1, s3
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_cndmask_b32_e64 v0, v0, 0x7fc00000, s0
+; GFX11-NEXT:    v_cmp_u_f32_e64 s0, s1, s3
+; GFX11-NEXT:    v_cndmask_b32_e64 v1, v1, 0x7fc00000, s0
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
 ; GFX11-NEXT:    v_readfirstlane_b32 s0, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7fc00000, v1, vcc_lo
 ; GFX11-NEXT:    v_readfirstlane_b32 s1, v1
 ; GFX11-NEXT:    ; return to shader part epilog
   %max0 = call <2 x float> @llvm.maximum.f32(<2 x float> %a, <2 x float> %b)
@@ -94,9 +94,9 @@ define amdgpu_ps <2 x i16> @uniform_v_to_s_2_i16(float inreg %a, float inreg %b)
 ; GFX11-LABEL: uniform_v_to_s_2_i16:
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    v_max_f32_e64 v0, s0, s1
-; GFX11-NEXT:    v_cmp_o_f32_e64 vcc_lo, s0, s1
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7fc00000, v0, vcc_lo
+; GFX11-NEXT:    v_cmp_u_f32_e64 s0, s0, s1
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_cndmask_b32_e64 v0, v0, 0x7fc00000, s0
 ; GFX11-NEXT:    v_readfirstlane_b32 s0, v0
 ; GFX11-NEXT:    ; return to shader part epilog
   %max0 = call float @llvm.maximum.f32(float %a, float %b)
@@ -107,19 +107,19 @@ define amdgpu_ps <2 x i16> @uniform_v_to_s_2_i16(float inreg %a, float inreg %b)
 define amdgpu_ps i16 @uniform_v_to_s_i16(half inreg %a, half inreg %b) {
 ; GFX11-TRUE16-LABEL: uniform_v_to_s_i16:
 ; GFX11-TRUE16:       ; %bb.0:
-; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s2, s0, s1
+; GFX11-TRUE16-NEXT:    v_cmp_u_f16_e64 s2, s0, s1
 ; GFX11-TRUE16-NEXT:    v_max_f16_e64 v0.l, s0, s1
 ; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x7e00, v0.l, s2
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, v0.l, 0x7e00, s2
 ; GFX11-TRUE16-NEXT:    v_readfirstlane_b32 s0, v0
 ; GFX11-TRUE16-NEXT:    ; return to shader part epilog
 ;
 ; GFX11-FAKE16-LABEL: uniform_v_to_s_i16:
 ; GFX11-FAKE16:       ; %bb.0:
 ; GFX11-FAKE16-NEXT:    v_max_f16_e64 v0, s0, s1
-; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e64 vcc_lo, s0, s1
-; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v0, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_u_f16_e64 s0, s0, s1
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e64 v0, v0, 0x7e00, s0
 ; GFX11-FAKE16-NEXT:    v_readfirstlane_b32 s0, v0
 ; GFX11-FAKE16-NEXT:    ; return to shader part epilog
   %max = call half @llvm.maximum.f16(half %a, half %b)
diff --git a/llvm/test/CodeGen/AMDGPU/v_cndmask.ll b/llvm/test/CodeGen/AMDGPU/v_cndmask.ll
index 1ef4164a93b9f..6dbf2cd45d55a 100644
--- a/llvm/test/CodeGen/AMDGPU/v_cndmask.ll
+++ b/llvm/test/CodeGen/AMDGPU/v_cndmask.ll
@@ -134,10 +134,10 @@ define amdgpu_kernel void @v_cnd_nan(ptr addrspace(1) %out, i32 %c, float %f) #0
 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
 ; SI-NEXT:    s_mov_b32 s4, s0
 ; SI-NEXT:    s_mov_b32 s5, s1
-; SI-NEXT:    s_cmp_eq_u32 s2, 0
+; SI-NEXT:    s_cmp_lg_u32 s2, 0
 ; SI-NEXT:    v_mov_b32_e32 v0, s3
-; SI-NEXT:    s_cselect_b64 vcc, -1, 0
-; SI-NEXT:    v_cndmask_b32_e32 v0, -1, v0, vcc
+; SI-NEXT:    s_cselect_b64 s[0:1], -1, 0
+; SI-NEXT:    v_cndmask_b32_e64 v0, v0, -1, s[0:1]
 ; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
 ; SI-NEXT:    s_endpgm
 ;
@@ -145,10 +145,10 @@ define amdgpu_kernel void @v_cnd_nan(ptr addrspace(1) %out, i32 %c, float %f) #0
 ; VI:       ; %bb.0:
 ; VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
 ; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    s_cmp_eq_u32 s2, 0
+; VI-NEXT:    s_cmp_lg_u32 s2, 0
 ; VI-NEXT:    v_mov_b32_e32 v0, s3
-; VI-NEXT:    s_cselect_b64 vcc, -1, 0
-; VI-NEXT:    v_cndmask_b32_e32 v2, -1, v0, vcc
+; VI-NEXT:    s_cselect_b64 s[2:3], -1, 0
+; VI-NEXT:    v_cndmask_b32_e64 v2, v0, -1, s[2:3]
 ; VI-NEXT:    v_mov_b32_e32 v0, s0
 ; VI-NEXT:    v_mov_b32_e32 v1, s1
 ; VI-NEXT:    flat_store_dword v[0:1], v2
@@ -159,9 +159,9 @@ define amdgpu_kernel void @v_cnd_nan(ptr addrspace(1) %out, i32 %c, float %f) #0
 ; GFX10-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
 ; GFX10-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10-NEXT:    s_cmp_eq_u32 s2, 0
+; GFX10-NEXT:    s_cmp_lg_u32 s2, 0
 ; GFX10-NEXT:    s_cselect_b64 s[4:5], -1, 0
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, -1, s3, s[4:5]
+; GFX10-NEXT:    v_cndmask_b32_e64 v1, s3, -1, s[4:5]
 ; GFX10-NEXT:    global_store_dword v0, v1, s[0:1]
 ; GFX10-NEXT:    s_endpgm
 ;
@@ -170,10 +170,10 @@ define amdgpu_kernel void @v_cnd_nan(ptr addrspace(1) %out, i32 %c, float %f) #0
 ; GFX11-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
 ; GFX11-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11-NEXT:    s_cmp_eq_u32 s2, 0
+; GFX11-NEXT:    s_cmp_lg_u32 s2, 0
 ; GFX11-NEXT:    s_cselect_b64 s[4:5], -1, 0
 ; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, -1, s3, s[4:5]
+; GFX11-NEXT:    v_cndmask_b32_e64 v1, s3, -1, s[4:5]
 ; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
 ; GFX11-NEXT:    s_endpgm
 ;
@@ -182,8 +182,8 @@ define amdgpu_kernel void @v_cnd_nan(ptr addrspace(1) %out, i32 %c, float %f) #0
 ; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
 ; GFX12-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX12-NEXT:    s_wait_kmcnt 0x0
-; GFX12-NEXT:    s_cmp_eq_u32 s2, 0
-; GFX12-NEXT:    s_cselect_b32 s2, s3, -1
+; GFX12-NEXT:    s_cmp_lg_u32 s2, 0
+; GFX12-NEXT:    s_cselect_b32 s2, -1, s3
 ; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
 ; GFX12-NEXT:    v_mov_b32_e32 v1, s2
 ; GFX12-NEXT:    global_store_b32 v0, v1, s[0:1]
@@ -208,8 +208,8 @@ define amdgpu_kernel void @fcmp_sgprX_k0_select_k1_sgprZ_f32(ptr addrspace(1) %o
 ; SI-NEXT:    v_mov_b32_e32 v1, 0
 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
 ; SI-NEXT:    v_mov_b32_e32 v2, s5
-; SI-NEXT:    v_cmp_nlg_f32_e64 vcc, s4, 0
-; SI-NEXT:    v_cndmask_b32_e32 v2, 1.0, v2, vcc
+; SI-NEXT:    v_cmp_lg_f32_e64 s[4:5], s4, 0
+; SI-NEXT:    v_cndmask_b32_e64 v2, v2, 1.0, s[4:5]
 ; SI-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
 ; SI-NEXT:    s_endpgm
 ;
@@ -221,10 +221,10 @@ define amdgpu_kernel void @fcmp_sgprX_k0_select_k1_sgprZ_f32(ptr addrspace(1) %o
 ; VI-NEXT:    s_waitcnt lgkmcnt(0)
 ; VI-NEXT:    v_mov_b32_e32 v1, s1
 ; VI-NEXT:    v_add_u32_e32 v0, vcc, s0, v0
-; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
 ; VI-NEXT:    v_mov_b32_e32 v2, s3
-; VI-NEXT:    v_cmp_nlg_f32_e64 vcc, s2, 0
-; VI-NEXT:    v_cndmask_b32_e32 v2, 1.0, v2, vcc
+; VI-NEXT:    v_cmp_lg_f32_e64 s[0:1], s2, 0
+; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; VI-NEXT:    v_cndmask_b32_e64 v2, v2, 1.0, s[0:1]
 ; VI-NEXT:    flat_store_dword v[0:1], v2
 ; VI-NEXT:    s_endpgm
 ;
@@ -235,8 +235,8 @@ define amdgpu_kernel void @fcmp_sgprX_k0_select_k1_sgprZ_f32(ptr addrspace(1) %o
 ; GFX10-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x24
 ; GFX10-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10-NEXT:    v_cmp_nlg_f32_e64 s[4:5], s0, 0
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, 1.0, s1, s[4:5]
+; GFX10-NEXT:    v_cmp_lg_f32_e64 s[4:5], s0, 0
+; GFX10-NEXT:    v_cndmask_b32_e64 v1, s1, 1.0, s[4:5]
 ; GFX10-NEXT:    global_store_dword v0, v1, s[2:3]
 ; GFX10-NEXT:    s_endpgm
 ;
@@ -249,8 +249,8 @@ define amdgpu_kernel void @fcmp_sgprX_k0_select_k1_sgprZ_f32(ptr addrspace(1) %o
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
 ; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11-NEXT:    v_cmp_nlg_f32_e64 s[4:5], s0, 0
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, 1.0, s1, s[4:5]
+; GFX11-NEXT:    v_cmp_lg_f32_e64 s[4:5], s0, 0
+; GFX11-NEXT:    v_cndmask_b32_e64 v1, s1, 1.0, s[4:5]
 ; GFX11-NEXT:    global_store_b32 v0, v1, s[2:3]
 ; GFX11-NEXT:    s_endpgm
 ;
@@ -263,8 +263,8 @@ define amdgpu_kernel void @fcmp_sgprX_k0_select_k1_sgprZ_f32(ptr addrspace(1) %o
 ; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(SALU_CYCLE_1)
 ; GFX12-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; GFX12-NEXT:    s_wait_kmcnt 0x0
-; GFX12-NEXT:    s_cmp_nlg_f32 s0, 0
-; GFX12-NEXT:    s_cselect_b32 s0, s1, 1.0
+; GFX12-NEXT:    s_cmp_lg_f32 s0, 0
+; GFX12-NEXT:    s_cselect_b32 s0, 1.0, s1
 ; GFX12-NEXT:    v_mov_b32_e32 v1, s0
 ; GFX12-NEXT:    global_store_b32 v0, v1, s[2:3]
 ; GFX12-NEXT:    s_endpgm
@@ -288,8 +288,8 @@ define amdgpu_kernel void @fcmp_sgprX_k0_select_k1_sgprX_f32(ptr addrspace(1) %o
 ; SI-NEXT:    v_mov_b32_e32 v1, 0
 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
 ; SI-NEXT:    v_mov_b32_e32 v2, s4
-; SI-NEXT:    v_cmp_nlg_f32_e64 vcc, s4, 0
-; SI-NEXT:    v_cndmask_b32_e32 v2, 1.0, v2, vcc
+; SI-NEXT:    v_cmp_lg_f32_e64 s[4:5], s4, 0
+; SI-NEXT:    v_cndmask_b32_e64 v2, v2, 1.0, s[4:5]
 ; SI-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
 ; SI-NEXT:    s_endpgm
 ;
@@ -301,10 +301,10 @@ define amdgpu_kernel void @fcmp_sgprX_k0_select_k1_sgprX_f32(ptr addrspace(1) %o
 ; VI-NEXT:    s_waitcnt lgkmcnt(0)
 ; VI-NEXT:    v_mov_b32_e32 v1, s1
 ; VI-NEXT:    v_add_u32_e32 v0, vcc, s0, v0
-; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
 ; VI-NEXT:    v_mov_b32_e32 v2, s2
-; VI-NEXT:    v_cmp_nlg_f32_e64 vcc, s2, 0
-; VI-NEXT:    v_cndmask_b32_e32 v2, 1.0, v2, vcc
+; VI-NEXT:    v_cmp_lg_f32_e64 s[0:1], s2, 0
+; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; VI-NEXT:    v_cndmask_b32_e64 v2, v2, 1.0, s[0:1]
 ; VI-NEXT:    flat_store_dword v[0:1], v2
 ; VI-NEXT:    s_endpgm
 ;
@@ -315,8 +315,8 @@ define amdgpu_kernel void @fcmp_sgprX_k0_select_k1_sgprX_f32(ptr addrspace(1) %o
 ; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; GFX10-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10-NEXT:    v_cmp_nlg_f32_e64 s[2:3], s6, 0
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, 1.0, s6, s[2:3]
+; GFX10-NEXT:    v_cmp_lg_f32_e64 s[2:3], s6, 0
+; GFX10-NEXT:    v_cndmask_b32_e64 v1, s6, 1.0, s[2:3]
 ; GFX10-NEXT:    global_store_dword v0, v1, s[0:1]
 ; GFX10-NEXT:    s_endpgm
 ;
@@ -329,8 +329,8 @@ define amdgpu_kernel void @fcmp_sgprX_k0_select_k1_sgprX_f32(ptr addrspace(1) %o
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
 ; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11-NEXT:    v_cmp_nlg_f32_e64 s[2:3], s6, 0
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, 1.0, s6, s[2:3]
+; GFX11-NEXT:    v_cmp_lg_f32_e64 s[2:3], s6, 0
+; GFX11-NEXT:    v_cndmask_b32_e64 v1, s6, 1.0, s[2:3]
 ; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
 ; GFX11-NEXT:    s_endpgm
 ;
@@ -341,8 +341,8 @@ define amdgpu_kernel void @fcmp_sgprX_k0_select_k1_sgprX_f32(ptr addrspace(1) %o
 ; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(SALU_CYCLE_1)
 ; GFX12-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; GFX12-NEXT:    s_wait_kmcnt 0x0
-; GFX12-NEXT:    s_cmp_nlg_f32 s2, 0
-; GFX12-NEXT:    s_cselect_b32 s2, s2, 1.0
+; GFX12-NEXT:    s_cmp_lg_f32 s2, 0
+; GFX12-NEXT:    s_cselect_b32 s2, 1.0, s2
 ; GFX12-NEXT:    v_mov_b32_e32 v1, s2
 ; GFX12-NEXT:    global_store_b32 v0, v1, s[0:1]
 ; GFX12-NEXT:    s_endpgm
@@ -366,8 +366,8 @@ define amdgpu_kernel void @fcmp_sgprX_k0_select_k0_sgprZ_f32(ptr addrspace(1) %o
 ; SI-NEXT:    v_mov_b32_e32 v1, 0
 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
 ; SI-NEXT:    v_mov_b32_e32 v2, s5
-; SI-NEXT:    v_cmp_nlg_f32_e64 vcc, s4, 0
-; SI-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
+; SI-NEXT:    v_cmp_lg_f32_e64 s[4:5], s4, 0
+; SI-NEXT:    v_cndmask_b32_e64 v2, v2, 0, s[4:5]
 ; SI-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
 ; SI-NEXT:    s_endpgm
 ;
@@ -379,10 +379,10 @@ define amdgpu_kernel void @fcmp_sgprX_k0_select_k0_sgprZ_f32(ptr addrspace(1) %o
 ; VI-NEXT:    s_waitcnt lgkmcnt(0)
 ; VI-NEXT:    v_mov_b32_e32 v1, s1
 ; VI-NEXT:    v_add_u32_e32 v0, vcc, s0, v0
-; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
 ; VI-NEXT:    v_mov_b32_e32 v2, s3
-; VI-NEXT:    v_cmp_nlg_f32_e64 vcc, s2, 0
-; VI-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
+; VI-NEXT:    v_cmp_lg_f32_e64 s[0:1], s2, 0
+; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; VI-NEXT:    v_cndmask_b32_e64 v2, v2, 0, s[0:1]
 ; VI-NEXT:    flat_store_dword v[0:1], v2
 ; VI-NEXT:    s_endpgm
 ;
@@ -393,8 +393,8 @@ define amdgpu_kernel void @fcmp_sgprX_k0_select_k0_sgprZ_f32(ptr addrspace(1) %o
 ; GFX10-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x24
 ; GFX10-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10-NEXT:    v_cmp_nlg_f32_e64 s[4:5], s0, 0
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, 0, s1, s[4:5]
+; GFX10-NEXT:    v_cmp_lg_f32_e64 s[4:5], s0, 0
+; GFX10-NEXT:    v_cndmask_b32_e64 v1, s1, 0, s[4:5]
 ; GFX10-NEXT:    global_store_dword v0, v1, s[2:3]
 ; GFX10-NEXT:    s_endpgm
 ;
@@ -407,8 +407,8 @@ define amdgpu_kernel void @fcmp_sgprX_k0_select_k0_sgprZ_f32(ptr addrspace(1) %o
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
 ; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11-NEXT:    v_cmp_nlg_f32_e64 s[4:5], s0, 0
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, 0, s1, s[4:5]
+; GFX11-NEXT:    v_cmp_lg_f32_e64 s[4:5], s0, 0
+; GFX11-NEXT:    v_cndmask_b32_e64 v1, s1, 0, s[4:5]
 ; GFX11-NEXT:    global_store_b32 v0, v1, s[2:3]
 ; GFX11-NEXT:    s_endpgm
 ;
@@ -421,8 +421,8 @@ define amdgpu_kernel void @fcmp_sgprX_k0_select_k0_sgprZ_f32(ptr addrspace(1) %o
 ; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(SALU_CYCLE_1)
 ; GFX12-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; GFX12-NEXT:    s_wait_kmcnt 0x0
-; GFX12-NEXT:    s_cmp_nlg_f32 s0, 0
-; GFX12-NEXT:    s_cselect_b32 s0, s1, 0
+; GFX12-NEXT:    s_cmp_lg_f32 s0, 0
+; GFX12-NEXT:    s_cselect_b32 s0, 0, s1
 ; GFX12-NEXT:    v_mov_b32_e32 v1, s0
 ; GFX12-NEXT:    global_store_b32 v0, v1, s[2:3]
 ; GFX12-NEXT:    s_endpgm
@@ -446,8 +446,8 @@ define amdgpu_kernel void @fcmp_sgprX_k0_select_k0_sgprX_f32(ptr addrspace(1) %o
 ; SI-NEXT:    v_mov_b32_e32 v1, 0
 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
 ; SI-NEXT:    v_mov_b32_e32 v2, s4
-; SI-NEXT:    v_cmp_nlg_f32_e64 vcc, s4, 0
-; SI-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
+; SI-NEXT:    v_cmp_lg_f32_e64 s[4:5], s4, 0
+; SI-NEXT:    v_cndmask_b32_e64 v2, v2, 0, s[4:5]
 ; SI-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
 ; SI-NEXT:    s_endpgm
 ;
@@ -459,10 +459,10 @@ define amdgpu_kernel void @fcmp_sgprX_k0_select_k0_sgprX_f32(ptr addrspace(1) %o
 ; VI-NEXT:    s_waitcnt lgkmcnt(0)
 ; VI-NEXT:    v_mov_b32_e32 v1, s1
 ; VI-NEXT:    v_add_u32_e32 v0, vcc, s0, v0
-; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
 ; VI-NEXT:    v_mov_b32_e32 v2, s2
-; VI-NEXT:    v_cmp_nlg_f32_e64 vcc, s2, 0
-; VI-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
+; VI-NEXT:    v_cmp_lg_f32_e64 s[0:1], s2, 0
+; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; VI-NEXT:    v_cndmask_b32_e64 v2, v2, 0, s[0:1]
 ; VI-NEXT:    flat_store_dword v[0:1], v2
 ; VI-NEXT:    s_endpgm
 ;
@@ -473,8 +473,8 @@ define amdgpu_kernel void @fcmp_sgprX_k0_select_k0_sgprX_f32(ptr addrspace(1) %o
 ; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; GFX10-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10-NEXT:    v_cmp_nlg_f32_e64 s[2:3], s6, 0
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, 0, s6, s[2:3]
+; GFX10-NEXT:    v_cmp_lg_f32_e64 s[2:3], s6, 0
+; GFX10-NEXT:    v_cndmask_b32_e64 v1, s6, 0, s[2:3]
 ; GFX10-NEXT:    global_store_dword v0, v1, s[0:1]
 ; GFX10-NEXT:    s_endpgm
 ;
@@ -487,8 +487,8 @@ define amdgpu_kernel void @fcmp_sgprX_k0_select_k0_sgprX_f32(ptr addrspace(1) %o
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
 ; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11-NEXT:    v_cmp_nlg_f32_e64 s[2:3], s6, 0
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, 0, s6, s[2:3]
+; GFX11-NEXT:    v_cmp_lg_f32_e64 s[2:3], s6, 0
+; GFX11-NEXT:    v_cndmask_b32_e64 v1, s6, 0, s[2:3]
 ; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
 ; GFX11-NEXT:    s_endpgm
 ;
@@ -499,8 +499,8 @@ define amdgpu_kernel void @fcmp_sgprX_k0_select_k0_sgprX_f32(ptr addrspace(1) %o
 ; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(SALU_CYCLE_1)
 ; GFX12-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; GFX12-NEXT:    s_wait_kmcnt 0x0
-; GFX12-NEXT:    s_cmp_nlg_f32 s2, 0
-; GFX12-NEXT:    s_cselect_b32 s2, s2, 0
+; GFX12-NEXT:    s_cmp_lg_f32 s2, 0
+; GFX12-NEXT:    s_cselect_b32 s2, 0, s2
 ; GFX12-NEXT:    v_mov_b32_e32 v1, s2
 ; GFX12-NEXT:    global_store_b32 v0, v1, s[0:1]
 ; GFX12-NEXT:    s_endpgm
@@ -732,8 +732,8 @@ define amdgpu_kernel void @fcmp_vgprX_k0_select_k1_sgprZ_f32(ptr addrspace(1) %o
 ; SI-NEXT:    s_mov_b64 s[2:3], s[6:7]
 ; SI-NEXT:    v_mov_b32_e32 v3, s8
 ; SI-NEXT:    s_waitcnt vmcnt(0)
-; SI-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v2
-; SI-NEXT:    v_cndmask_b32_e32 v2, 1.0, v3, vcc
+; SI-NEXT:    v_cmp_gt_f32_e32 vcc, 0, v2
+; SI-NEXT:    v_cndmask_b32_e64 v2, v3, 1.0, vcc
 ; SI-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
 ; SI-NEXT:    s_endpgm
 ;
@@ -752,8 +752,8 @@ define amdgpu_kernel void @fcmp_vgprX_k0_select_k1_sgprZ_f32(ptr addrspace(1) %o
 ; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
 ; VI-NEXT:    v_mov_b32_e32 v4, s4
 ; VI-NEXT:    s_waitcnt vmcnt(0)
-; VI-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v3
-; VI-NEXT:    v_cndmask_b32_e32 v2, 1.0, v4, vcc
+; VI-NEXT:    v_cmp_gt_f32_e32 vcc, 0, v3
+; VI-NEXT:    v_cndmask_b32_e64 v2, v4, 1.0, vcc
 ; VI-NEXT:    flat_store_dword v[0:1], v2
 ; VI-NEXT:    s_endpgm
 ;
@@ -765,8 +765,8 @@ define amdgpu_kernel void @fcmp_vgprX_k0_select_k1_sgprZ_f32(ptr addrspace(1) %o
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10-NEXT:    global_load_dword v1, v0, s[2:3]
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
-; GFX10-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v1
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, 1.0, s4, vcc
+; GFX10-NEXT:    v_cmp_gt_f32_e32 vcc, 0, v1
+; GFX10-NEXT:    v_cndmask_b32_e64 v1, s4, 1.0, vcc
 ; GFX10-NEXT:    global_store_dword v0, v1, s[0:1]
 ; GFX10-NEXT:    s_endpgm
 ;
@@ -780,8 +780,8 @@ define amdgpu_kernel void @fcmp_vgprX_k0_select_k1_sgprZ_f32(ptr addrspace(1) %o
 ; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX11-NEXT:    global_load_b32 v1, v0, s[2:3]
 ; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v1
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, 1.0, s4, vcc
+; GFX11-NEXT:    v_cmp_gt_f32_e32 vcc, 0, v1
+; GFX11-NEXT:    v_cndmask_b32_e64 v1, s4, 1.0, vcc
 ; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
 ; GFX11-NEXT:    s_endpgm
 ;
@@ -795,8 +795,8 @@ define amdgpu_kernel void @fcmp_vgprX_k0_select_k1_sgprZ_f32(ptr addrspace(1) %o
 ; GFX12-NEXT:    s_wait_kmcnt 0x0
 ; GFX12-NEXT:    global_load_b32 v1, v0, s[2:3]
 ; GFX12-NEXT:    s_wait_loadcnt 0x0
-; GFX12-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v1
-; GFX12-NEXT:    v_cndmask_b32_e64 v1, 1.0, s4, vcc
+; GFX12-NEXT:    v_cmp_gt_f32_e32 vcc, 0, v1
+; GFX12-NEXT:    v_cndmask_b32_e64 v1, s4, 1.0, vcc
 ; GFX12-NEXT:    global_store_b32 v0, v1, s[0:1]
 ; GFX12-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
diff --git a/llvm/test/CodeGen/AMDGPU/vector-alloca-bitcast.ll b/llvm/test/CodeGen/AMDGPU/vector-alloca-bitcast.ll
index de94ee9550944..89ca7e03bdcd6 100644
--- a/llvm/test/CodeGen/AMDGPU/vector-alloca-bitcast.ll
+++ b/llvm/test/CodeGen/AMDGPU/vector-alloca-bitcast.ll
@@ -14,10 +14,10 @@
 
 ; GCN-PROMOTE: s_cmp_eq_u32 s{{[0-9]+}}, 1
 ; GCN-PROMOTE: s_cselect_b32 [[IND1:s[0-9]+]], 1, 0
-; GCN-PROMOTE: s_cmp_lg_u32 s{{[0-9]+}}, 2
-; GCN-PROMOTE: s_cselect_b32 [[IND2:s[0-9]+]], [[IND1]], 2
-; GCN-PROMOTE: s_cmp_lg_u32 s{{[0-9]+}}, 3
-; GCN-PROMOTE: s_cselect_b32 [[IND3:s[0-9]+]], [[IND2]], 3
+; GCN-PROMOTE: s_cmp_eq_u32 s{{[0-9]+}}, 2
+; GCN-PROMOTE: s_cselect_b32 [[IND2:s[0-9]+]], 2, [[IND1]]
+; GCN-PROMOTE: s_cmp_eq_u32 s{{[0-9]+}}, 3
+; GCN-PROMOTE: s_cselect_b32 [[IND3:s[0-9]+]], 3, [[IND2]]
 ; GCN-PROMOTE: ScratchSize: 0
 
 define amdgpu_kernel void @vector_read_alloca_bitcast(ptr addrspace(1) %out, i32 %index) {
@@ -288,10 +288,10 @@ entry:
 
 ; GCN-PROMOTE: s_cmp_eq_u32 s{{[0-9]+}}, 1
 ; GCN-PROMOTE: s_cselect_b32 [[IND1:s[0-9]+]], 1, 0
-; GCN-PROMOTE: s_cmp_lg_u32 s{{[0-9]+}}, 2
-; GCN-PROMOTE: s_cselect_b32 [[IND2:s[0-9]+]], [[IND1]], 2
-; GCN-PROMOTE: s_cmp_lg_u32 s{{[0-9]+}}, 3
-; GCN-PROMOTE: s_cselect_b32 [[IND3:s[0-9]+]], [[IND2]], 3
+; GCN-PROMOTE: s_cmp_eq_u32 s{{[0-9]+}}, 2
+; GCN-PROMOTE: s_cselect_b32 [[IND2:s[0-9]+]], 2, [[IND1]]
+; GCN-PROMOTE: s_cmp_eq_u32 s{{[0-9]+}}, 3
+; GCN-PROMOTE: s_cselect_b32 [[IND3:s[0-9]+]], 3, [[IND2]]
 
 ; GCN-PROMOTE: ScratchSize: 0
 
diff --git a/llvm/test/CodeGen/AMDGPU/vector-extract-insert.ll b/llvm/test/CodeGen/AMDGPU/vector-extract-insert.ll
index bee2b706fef14..dce2e2b10c44c 100644
--- a/llvm/test/CodeGen/AMDGPU/vector-extract-insert.ll
+++ b/llvm/test/CodeGen/AMDGPU/vector-extract-insert.ll
@@ -42,24 +42,24 @@ define amdgpu_kernel void @extract_insert_different_dynelt_v4i32(ptr addrspace(1
 ; GCN-NEXT:    s_waitcnt lgkmcnt(0)
 ; GCN-NEXT:    s_mov_b64 s[0:1], s[10:11]
 ; GCN-NEXT:    buffer_load_dwordx4 v[1:4], v[4:5], s[0:3], 0 addr64
-; GCN-NEXT:    s_cmp_eq_u32 s13, 3
+; GCN-NEXT:    s_cmp_lg_u32 s13, 3
 ; GCN-NEXT:    s_cselect_b64 vcc, -1, 0
-; GCN-NEXT:    s_cmp_eq_u32 s13, 2
+; GCN-NEXT:    s_cmp_lg_u32 s13, 2
 ; GCN-NEXT:    s_cselect_b64 s[0:1], -1, 0
-; GCN-NEXT:    s_cmp_eq_u32 s13, 1
+; GCN-NEXT:    s_cmp_lg_u32 s13, 1
 ; GCN-NEXT:    s_mov_b64 s[10:11], s[2:3]
 ; GCN-NEXT:    s_cselect_b64 s[2:3], -1, 0
-; GCN-NEXT:    s_cmp_eq_u32 s13, 0
+; GCN-NEXT:    s_cmp_lg_u32 s13, 0
 ; GCN-NEXT:    v_lshlrev_b32_e32 v6, 2, v0
 ; GCN-NEXT:    v_mov_b32_e32 v0, s12
 ; GCN-NEXT:    s_cselect_b64 s[4:5], -1, 0
 ; GCN-NEXT:    s_cmp_eq_u32 s14, 1
 ; GCN-NEXT:    v_mov_b32_e32 v7, v5
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc
-; GCN-NEXT:    v_cndmask_b32_e64 v3, v3, v0, s[0:1]
-; GCN-NEXT:    v_cndmask_b32_e64 v2, v2, v0, s[2:3]
-; GCN-NEXT:    v_cndmask_b32_e64 v0, v1, v0, s[4:5]
+; GCN-NEXT:    v_cndmask_b32_e32 v4, v0, v4, vcc
+; GCN-NEXT:    v_cndmask_b32_e64 v3, v0, v3, s[0:1]
+; GCN-NEXT:    v_cndmask_b32_e64 v2, v0, v2, s[2:3]
+; GCN-NEXT:    v_cndmask_b32_e64 v0, v0, v1, s[4:5]
 ; GCN-NEXT:    s_cselect_b64 vcc, -1, 0
 ; GCN-NEXT:    s_cmp_eq_u32 s14, 2
 ; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
diff --git a/llvm/test/CodeGen/AMDGPU/wave32.ll b/llvm/test/CodeGen/AMDGPU/wave32.ll
index 4212fd3b35cd8..fbd49013ddd57 100644
--- a/llvm/test/CodeGen/AMDGPU/wave32.ll
+++ b/llvm/test/CodeGen/AMDGPU/wave32.ll
@@ -171,11 +171,11 @@ define amdgpu_kernel void @test_vcmp_vcnd_f16(ptr addrspace(1) %out, half %x) #0
 ; GFX1032-NEXT:    s_clause 0x1
 ; GFX1032-NEXT:    s_load_dword s2, s[4:5], 0x2c
 ; GFX1032-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX1032-NEXT:    v_mov_b32_e32 v0, 0x3c00
 ; GFX1032-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX1032-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1032-NEXT:    v_mov_b32_e32 v0, s2
-; GFX1032-NEXT:    v_cmp_neq_f16_e64 vcc_lo, 0x7c00, s2
-; GFX1032-NEXT:    v_cndmask_b32_e32 v0, 0x3c00, v0, vcc_lo
+; GFX1032-NEXT:    v_cmp_eq_f16_e64 vcc_lo, 0x7c00, s2
+; GFX1032-NEXT:    v_cndmask_b32_e32 v0, s2, v0, vcc_lo
 ; GFX1032-NEXT:    global_store_short v1, v0, s[0:1]
 ; GFX1032-NEXT:    s_endpgm
 ;
@@ -184,11 +184,11 @@ define amdgpu_kernel void @test_vcmp_vcnd_f16(ptr addrspace(1) %out, half %x) #0
 ; GFX1064-NEXT:    s_clause 0x1
 ; GFX1064-NEXT:    s_load_dword s2, s[4:5], 0x2c
 ; GFX1064-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX1064-NEXT:    v_mov_b32_e32 v0, 0x3c00
 ; GFX1064-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX1064-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1064-NEXT:    v_mov_b32_e32 v0, s2
-; GFX1064-NEXT:    v_cmp_neq_f16_e64 vcc, 0x7c00, s2
-; GFX1064-NEXT:    v_cndmask_b32_e32 v0, 0x3c00, v0, vcc
+; GFX1064-NEXT:    v_cmp_eq_f16_e64 vcc, 0x7c00, s2
+; GFX1064-NEXT:    v_cndmask_b32_e32 v0, s2, v0, vcc
 ; GFX1064-NEXT:    global_store_short v1, v0, s[0:1]
 ; GFX1064-NEXT:    s_endpgm
   %cmp = fcmp oeq half %x, 0x7FF0000000000000

>From ad795f59e77fb3e371b2fd277522817111687212 Mon Sep 17 00:00:00 2001
From: Ana Mihajlovic <Ana.Mihajlovic at amd.com>
Date: Tue, 10 Jun 2025 10:58:56 +0200
Subject: [PATCH 6/6] add test

---
 llvm/test/CodeGen/AMDGPU/shrink-cndmask.ll | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/llvm/test/CodeGen/AMDGPU/shrink-cndmask.ll b/llvm/test/CodeGen/AMDGPU/shrink-cndmask.ll
index 1703dd8ace0dc..27658330a0d30 100644
--- a/llvm/test/CodeGen/AMDGPU/shrink-cndmask.ll
+++ b/llvm/test/CodeGen/AMDGPU/shrink-cndmask.ll
@@ -722,3 +722,24 @@ define amdgpu_cs void @test_f64_uno(double %a, double %p, double %q, ptr addrspa
   store <2 x double> %ret1, ptr addrspace(1) %out
   ret void
 }
+
+define amdgpu_cs void @additional_test(i32 %a, i32 %p, i32 %q, i32 %r, ptr addrspace(1) %out) {
+; GCN-LABEL: additional_test:
+; GCN:       ; %bb.0: ; %.entry
+; GCN-NEXT:    v_cmp_lt_i32_e32 vcc_lo, 1, v0
+; GCN-NEXT:    v_cndmask_b32_e32 v0, 1, v1, vcc_lo
+; GCN-NEXT:    v_cndmask_b32_e64 v1, v2, 2, vcc_lo
+; GCN-NEXT:    v_cndmask_b32_e32 v2, 3, v3, vcc_lo
+; GCN-NEXT:    global_store_b96 v[4:5], v[0:2], off
+; GCN-NEXT:    s_endpgm
+.entry:
+  %vcc = icmp sge i32 %a, 2
+  %val1 = select i1 %vcc, i32 %p, i32 1
+  %val2 = select i1 %vcc, i32 2, i32 %q
+  %val3 = select i1 %vcc, i32 %r, i32 3
+  %ret0 = insertelement <3 x i32> poison, i32 %val1, i32 0
+  %ret1 = insertelement <3 x i32> %ret0, i32 %val2, i32 1
+  %ret2 = insertelement <3 x i32> %ret1, i32 %val3, i32 2
+  store <3 x i32> %ret2, ptr addrspace(1) %out
+  ret void
+}



More information about the llvm-commits mailing list